Я написал короткую тестовую программу для сочетания тяги и odeint, для решения уравнения $ \part_tx = -2 \ pi x $ на векторе со следующим фрагментом кода:
#include <iostream>
#include <cmath>
#include <utility>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/complex.h>
#include <boost/numeric/odeint.hpp>
#include <boost/numeric/odeint/external/thrust/thrust.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#ifdef USE_COMPLEX
typedef std::complex<double> value_type;
#else
typedef double value_type;
#endif
typedef std::vector<value_type> result_type;
typedef thrust::device_vector< value_type > state_type;
typedef thrust::device_vector< size_t > index_vector_type;
class boost_thrust_calculator{
public:
boost_thrust_calculator(const size_t n_dimensions,
const double cur_z,
const double delta_t) : n_dimensions(n_dimensions),
cur_z(cur_z),
delta_t(delta_t) {};
struct boost_functor {
template <class T>
__host__ __device__
void operator() (T t) const {
value_type dxdt = thrust::get<0>(t) * (-2. * M_PI);
thrust::get<1>(t) = dxdt;
}
};
template <class State, class Deriv>
void operator()(const State &x, Deriv &dxdt, value_type t) const {
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(
x.begin(),
dxdt.begin())
),
thrust::make_zip_iterator(thrust::make_tuple(
x.end(),
dxdt.end()
)
),
boost_functor()
);
}
double get_cur_z(void) const {
return this->cur_z;
}
private:
size_t n_dimensions;
double cur_z;
double delta_t;
};
class boost_observer {
public:
boost_observer(const size_t dimensions,
const size_t every,
std::vector<state_type> &result_data,
std::vector<double> &result_time) : dimensions(dimensions), m_every(every), m_count(0), data(result_data), time(result_time) {
}
struct boost_obs_functor {
template <class T>
__host__ __device__
void operator() (T t) const {
thrust::get<1>(t) = thrust::get<0>(t);
}
};
template <class State>
void operator()(State &x, double t) {
state_type local_data(dimensions);
thrust::fill (local_data.begin(),
local_data.end(),
value_type(0.));
thrust::for_each(thrust::make_zip_iterator(
thrust::make_tuple(x.begin(),
local_data.begin())
),
thrust::make_zip_iterator(
thrust::make_tuple(x.end(),
local_data.end())
),
boost_obs_functor()
);
data.push_back(local_data);
this->time.push_back(t);
}
void return_data(std::vector<std::pair<double, result_type>> &return_data){
return_data.clear();
if(time.size() == data.size() && time.size() != 0){
for(size_t i = 0; i < time.size(); ++i){
result_type local_data(data[i].size(), 0.);
thrust::copy(data[i].begin(),
data[i].end(),
local_data.begin());
return_data.push_back(std::make_pair(time[i], local_data));
}
}
}
private:
size_t dimensions;
size_t m_every;
size_t m_count;
std::vector<state_type> data;
std::vector<double> time;
};
void solve_equation_with_cuda_Boost_class(std::vector<std::pair<double, result_type>> &solution, const simulation_parameters ¶ms){
using namespace boost::numeric::odeint;
state_type x_0(dim);
#ifdef USE_COMPLEX
thrust::fill(x_0.begin(), x_0.end(), std::complex<double>(1., 0.));
//thrust::fill(x_0.begin() + dim, x_0.end(), std::complex<double>(0., 0.));
#else
thrust::fill(x_0.begin(), x_0.end(), 1.);
//thrust::fill(x_0.begin() + dim, x_0.end(), 0.);
#endif
std::vector<state_type> result_data;
std::vector<double> result_time;
boost_observer obs(dim, 1,
result_data,
result_time);
#ifdef ADAPTIVE
typedef runge_kutta_dopri5<state_type , value_type , state_type , value_type , thrust_algebra, thrust_operations> stepper_type;
boost_thrust_calculator local_calculator(dim,
params.tmin,
params.delta_t);
double cur_t = params.tmin;
size_t steps = 0;
while(cur_t < params.tmax) {
steps = integrate_adaptive(make_controlled(params.eps_abs, params.eps_rel, stepper_type()),
local_calculator,
x_0,
cur_t,
cur_t + params.delta_t,
1e-3 * params.delta_t);
cur_t += params.delta_t;
obs(x_0, cur_t);
}
#else
typedef runge_kutta_fehlberg78<state_type> error_stepper_type;
typedef controlled_runge_kutta< error_stepper_type > controlled_stepper_type;
controlled_stepper_type controlled_stepper;
boost_thrust_calculator local_calculator(dim,
params.tmin,
params.delta_t);
double cur_t = params.tmin;
size_t steps = 0;
while(cur_t < params.tmax) {
steps = integrate_const(runge_kutta4<state_type>(),
local_calculator,
x_0,
cur_t,
cur_t + params.delta_t,
1e-3 * params.delta_t);
cur_t += params.delta_t;
obs(x_0, cur_t);
}
#endif
obs.return_data(solution);
}
void solve_equation_with_Boost_cuda(std::vector<std::pair<double, result_type>> &boost_class_solution,
const simulation_parameters ¶ms) {
solve_equation_with_cuda_Boost_class (boost_class_solution,
params);
}
Теперь, при запуске кода без определения USE_COMPLEX
, т.е. с использованием double
в качестве типа данных, код работает нормально. При использовании std::complex<double>
код компилируется, но при запуске вылетает (с ошибкой for_each: failed to synchronize: cudaErrorLaunchFailure: unspecified launch failure
). Обратный след указывает на zip_iterator
, по которому итерируется boost_functor()
.
Почему это происходит и как я могу это исправить?