Question

Я написал короткую тестовую программу для сочетания тяги и odeint, для решения уравнения $ \part_tx = -2 \ pi x $ на векторе со следующим фрагментом кода:

#include <iostream>
#include <cmath>
#include <utility>

#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/complex.h>

#include <boost/numeric/odeint.hpp>

#include <boost/numeric/odeint/external/thrust/thrust.hpp>

#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>

#ifdef USE_COMPLEX
typedef std::complex<double> value_type;
#else
typedef double value_type;
#endif

typedef std::vector<value_type> result_type;    


typedef thrust::device_vector< value_type > state_type;
typedef thrust::device_vector< size_t > index_vector_type;

class boost_thrust_calculator{
public:
    boost_thrust_calculator(const size_t n_dimensions,
                            const double cur_z,
                            const double delta_t) : n_dimensions(n_dimensions),
        cur_z(cur_z),
        delta_t(delta_t) {};

    struct boost_functor {
        template <class T>
        __host__ __device__
        void operator() (T t) const {
            value_type dxdt = thrust::get<0>(t) * (-2. * M_PI);
            thrust::get<1>(t) = dxdt;
        }
    };

    template <class State, class Deriv>
    void operator()(const State &x, Deriv &dxdt, value_type t) const {
        thrust::for_each(
                    thrust::make_zip_iterator(thrust::make_tuple(
                                                  x.begin(),
                                                  dxdt.begin())
                                              ),
                    thrust::make_zip_iterator(thrust::make_tuple(
                                                  x.end(),
                                                  dxdt.end()
                                                  )
                                              ),
                    boost_functor()
                    );
    }

    double get_cur_z(void) const {
        return this->cur_z;
    }

private:
    size_t n_dimensions;
    double cur_z;
    double delta_t;
};

class boost_observer {
public:
    boost_observer(const size_t dimensions,
                   const size_t every,
                   std::vector<state_type> &result_data,
                   std::vector<double> &result_time) : dimensions(dimensions), m_every(every), m_count(0), data(result_data), time(result_time) {
    }

    struct boost_obs_functor {
        template <class T>
        __host__ __device__
        void operator() (T t) const {
            thrust::get<1>(t) = thrust::get<0>(t);
        }
    };

    template <class State>
    void operator()(State &x, double t) {
        state_type local_data(dimensions);
        thrust::fill (local_data.begin(),
                      local_data.end(),
                      value_type(0.));
        thrust::for_each(thrust::make_zip_iterator(
                             thrust::make_tuple(x.begin(),
                                                local_data.begin())
                             ),
                         thrust::make_zip_iterator(
                             thrust::make_tuple(x.end(),
                                                local_data.end())
                             ),
                         boost_obs_functor()
                         );
        data.push_back(local_data);
        this->time.push_back(t);
    }

    void return_data(std::vector<std::pair<double, result_type>> &return_data){
        return_data.clear();
        if(time.size() == data.size() && time.size() != 0){
            for(size_t i = 0; i < time.size(); ++i){
                result_type local_data(data[i].size(), 0.);
                thrust::copy(data[i].begin(),
                             data[i].end(),
                             local_data.begin());
                return_data.push_back(std::make_pair(time[i], local_data));
            }
        }
    }

private:
    size_t dimensions;
    size_t m_every;
    size_t m_count;
    std::vector<state_type> data;
    std::vector<double> time;
};

void solve_equation_with_cuda_Boost_class(std::vector<std::pair<double, result_type>> &solution, const simulation_parameters &params){
    using namespace boost::numeric::odeint;

    state_type x_0(dim);

#ifdef USE_COMPLEX
    thrust::fill(x_0.begin(), x_0.end(), std::complex<double>(1., 0.));
    //thrust::fill(x_0.begin() + dim, x_0.end(), std::complex<double>(0., 0.));
#else
    thrust::fill(x_0.begin(), x_0.end(), 1.);
    //thrust::fill(x_0.begin() + dim, x_0.end(), 0.);
#endif

    std::vector<state_type> result_data;
    std::vector<double> result_time;

    boost_observer obs(dim, 1,
                       result_data,
                       result_time);

#ifdef ADAPTIVE
    typedef runge_kutta_dopri5<state_type , value_type , state_type , value_type , thrust_algebra, thrust_operations> stepper_type;
    boost_thrust_calculator local_calculator(dim,
                                             params.tmin,
                                             params.delta_t);

    double cur_t = params.tmin;
    size_t steps = 0;
    while(cur_t < params.tmax) {
        steps = integrate_adaptive(make_controlled(params.eps_abs, params.eps_rel, stepper_type()),
                                   local_calculator,
                                   x_0,
                                   cur_t,
                                   cur_t + params.delta_t,
                                   1e-3 * params.delta_t);
        cur_t += params.delta_t;
        obs(x_0, cur_t);
    }
#else
    typedef runge_kutta_fehlberg78<state_type> error_stepper_type;
    typedef controlled_runge_kutta< error_stepper_type > controlled_stepper_type;
    controlled_stepper_type controlled_stepper;

    boost_thrust_calculator local_calculator(dim,
                                             params.tmin,
                                             params.delta_t);
    double cur_t = params.tmin;
    size_t steps = 0;
    while(cur_t < params.tmax) {
        steps = integrate_const(runge_kutta4<state_type>(),
                                local_calculator,
                                x_0,
                                cur_t,
                                cur_t + params.delta_t,
                                1e-3 * params.delta_t);
        cur_t += params.delta_t;
        obs(x_0, cur_t);
    }

#endif
    obs.return_data(solution);
}

void solve_equation_with_Boost_cuda(std::vector<std::pair<double, result_type>> &boost_class_solution,
                                    const simulation_parameters &params) {
    solve_equation_with_cuda_Boost_class (boost_class_solution,
                                          params);
}

Теперь, при запуске кода без определения USE_COMPLEX, т.е. с использованием double в качестве типа данных, код работает нормально. При использовании std::complex<double> код компилируется, но при запуске вылетает (с ошибкой for_each: failed to synchronize: cudaErrorLaunchFailure: unspecified launch failure). Обратный след указывает на zip_iterator, по которому итерируется boost_functor().
Почему это происходит и как я могу это исправить?

Решение ODE с odeint и тяги, в то время как содержит комплексные числа

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Решение ODE с odeint и тяги, в то время как содержит комплексные числа

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы