Question

В Matlab есть repmat и replem .Для матриц x1 и x2 размеров [d, n1] и [d, n2] вы можете сделать это в Matlab:

n1 = size(x1, 2);
n2 = size(x2, 2);

M = repmat(x1, 1, n2) - repelem(x2, 1, n1);

Что такое эквивалентный собственный код?Ниже приведены четыре варианта, которыми я не совсем доволен.Интересно, можно ли превратить его в более быстрый однострочный?

TL; DR: вариант 2 лучше, но это зависит от компилятора и других вещей.

int d = x1.rows();
int n1 = x1.cols();
int n2 = x2.cols();

Eigen::MatrixXd M(d, n1*n2);

// Variant 1:
int idx = 0;
for (int c = 0; c != n2; c++) {
  for (int r = 0; r != n1; r++) {
    M.col(idx) = x1.col(r) - x2.col(c);
    idx += 1;
  }
}

// Variant 2:
for (int c = 0, idx = 0; c != n2; c += 1, idx += n1)
  M.block(0, idx, d, n1) = x1.colwise() - x2.col(c);

// Variant 3:
M = - x2.replicate(n1, 1);
M.resize(d, n1*n2);
M += x1.replicate(1, n2);

// Variant 5:
M = x1.rowwise().replicate(n2) - x2(all,VectorXi::LinSpaced(n1*n2,0,n2-1));

Вотмои текущие сроки, см. ниже для полного кода.Matlab рассчитан на многопоточность и однопоточность Matlab R2017b.Версии C ++, скомпилированные с флагами -O3 -DNDEBUG -march=native -mtune=native.Все были запущены на i5-6500, поэтому у меня есть AVX.

               time in seconds
Code         gcc-7  gcc-8 clang-6
-----------------------------------
Matlab mt  51
Matlab st  84
V. 1           38    37     57
V. 2           36    34     23
V. 3          598   599    187
V. 5           94   172    107

Код Matlab:

ds = 1:10;
n1s = 5:5:500;
n2s = 5:5:500;

z1 = randn(max(ds), max(n1s));
z2 = randn(max(ds), max(n2s));

tic;
s = 0;
for idx = 1:numel(ds)
    for jdx = 1:numel(n1s)
        for kdx = 1:numel(n2s)
            K = MFdiff(z1(1:ds(idx), 1:n1s(jdx)),...
                z2(1:ds(idx), 1:n2s(kdx)));
            s = s + K(1,1);
        end
    end
end
toc

Код C ++:

#include <iostream>
#include <Eigen/Dense>

using namespace Eigen;

template <typename Derived1, typename Derived2>
MatrixXd MFdiff1(
  DenseBase<Derived1> const & x1,
  DenseBase<Derived2> const & x2)
{
  int d = x1.rows();
  int n1 = x1.cols();
  int n2 = x2.cols();

  MatrixXd out(d, n1*n2);

  int idx = 0;
  for (int c = 0; c != n2; c++) {
    for (int r = 0; r != n1; r++) {
      out.col(idx) = x1.col(r) - x2.col(c);
      idx += 1;
    }
  }

  return out;
}

template <typename Derived1, typename Derived2>
MatrixXd MFdiff2(
  DenseBase<Derived1> const & x1,
  DenseBase<Derived2> const & x2)
{
  int d = x1.rows();
  int n1 = x1.cols();
  int n2 = x2.cols();

  MatrixXd out(d, n1*n2);

  for (int c = 0, idx = 0; c != n2; c+=1, idx += n1)
    out.block(0, idx, d, n1) = x1.colwise() - x2.col(c);

  return out;
}

template <typename Derived1, typename Derived2>
MatrixXd MFdiff3(
  DenseBase<Derived1> const & x1,
  DenseBase<Derived2> const & x2)
{
  int d = x1.rows();
  int n1 = x1.cols();
  int n2 = x2.cols();

  MatrixXd out;
  out = - x2.replicate(n1, 1);
  out.resize(d, n1*n2);
  out += x1.replicate(1, n2);

  return out;
}

template <typename Derived1, typename Derived2>
MatrixXd MFdiff5(
  DenseBase<Derived1> const & x1,
  DenseBase<Derived2> const & x2)
{
  int n1 = x1.cols();
  int n2 = x2.cols();

  return x1.rowwise().replicate(n2) - x2(all,VectorXi::LinSpaced(n1*n2,0,n2-1));
}

double test(VectorXi const & ds,
      VectorXi const & n1s,
      VectorXi const & n2s)
{
  MatrixXd z1 = MatrixXd::Random(ds.maxCoeff(), n1s.maxCoeff());
  MatrixXd z2 = MatrixXd::Random(ds.maxCoeff(), n2s.maxCoeff());

  double s = 0;
  for (int idx = 0; idx!=ds.rows(); idx++) {
    for (int jdx = 0; jdx!=n1s.rows(); jdx++) {
      for (int kdx = 0; kdx!=n2s.rows(); kdx++) {
        MatrixXd K = MFdiff5(z1.block(0, 0, ds(idx), n1s(jdx)),
                 z2.block(0, 0, ds(idx), n2s(kdx)));
        s += K(0,0);
      }
    }
  }

  return s;
}  

int main() {
  VectorXi ds = VectorXi::LinSpaced(10, 1, 10);
  VectorXi n1s = VectorXi::LinSpaced(100, 5, 500);
  VectorXi n2s = VectorXi::LinSpaced(100, 5, 500);

  std::cout << test(ds, n1s, n2s) << '\n';
}

ggael · Answer 1 · 23 октября 2018

С головой Eigen вы можете написать:

M = x1.rowwise().replicate(n2) - x2(Eigen::all,VectorXi::LinSpaced(n1*n2,0,n2-1));

с той же скоростью, что и ваш вариант 2.

Автономный тест (BenchTimer.h нужен клон репо), протестировано с -O3 -DNDEBUG с gcc 7 и clang 6:

#include <iostream>
#include <Eigen/Dense>
#include <bench/BenchTimer.h>
using namespace Eigen;
using namespace std;

EIGEN_DONT_INLINE
void foo1(const MatrixXd& x1, const MatrixXd& x2, MatrixXd& M)
{
  int d = x1.rows();
  int n1 = x1.cols();
  int n2 = x2.cols();

  int idx = 0;
  for (int c = 0; c != n2; c++) {
    M.block(0, idx, d, n1) = x1.colwise() - x2.col(c);
    idx += n1;
  }
}

EIGEN_DONT_INLINE
void foo2(const MatrixXd& x1, const MatrixXd& x2, MatrixXd& M)
{
  int n1 = x1.cols();
  int n2 = x2.cols();

  M = x1.rowwise().replicate(n2) - x2(all,VectorXi::LinSpaced(n1*n2,0,n2-1));
}

int main()
{
  int tries = 2;
  int rep = 1;

  int d = 100;
  int n1 = 100;
  int n2 = 100;

  MatrixXd x1(d,n1); x1.setRandom();
  MatrixXd x2(d,n2); x2.setRandom();
  MatrixXd M(d, n1*n2);

  BenchTimer t;
  BENCH(t, tries, rep, foo1(x1, x2, M));
  std::cout << "Time: " << t.best() << "s" << std::endl;

  BENCH(t, tries, rep, foo2(x1, x2, M));
  std::cout << "Time: " << t.best() << "s" << std::endl;
}

Что такое собственный эквивалент репетема Matlab?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Что такое собственный эквивалент репетема Matlab?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы