Пример данных
library(dplyr)
library(data.table)
library(Rcpp)
# Crop calendar: one row per year x mg x plant_date combination, holding the
# day-of-year bounds (stage1..stage3) that are later compared against `doy`.
df <- data.frame(
  year = rep(2003:2004, each = 4),
  mg = rep(rep(c("a", "b"), each = 2), times = 2),
  plant_date = c(20, 30, 20, 30, 33, 40, 33, 40),
  stage1 = c(40, 50, 42, 52, 43, 55, 48, 57),
  stage2 = c(55, 65, 57, 66, 58, 68, 59, 65),
  stage3 = c(61, 75, 63, 76, 66, 77, 68, 79)
)

# Simulated daily rainfall: 365 days for each of the two years.
set.seed(123)
dat <- data.frame(
  year = rep(2003:2004, each = 365),
  doy = rep(1:365, times = 2),
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  rainfall = sample(0:20, 730, replace = TRUE)
)

# `year` is the only column shared by both tables; naming it explicitly avoids
# the implicit natural join. Each daily row is intentionally repeated once per
# matching row of `df` (four per year).
final.dat <- dat %>%
  dplyr::left_join(df, by = "year")
Что я хочу сделать: для каждого года и каждой комбинации plant_date и mg вычислить сумму и среднее количество осадков от plant_date до stage1, от stage1 до stage2 и от stage2 до stage3. Я сделал это так:
# Per (year, plant_date, mg) group: total and mean rainfall over the days from
# plant_date up to and including each stage bound.
final.dat %>%
  dplyr::group_by(year, plant_date, mg) %>%
  dplyr::summarise(
    sum_rain_stage1 = sum(rainfall[plant_date <= doy & stage1 >= doy]),
    sum_rain_stage2 = sum(rainfall[plant_date <= doy & stage2 >= doy]),
    sum_rain_stage3 = sum(rainfall[plant_date <= doy & stage3 >= doy]),
    mean_rain_stage1 = mean(rainfall[plant_date <= doy & stage1 >= doy]),
    mean_rain_stage2 = mean(rainfall[plant_date <= doy & stage2 >= doy]),
    mean_rain_stage3 = mean(rainfall[plant_date <= doy & stage3 >= doy])
  )
Я хочу добиться того же результата с помощью Rcpp. Но из-за нехватки знаний мне удалось написать только каркас решения:
# Convert to a data.table and key it by the grouping columns plus doy.
# The key must be set on `final.dat` itself; no object named `x` exists here.
final.dat <- data.table(final.dat)
setkey(final.dat, year, plant_date, mg, doy)
Определить функцию (приведена ниже) и загрузить её:
# Compile rainfall_sum.cpp and expose its exported functions to this session.
Rcpp::sourceCpp("rainfall_sum.cpp")
Запуск функции выглядит следующим образом:
# Add the six summary columns by group. rainfall_sum() declares its stage
# arguments as scalars, but stage1..stage3 are not grouping columns and reach
# `j` as full-length vectors; they are constant within a group, so the first
# element is passed. plant_date is a grouping column and is already length 1.
final.dat[, c("sum_rain_stage1", "sum_rain_stage2", "sum_rain_stage3",
              "mean_rain_stage1", "mean_rain_stage2", "mean_rain_stage3") :=
            rainfall_sum(doy, rainfall, plant_date,
                         stage1[1L], stage2[1L], stage3[1L]),
          keyby = .(year, plant_date, mg)]
Моя (неполная) функция:
#include <Rcpp.h>
using namespace Rcpp;
// Sum and average rainfall over three windows for one group of daily records.
//
// Window k covers every record with plant_date <= doy <= stage<k> (both ends
// inclusive), so all three windows start at plant_date. This mirrors the
// dplyr::summarise() reference shown earlier in this file.
// NOTE(review): the accompanying text describes consecutive intervals
// (stage1 to stage2, stage2 to stage3) - confirm which definition is wanted.
//
// Arguments:
//   doy         day of year of each record
//   rainfall    rainfall of each record; must have the same length as doy
//   plant_date  lower bound shared by all three windows
//   stage1..3   upper bound of the respective window
//
// Returns a named list of six scalars: sum_rain_stage1..3 followed by
// mean_rain_stage1..3. A window that matches no record yields a sum of 0 and
// a mean of NaN, which is what sum() and mean() give in R for a zero-length
// vector. Records whose doy is NA fail every comparison and are skipped.
// [[Rcpp::export]]
List rainfall_sum(NumericVector doy,
                  NumericVector rainfall,
                  double plant_date,
                  double stage1,
                  double stage2,
                  double stage3) {
  const R_xlen_t n = doy.size();
  if (rainfall.size() != n) {
    Rcpp::stop("`doy` and `rainfall` must have the same length");
  }

  double sum_rain_stage1 = 0.0;
  double sum_rain_stage2 = 0.0;
  double sum_rain_stage3 = 0.0;
  R_xlen_t count_stage1 = 0;
  R_xlen_t count_stage2 = 0;
  R_xlen_t count_stage3 = 0;

  // Single pass: each record is tested against every window independently, so
  // the result does not depend on the relative order of the stage bounds.
  for (R_xlen_t i = 0; i < n; ++i) {
    const double day = doy[i];
    // Written as a negated >= so that an NA/NaN day is skipped as well.
    if (!(day >= plant_date)) {
      continue;
    }
    const double rain = rainfall[i];
    if (day <= stage1) {
      sum_rain_stage1 += rain;
      ++count_stage1;
    }
    if (day <= stage2) {
      sum_rain_stage2 += rain;
      ++count_stage2;
    }
    if (day <= stage3) {
      sum_rain_stage3 += rain;
      ++count_stage3;
    }
  }

  // Guard the division so an empty window gives NaN rather than 0/0 by chance.
  const double mean_rain_stage1 =
      count_stage1 > 0 ? sum_rain_stage1 / static_cast<double>(count_stage1)
                       : R_NaN;
  const double mean_rain_stage2 =
      count_stage2 > 0 ? sum_rain_stage2 / static_cast<double>(count_stage2)
                       : R_NaN;
  const double mean_rain_stage3 =
      count_stage3 > 0 ? sum_rain_stage3 / static_cast<double>(count_stage3)
                       : R_NaN;

  return Rcpp::List::create(Rcpp::Named("sum_rain_stage1") = sum_rain_stage1,
                            Rcpp::Named("sum_rain_stage2") = sum_rain_stage2,
                            Rcpp::Named("sum_rain_stage3") = sum_rain_stage3,
                            Rcpp::Named("mean_rain_stage1") = mean_rain_stage1,
                            Rcpp::Named("mean_rain_stage2") = mean_rain_stage2,
                            Rcpp::Named("mean_rain_stage3") = mean_rain_stage3);
}