Я сгенерировал примерный набор данных, который, по моему мнению, должен воспроизвести необходимые условия.Пожалуйста, сообщите, если не так.
Я использовал dplyr для выполнения большинства функций:
# load required libraries
library(magrittr)
library(dplyr)
# generate sample data: 3 participants (ppid) x 3 trials (trialn), 5 rows per
# ppid/trialn pair, 45 rows in total
# NOTE: no RNG seed is set here, so SWA and timestamp differ on every run
pilot_clean <-
  base::data.frame(
    ppid = base::rep(base::c(1, 2, 3), each = 15),
    trialn = base::rep(1:3, times = 15),
    # SWA is drawn (with replacement) from the grid 0.000, 0.001, ..., 0.020
    SWA = base::sample(base::seq(0.00, 0.02, by = .001), 45, replace = TRUE)
  ) %>%
  dplyr::arrange(ppid, trialn) %>%
  # the sorted timestamps are attached after arranging, so they increase down
  # the whole table and therefore also within each ppid/trialn group
  dplyr::mutate(timestamp = base::sort(stats::runif(45, min = 5, max = 125)))
# set threshold: SWA values strictly greater than this count as "above"
SWA_threshold <- 0.01
# force null condition: overwrite every SWA of ppid 3 / trialn 3 with a value
# just below the threshold, so this group has no row above the threshold
pilot_clean$SWA[pilot_clean$ppid == 3 & pilot_clean$trialn == 3] <-
  SWA_threshold - .001
# determine the first (earliest) timestamp in each ppid/trialn group
# The row with the minimum timestamp is selected explicitly, so the result
# does not depend on the row order of pilot_clean.
pilot_clean_first_time <-
  pilot_clean %>%
  dplyr::group_by(ppid, trialn) %>%
  dplyr::slice(base::which.min(timestamp)) %>%
  # ungroup before selecting so that only the listed columns are returned
  dplyr::ungroup() %>%
  # keep the join keys and rename timestamp for the later join
  dplyr::select(ppid, trialn, first_timestamp = timestamp)
# determine the first (earliest) timestamp in each ppid/trialn group at which
# SWA is strictly above SWA_threshold
# Groups without any row above the threshold lose all their rows in the filter
# and are therefore absent from this table.
pilot_clean_first_time_above_threshold <-
  pilot_clean %>%
  dplyr::group_by(ppid, trialn) %>%
  dplyr::filter(SWA > SWA_threshold) %>%
  # pick the minimum timestamp explicitly instead of relying on row order
  dplyr::slice(base::which.min(timestamp)) %>%
  # ungroup before selecting so that only the listed columns are returned
  dplyr::ungroup() %>%
  # keep the join keys and rename timestamp for the later join
  dplyr::select(ppid, trialn, first_timestamp_above_threshold = timestamp)
# collect every ppid/trialn pair exactly once; used as the left-hand table of
# the left joins so that pairs missing from a joined table are kept
pilot_ppid_trial_list <-
  base::unique(pilot_clean[, base::c("ppid", "trialn")])
# produce final result set with ppid, trialn, first time, and first time above
# threshold
# Join keys are given explicitly: this avoids the "Joining, by = ..." message
# and keeps the join stable if the tables ever share another column name.
pilot_clean_new <-
  pilot_ppid_trial_list %>%
  dplyr::left_join(pilot_clean_first_time, by = base::c("ppid", "trialn")) %>%
  # left join: pairs with no row above the threshold get NA here
  dplyr::left_join(
    pilot_clean_first_time_above_threshold,
    by = base::c("ppid", "trialn")
  ) %>%
  # final result: time from the group's first timestamp until SWA first
  # exceeds the threshold (NA when it never does)
  dplyr::mutate(
    adjusted_first_timestamp_above_threshold =
      first_timestamp_above_threshold - first_timestamp
  )