Я наткнулся на похожую проблему и написал функцию.
# df must be a balanced panel data set, sorted by id and then by date.
# NOTE: the function drops the rows for which the lagged value would be NA.
# Example panel: two units (id 1 and 2), each observed in 1990-1994,
# entered out of chronological order.
df <- data.frame(
  id = rep(c(1, 2), each = 5),
  date = c(
    1992, 1993, 1991, 1990, 1994,
    1992, 1991, 1994, 1990, 1993
  ),
  value = c(
    4.1, 4.5, 3.3, 5.3, 3.0,
    3.2, 5.2, 5.3, 3.4, 5.6
  )
)
# sort paneldata set
library(dplyr)
# Rows are ordered by id first, then by date within each id.
df <- df %>% arrange(id, date)
#' Lag one or more columns of a balanced panel by `q` periods
#'
#' Rows whose period is earlier than `min(period) + q` are dropped (these are
#' the rows where the lagged value would otherwise be `NA`). In the remaining
#' rows each column named in `b` is overwritten, in row order, with the values
#' taken from the rows whose period is at most `max(period) - q`.
#'
#' The positional pairing is only a true lag when `a` is a balanced panel
#' sorted by unit and then by period; the function has no unit/id argument, so
#' it cannot re-sort the data itself. It therefore
#' * stops when the number of kept rows and available lagged values differ
#'   (otherwise the data-frame assignment could silently recycle values), and
#' * warns when the k-th source row is not exactly `q` periods before the
#'   k-th kept row (typical symptom of unsorted input).
#'
#' @param a Data frame holding the panel.
#' @param b Column name(s) (or positions) of the variable(s) to lag. May be
#'   empty, in which case only the row filtering is applied.
#' @param q Number of periods to lag by (single number, in the units of the
#'   time column).
#' @param t Name of the date/time column.
#' @return `a` without its first `q` periods, with the columns in `b`
#'   replaced by their lagged values. All other columns are left as they are.
retraso <- function(a, b, q, t) {
  stopifnot(
    is.data.frame(a),
    is.numeric(q), length(q) == 1L, !is.na(q),
    length(t) == 1L
  )
  if (is.null(a[[t]])) {
    stop("time column '", t, "' not found in `a`", call. = FALSE)
  }
  if (is.character(b) && !all(b %in% names(a))) {
    stop(
      "column(s) not found in `a`: ",
      paste(setdiff(b, names(a)), collapse = ", "),
      call. = FALSE
    )
  }

  period <- a[[t]]
  period_num <- as.numeric(period)
  first_period <- min(period_num)
  last_period <- max(period_num)

  # `which()` discards NA comparisons instead of producing NA rows.
  keep <- which(period >= first_period + q)
  src <- which(period <= last_period - q)
  lagged <- a[keep, , drop = FALSE]

  # Nothing to lag: return the filtered rows as they are.
  if (length(b) == 0L) {
    return(lagged)
  }

  # A length mismatch means the panel is not balanced; data-frame assignment
  # would recycle silently whenever one length divides the other.
  if (length(src) != length(keep)) {
    stop(
      "`a` does not look like a balanced panel: ", length(keep),
      " row(s) to fill but ", length(src), " lagged value(s) available",
      call. = FALSE
    )
  }

  # Positional pairing is only valid if each source row sits exactly `q`
  # periods before the row it fills; compare with a floating-point tolerance.
  if (length(keep) > 0L) {
    target <- period_num[keep]
    gap <- abs(period_num[src] + q - target)
    tol <- sqrt(.Machine$double.eps) * pmax(1, abs(target))
    if (!isTRUE(all(gap <= tol))) {
      warning(
        "rows do not line up ", q, " period(s) apart; is `a` sorted by ",
        "unit and then by '", t, "'? Lagged values may be misaligned",
        call. = FALSE
      )
    }
  }

  for (col in b) {
    lagged[[col]] <- a[[col]][src]
  }
  lagged
}
# Lag the "value" column by one year
df <- retraso(a = df, b = "value", q = 1, t = "date")