У меня есть некоторые данные, которые выглядят так:
begyr1 gvkey1 endyr1 pdpco2 begyr2 gvkey2 endyr2 pdpco3 begyr3 gvkey3 endyr3 pdpco4 begyr4 gvkey4 endyr4
1982 10537 1999 NA NA NA NA NA NA NA NA NA NA NA NA
1995 63658 1999 NA NA NA NA NA NA NA NA NA NA NA NA
1961 9236 1996 NA NA NA NA NA NA NA NA NA NA NA NA
1990 101352 2006 NA NA NA NA NA NA NA NA NA NA NA NA
1963 8150 2005 NA NA NA NA NA NA NA NA NA NA NA NA
1961 9967 1988 NA NA NA NA NA NA NA NA NA NA NA NA
1973 5578 2004 NA NA NA NA NA NA NA NA NA NA NA NA
1959 7946 1991 NA NA NA NA NA NA NA NA NA NA NA NA
1989 24467 1995 NA NA NA NA NA NA NA NA NA NA NA NA
1994 62471 2002 NA NA NA NA NA NA NA NA NA NA NA NA
1965 8783 1995 29217 1996 29217 2005 NA NA NA NA NA NA NA NA
1954 4521 1983 14572 1987 14572 1987 NA NA NA NA NA NA NA NA
1966 2919 1988 9411 1989 9411 1995 NA NA NA NA NA NA NA NA
1969 6785 1996 7985 1997 7985 2005 NA NA NA NA NA NA NA NA
1995 65578 2004 5180 2005 5180 2006 NA NA NA NA NA NA NA NA
1985 11904 1998 10787 1999 10787 2005 NA NA NA NA NA NA NA NA
1961 9967 1988 2403 1989 2403 2006 NA NA NA NA NA NA NA NA
1981 8658 1999 3336 2000 3336 2005 NA NA NA NA NA NA NA NA
1950 6096 1999 10787 2000 10787 2005 NA NA NA NA NA NA NA NA
1980 2901 1990 3011 1991 3011 2005 NA NA NA NA NA NA NA NA
Столбцы:
begyr[x] = год начала
gvkey[x] = ключ ID
endyr[x] = год окончания
Итак, строка 11 в данных после преобразования должна выглядеть так:
Строка:
begyr1 gvkey1 endyr1 pdpco2 begyr2 gvkey2 endyr2
1965 8783 1995 29217 1996 29217 2005
1965 8783 29217
1966 8783 29217
1967 8783 29217
1968 8783 29217
...
1993 8783 29217
1994 8783 29217
1995 8783 29217
...
1996 29217 29217
1997 29217 29217
1998 29217 29217
...
2004 29217 29217
2005 29217 29217
Я следую статье, авторы которой приводят код Stata для этой части обработки. Насколько я понимаю, авторы сначала создают переменную gvkey с пропущенными значениями (NULL), а затем заполняют её в простом цикле for по столбцам.
* Build a single gvkey per observation: for each index i, take gvkey`i' when
* `year' lies inside the inclusive window [begyr`i', endyr`i'].
* NOTE(review): assumes a `year' variable already exists in the dataset and
* that gvkey1-gvkey5, begyr1-begyr5 and endyr1-endyr5 are all present -- the
* sample data shown in this post only go up to index 4; confirm before running.
gen gvkey=.
forvalue i=1/5 {
    * `///' continues the command on the next physical line; without it the
    * second half of the condition would be parsed as a separate command.
    replace gvkey = gvkey`i' if gvkey`i'~=. & year>=begyr`i' & ///
        year<=endyr`i'
}
* Keep only observations that were matched to some window.
keep if gvkey~=.
Данные:
# Reproducible sample of the input, written as a literal data.frame (looks like
# dput() output): 60 rows and 15 integer columns -- begyr1/gvkey1/endyr1, then
# pdpco<i>/begyr<i>/gvkey<i>/endyr<i> for i = 2..4.
# NA presumably means that the i-th entry does not exist for that row -- confirm.
# NOTE(review): the meaning of the pdpco* columns is not described here.
data <- structure(list(begyr1 = c(1982L, 1995L, 1961L, 1990L, 1963L,
1961L, 1973L, 1959L, 1989L, 1994L, 1965L, 1954L, 1966L, 1969L,
1995L, 1985L, 1961L, 1981L, 1950L, 1980L, NA, 1950L, NA, 1950L,
NA, NA, NA, NA, NA, NA, 1950L, 1975L, 1960L, 1991L, 1961L, 1977L,
1977L, 1995L, 1977L, 1991L, 1950L, 1950L, 1950L, 1960L, 1968L,
1950L, 1968L, 1986L, 1950L, 1950L, NA, 1984L, NA, NA, NA, NA,
NA, NA, NA, NA), gvkey1 = c(10537L, 63658L, 9236L, 101352L, 8150L,
9967L, 5578L, 7946L, 24467L, 62471L, 8783L, 4521L, 2919L, 6785L,
65578L, 11904L, 9967L, 8658L, 6096L, 2901L, NA, 3650L, NA, 1300L,
NA, NA, NA, NA, NA, NA, 5245L, 9844L, 3167L, 25723L, 10633L,
10329L, 10329L, 66034L, 9226L, 25723L, 2827L, 2827L, 2827L, 7686L,
8829L, 2827L, 8829L, 13571L, 2827L, 7099L, NA, 11907L, NA, NA,
NA, NA, NA, NA, NA, NA), endyr1 = c(1999L, 1999L, 1996L, 2006L,
2005L, 1988L, 2004L, 1991L, 1995L, 2002L, 1995L, 1983L, 1988L,
1996L, 2004L, 1998L, 1988L, 1999L, 1999L, 1990L, NA, 2006L, NA,
2006L, NA, NA, NA, NA, NA, NA, 1987L, 1980L, 1988L, 1997L, 1986L,
1996L, 1996L, 2000L, 1983L, 1997L, 1985L, 1985L, 1985L, 1981L,
1985L, 1985L, 1985L, 1987L, 1985L, 1995L, NA, 2006L, NA, NA,
NA, NA, NA, NA, NA, NA), pdpco2 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 29217L, 14572L, 9411L, 7985L, 5180L, 10787L, 2403L,
3336L, 10787L, 3011L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
61132L, 2146L, 24969L, 4140L, 14641L, 3282L, 3282L, 10530L, 6066L,
4140L, 162254L, 162254L, 162254L, 28478L, 13333L, 162254L, 13333L,
4781L, 162254L, 100528L, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), begyr2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1996L,
1987L, 1989L, 1997L, 2005L, 1999L, 1989L, 2000L, 2000L, 1991L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1995L, 1981L, 1991L,
1998L, 1988L, 1997L, 1997L, 2002L, 1984L, 1998L, 1987L, 1987L,
1987L, 1983L, 1986L, 1987L, 1986L, 1988L, 1987L, 1997L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), gvkey2 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 29217L, 14572L, 9411L, 7985L, 5180L, 10787L,
2403L, 3336L, 10787L, 3011L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 61132L, 2145L, 24969L, 4140L, 14641L, 3282L, 3282L, 10530L,
6066L, 4140L, 13934L, 13934L, 13934L, 5555L, 13333L, 13934L,
13333L, 4781L, 13934L, 100528L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), endyr2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2005L,
1987L, 1995L, 2005L, 2006L, 2005L, 2006L, 2005L, 2005L, 2005L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1998L, 1983L, 1997L,
2003L, 1988L, 2001L, 2001L, 2003L, 1993L, 2003L, 1996L, 1996L,
1996L, 1991L, 1996L, 1996L, 1996L, 1990L, 1996L, 1998L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), pdpco3 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 13197L, 2146L, 1278L, 29685L,
4735L, 5606L, 5606L, 7883L, 19349L, 29685L, 162254L, 162254L,
162254L, 28478L, 6096L, 162254L, 6096L, 3586L, 162254L, 157415L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), begyr3 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1999L, 1984L, 1998L,
2004L, 1989L, 2002L, 2002L, 2004L, 1994L, 2004L, 1998L, 1998L,
1998L, 1992L, 1997L, 1998L, 1997L, 1991L, 1998L, 2000L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), gvkey3 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 13197L, 2146L, 1278L, 29685L,
4735L, 5606L, 5606L, 7883L, 19349L, 29685L, 125434L, 125434L,
125434L, 28478L, 6096L, 125434L, 6096L, 3586L, 125434L, 148971L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), endyr3 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1999L, 2005L, 1999L,
2004L, 2003L, 2005L, 2005L, 2005L, 2005L, 2004L, 2002L, 2002L,
2002L, 1997L, 1999L, 2002L, 1999L, 1995L, 2002L, 2000L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), pdpco4 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 162254L, 162254L, 162254L, 14385L, 10787L, 162254L,
10787L, 12679L, 162254L, 157415L, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), begyr4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2003L,
2003L, 2003L, 1998L, 2000L, 2003L, 2000L, 1996L, 2003L, 2001L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), gvkey4 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 162254L, 162254L, 162254L, 14385L, 10787L,
162254L, 10787L, 12679L, 162254L, 157415L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), endyr4 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
2006L, 2006L, 2006L, 2005L, 2005L, 2006L, 2005L, 2005L, 2006L,
2005L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-60L), class = "data.frame")