Я пытаюсь расширить свои данные на begyrs
и endyrs
. Данные, которые у меня есть, выглядят так:
pdpcols pdpvalues begyrs begyrsvalues gvkeys gvkeysvalues endyrs endyrsvalues
<chr> <int> <chr> <int> <chr> <int> <chr> <int>
1 pdpco1 138490 begyr1 1998 gvkey1 138490 endyr1 2001
2 pdpco1 138490 begyr1 1998 gvkey1 138490 endyr2 2005
3 pdpco1 138490 begyr2 2003 gvkey1 138490 endyr1 2001
4 pdpco1 138490 begyr2 2003 gvkey1 138490 endyr2 2005
5 pdpco2 6266 begyr1 1998 gvkey1 138490 endyr1 2001
6 pdpco2 6266 begyr1 1998 gvkey1 138490 endyr2 2005
7 pdpco2 6266 begyr2 2003 gvkey1 138490 endyr1 2001
8 pdpco2 6266 begyr2 2003 gvkey1 138490 endyr2 2005
9 pdpco1 138490 begyr1 1998 gvkey1 138490 endyr1 2001
10 pdpco1 138490 begyr1 1998 gvkey1 138490 endyr2 2005
Я хотел бы расширить данные таким образом, чтобы строки были расширены от begyrsvalues
до endyrsvalues
для каждого наблюдения. Таким образом, строка 1 будет расширена с 1998
до 2001
, а строка 2 - с 1998
до 2005
. Это создаст довольно много повторяющихся значений.
Я попытался group_by(gvkeysvalues)
, а затем попытался расширить их без удачи.
Данные:
data <- structure(list(source = c("MTCH1", "MTCH1", "MTCH1", "MTCH1",
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1",
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "m2006", "m2006",
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "MTCH1",
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1",
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "m2006",
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "m2006",
"m2006", "m2006", "PASS1", "PASS1", "PASS1", "PASS1", "PASS1",
"PASS1", "PASS1", "PASS1", "PASS1"), id = c(8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 23L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L,
5432L), cod = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L
), .Label = c("01 Unassigned", "02 US corporation", "03 Foreign corp, incl. state-owned",
"04 US individual", "05 Foreign individual", "06 US government",
"07 Foreign government", "08 US local government", "09 US state government",
"US university", "Foreign university", "US institute", "Foreign institute",
"US hospital/med inst", "Foreign hospital/med inst"), class = "factor"),
cod_fix = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), pdpass = c(12233315L, 12233315L, 12233315L, 12233315L,
12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 12233315L,
12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 12233315L,
11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L,
11864228L, 11864228L, 12233315L, 12233315L, 12233315L, 12233315L,
12233315L, 12233315L, 12233315L, 12233315L, 11864228L, 11864228L,
11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L,
11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L,
11864228L, 11864228L, 11146104L, 11146104L, 11146104L, 11146104L,
11146104L, 11146104L, 11146104L, 11146104L, 11628725L), standard_name = c("3 DIMENSIONAL PHARM",
"3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM",
"3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM",
"3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC",
"3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC",
"3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC",
"3D LABS INC LTD", "3D LABS INC LTD", "3D LABS INC LTD",
"3D LABS INC LTD", "3D LABS INC LTD", "3D LABS INC LTD",
"3D LABS INC LTD", "3D LABS INC LTD", "3D PHARM INC", "3D PHARM INC",
"3D PHARM INC", "3D PHARM INC", "3D PHARM INC", "3D PHARM INC",
"3D PHARM INC", "3D PHARM INC", "3DLABS INC LED", "3DLABS INC LED",
"3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LED",
"3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LTD", "3DLABS INC LTD",
"3DLABS INC LTD", "3DLABS INC LTD", "3DLABS INC LTD", "3DLABS INC LTD",
"3DLABS INC LTD", "3DLABS INC LTD", "CAMBRIDGE SOUNDWORKS INC",
"CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC",
"CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC",
"CAMBRIDGE SOUNDWORKS INC", "CREATIVE TECH LTD"), uspto_assignee = c(715707L,
715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 715707L,
715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 715707L,
715707L, 727325L, 727325L, 727325L, 727325L, 727325L, 727325L,
727325L, 727325L, 715707L, 715707L, 715707L, 715707L, 715707L,
715707L, 715707L, 715707L, 727325L, 727325L, 727325L, 727325L,
727325L, 727325L, 727325L, 727325L, 727325L, 727325L, 727325L,
727325L, 727325L, 727325L, 727325L, 727325L, 85595L, 85595L,
85595L, 85595L, 85595L, 85595L, 85595L, 85595L, 721579L),
pdpcols = c("pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2",
"pdpco2", "pdpco2", "pdpco2", "pdpco1", "pdpco1", "pdpco1",
"pdpco1", "pdpco2", "pdpco2", "pdpco2", "pdpco2", "pdpco1",
"pdpco1", "pdpco1", "pdpco1", "pdpco2", "pdpco2", "pdpco2",
"pdpco2", "pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2",
"pdpco2", "pdpco2", "pdpco2", "pdpco1", "pdpco1", "pdpco1",
"pdpco1", "pdpco2", "pdpco2", "pdpco2", "pdpco2", "pdpco1",
"pdpco1", "pdpco1", "pdpco1", "pdpco2", "pdpco2", "pdpco2",
"pdpco2", "pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2",
"pdpco2", "pdpco2", "pdpco2", "pdpco1"), pdpvalues = c(138490L,
138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 138490L,
138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 64005L,
64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 138490L,
138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 64005L,
64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 64005L,
64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 30060L,
30060L, 30060L, 30060L, 25773L, 25773L, 25773L, 25773L, 25773L
), begyrs = c("begyr1", "begyr1", "begyr2", "begyr2", "begyr1",
"begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2",
"begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1",
"begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2",
"begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1",
"begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2",
"begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1",
"begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2",
"begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1",
"begyr1", "begyr2", "begyr2", "begyr1"), begyrsvalues = c(1998L,
1998L, 2003L, 2003L, 1998L, 1998L, 2003L, 2003L, 1998L, 1998L,
2003L, 2003L, 1998L, 1998L, 2003L, 2003L, 1995L, 1995L, 2002L,
2002L, 1995L, 1995L, 2002L, 2002L, 1998L, 1998L, 2003L, 2003L,
1998L, 1998L, 2003L, 2003L, 1995L, 1995L, 2002L, 2002L, 1995L,
1995L, 2002L, 2002L, 1995L, 1995L, 2002L, 2002L, 1995L, 1995L,
2002L, 2002L, 1993L, 1993L, 1998L, 1998L, 1993L, 1993L, 1998L,
1998L, 1992L), gvkeys = c("gvkey1", "gvkey1", "gvkey1", "gvkey1",
"gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1",
"gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1",
"gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2",
"gvkey2", "gvkey2", "gvkey1", "gvkey1", "gvkey1", "gvkey1",
"gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey2", "gvkey2",
"gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2",
"gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2",
"gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2",
"gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey1"), gvkeysvalues = c(138490L,
138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 138490L,
138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 138490L,
138490L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L,
25773L, 25773L, 138490L, 138490L, 138490L, 138490L, 138490L,
138490L, 138490L, 138490L, 25773L, 25773L, 25773L, 25773L,
25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L,
25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L,
25773L, 25773L, 25773L, 25773L, 25773L), endyrs = c("endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1",
"endyr2", "endyr1"), endyrsvalues = c(2001L, 2005L, 2001L,
2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L,
2001L, 2005L, 2001L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L,
2005L, 2000L, 2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L,
2001L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L,
2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L,
1997L, 2005L, 1997L, 2005L, 1997L, 2005L, 1997L, 2005L, 2006L
)), row.names = c(NA, -57L), class = c("tbl_df", "tbl", "data.frame"
))