Я использовал rbind
для объединения двух наборов данных во фрейм данных размером 828 x 5 с именем vegetation. Вот пример моего текущего фрейма данных:
site year sos eos vegetation
EPIC_alligatorriver.csv 2016 176 301
EPIC_alligatorriver.csv 2018 164 291
Landsat_alligatorriver.csv 2016 170 303
Pheno_alligatorriver.csv 2017 152 288
Landsat_NEON.BART42.csv 2017 115 290
Pheno_NEON.BART42.csv 2017 120 290
alligatorriver.csv NA NA NA deciduous broadleaf
NEON.BART42.csv NA NA NA mixed forest
Вот что мне нужно получить:
site year sos eos vegetation
EPIC_alligatorriver.csv 2016 176 301 deciduous broadleaf
EPIC_alligatorriver.csv 2018 164 291 deciduous broadleaf
Landsat_alligatorriver.csv 2016 170 303 deciduous broadleaf
Pheno_alligatorriver.csv 2017 152 288 deciduous broadleaf
Landsat_NEON.BART42.csv 2017 115 290 mixed forest
Pheno_NEON.BART42.csv 2017 120 290 mixed forest
alligatorriver.csv NA NA NA deciduous broadleaf
NEON.BART42.csv NA NA NA mixed forest
По сути, мне нужно заполнить пустой столбец vegetation, подставив правильный тип растительности на основе имени сайта.
Я также могу удалить rbind и хранить все данные о растительности в совершенно другом двухстолбцовом фрейме данных
site vegetation
alligatorriver.csv deciduous broadleaf
konza.csv grassland
merbleue.csv wetland
NEON.BART42.csv mixed forest
, если это облегчает задачу. Всего существует 7 типов растительности и 99 участков за трехлетний период (2016, 2017, 2018). Любая помощь приветствуется!
df1 (partial): structure(list(site = c("EPIC_alligatorriver.csv", "EPIC_alligatorriver.csv",
"EPIC_alligatorriver.csv", "EPIC_arbutuslakeinlet.csv", "EPIC_arbutuslakeinlet.csv",
"EPIC_arbutuslakeinlet.csv", "EPIC_archboldavir.csv", "EPIC_archboldavir.csv",
"EPIC_archboldavir.csv", "EPIC_archboldavirx.csv"), year = c(2016L, 2017L, 2018L, 2016L, 2017L, 2018L, 2016L,
2017L, 2018L, 2016L), sos = c(117, 111, 122, 147, 145, 144, 98, 156,
114, 98), eos = c(294, 294, 274, 276, 271, 274, 315, 295, 307, 315), vegetation = c("", "", "", "", "", "", "", "", "", "")), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "data.frame")
df2: structure(list(vegetation = structure(c(2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 6L, 3L, 2L, 4L, 2L, 2L, 3L, 4L, 2L, 3L, 2L,
1L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 6L, 6L, 4L, 1L, 4L, 1L, 4L, 2L,
3L, 2L, 4L, 7L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 2L, 4L, 7L, 2L,
4L, 2L, 2L, 5L, 2L, 4L, 2L, 5L, 2L, 5L, 2L, 6L, 2L, 5L, 3L, 2L,
2L, 5L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 4L, 6L, 2L,
1L, 4L, 2L, 1L, 3L, 2L, 2L, 4L, 3L, 2L, 3L, 2L, 2L), .Label = c("agriculture",
"deciduous broadleaf", "evergreen needlefeaf", "grassland", "mixed forest",
"shrub", "wetland"), class = "factor"), site = structure(1:99, .Label = c("alligatorriver",
"arbutuslakeinlet", "archboldavir", "archboldavirx", "archboldpnot",
"archboldpnotx", "arsmnswanlake1", "ashburnham", "bartlettir",
"bostoncommon", "bullshoals", "burnssagebrush", "canadaOBS",
"caryinstitute", "cperuvb", "downerwoods", "dukehw", "goodnow",
"grandteton", "harvard", "harvardbarn", "harvardbarn2", "harvardfarmsouth",
"harvardhemlock", "harvardhemlock2", "harvardlph", "howland1",
"howland2", "hubbardbrook", "huyckpreserveny", "jerbajada", "jernort",
"kansas", "kelloggcorn", "kendall", "kingmanfarm", "konza", "lacclair",
"laclaflamme", "laurentides", "lethbridge", "lostcreek", "luckyhills",
"mandanh5", "mandani2", "mead1", "mead2", "mead3", "meadpasture",
"merbleue", "missouriozarks", "montebondonegrass", "montebondonepeat",
"morganmonroe", "nationalelkrefuge", "ncssm", "NEON.BART33",
"NEON.BART42", "NEON.DELA33", "NEON.DSNY33", "NEON.HARV33", "NEON.HARV42",
"NEON.JERC33", "NEON.JERC42", "NEON.LENO33", "NEON.ONAQ33", "NEON.ORNL33",
"NEON.ORNL42", "NEON.RMNP33", "NEON.SERC33", "NEON.TREE33", "NEON.TREE42",
"NEON.UNDE33", "NEON.WOOD33", "ninemileprairie", "northattkeboroma",
"oakville", "proctor", "queens", "readingma", "rosemountnprs",
"russellsage", "sanford", "sevilletagrass", "sevilletashrub",
"shalehillsczo", "southerngreatplains", "stjones", "sweetbriar",
"sweetbriargrass", "sylvania", "tonzi", "umichbiological2", "usgseros",
"usmpj", "uwmfieldsta", "warrenwilson", "willowcreek", "worcester"
), class = "factor")), class = "data.frame", row.names = c(NA,
-99L))