Я пытаюсь выяснить, как преобразовать набор данных с названиями политических партий из широкого формата в длинный, используя dplyr и pivot_longer.
Для каждого Party_ID есть ряд постоянных столбцов (Party_Name_Short, Party_Name, Country, Party_in_orig_title), а также ряд изменяющихся во времени переменных: election, Date, Rename, Reason, Party_Title, alliance, member_parties, split, parent_party, merger, child_party, successor, predecessor. Изменяющиеся во времени переменные регистрировались до 11 раз для каждой партии, что отражено в числовом индексе в конце имени столбца.
В качестве примера я взял первые три набора изменяющихся во времени столбцов (индексы 1–3) для каждой партии и случайную выборку из 5 строк:
# Sample of the wide input (dput()-style literal): a tibble with 5 rows,
# one per party.
# Constant columns: Party_Name_Short, Party_Name, Country, Party_ID,
# Party_in_orig_title.
# Time-varying columns carry a numeric suffix 1-3 appended directly to the
# stem, with no separator before the digit (e.g. Date1, Party_Title2,
# election3). election1..election3 are stored with class "Date".
structure(list(Party_Name_Short = c("LZJ-PS", "ZiZi", "MNR",
"MDP", "E200"), Party_Name = c("Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200"), Country = c("SVN", "HRV", "FRA", "PRT",
"EST"), Party_ID = c(1987, 2612, 1263, 1281, 2720), Party_in_orig_title = c(0,
0, 0, 0, 0), Date1 = c(2011, NA, 1999, 1987, NA), Rename1 = c("Lista Zorana Jankovica – Pozitivna Slovenija",
NA, "Mouvement national républicain", "ID", NA), Reason1 = c("foundation",
NA, "split from FN", "split", NA), Party_Title1 = c(0, NA, 0,
0, NA), alliance1 = c(0, NA, 0, 0, NA), member_parties1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
split1 = c(0, NA, 1, 1, NA), parent_party1 = c(NA, NA, "FN",
"MDP", NA), merger1 = c(0, NA, 0, 0, NA), child_party1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), successor1 = c(0, NA, 0, 0, NA), predecessor1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), Date2 = c(2012, NA, NA, NA, NA), Rename2 = c("Pozitivna Slovenija",
NA, NA, NA, NA), Reason2 = c("renamed", NA, NA, NA, NA),
Party_Title2 = c(0, NA, NA, NA, NA), alliance2 = c(0, NA,
NA, NA, NA), member_parties2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), split2 = c(0,
NA, NA, NA, NA), parent_party2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), merger2 = c(0,
NA, NA, NA, NA), child_party2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), successor2 = c(0,
NA, NA, NA, NA), predecessor2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Date3 = c(2014,
NA, NA, NA, NA), Rename3 = c("ZaAB", NA, NA, NA, NA), Reason3 = c("split",
NA, NA, NA, NA), Party_Title3 = c(0, NA, NA, NA, NA), alliance3 = c(0,
NA, NA, NA, NA), member_parties3 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), split3 = c(1,
NA, NA, NA, NA), parent_party3 = c("LZJ-PS", NA, NA, NA,
NA), merger3 = c(0, NA, NA, NA, NA), child_party3 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), successor3 = c(0, NA, NA, NA, NA), predecessor3 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), election1 = structure(c(15309, 16740, 11839, 6390, 17956
), class = "Date"), election2 = structure(c(16252, NA, NA,
NA, NA), class = "Date"), election3 = structure(c(16344,
NA, NA, NA, NA), class = "Date")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
Я бы хотел привести данные к «длинной» структуре, в которой каждый Party_ID и постоянные переменные повторяются 11 раз, а для каждой изменяющейся во времени переменной есть отдельный столбец. Следуя ответу с самым высоким рейтингом, приведённому здесь, я попробовал различные варианты следующей команды:
# Attempted wide-to-long reshape. This is a fragment: no data argument is
# visible, so it is presumably the right-hand side of a pipe (confirm).
#
# cols     : every column whose name starts with one of the 13 time-varying
#            stems.
# names_to : ".value" followed by the same 13 stems (14 entries in total).
# names_sep: "_" requests that each column name be split at underscores.
#
# NOTE(review): in the sample data the index is appended directly to the stem
# (Date1, Rename2, election3) and several stems contain "_" themselves
# (Party_Title, member_parties, parent_party, child_party). Splitting on "_"
# therefore cannot separate stem from index and does not yield 14 pieces;
# this is the likely cause of the NAs and wrong shape. The usual form for
# this layout is names_to = c(".value", "time") together with
# names_pattern = "(.+?)(\\d+)$" -- verify against the tidyr documentation.
#
# The trailing select() drops any columns that still carry a numeric suffix
# (patterns [1-9] and 1[0-2]); with a correct pivot it should be unnecessary.
pivot_longer(cols = starts_with(c("election", "Date", "Rename", "Reason", "Party_Title",
"alliance", "member_parties", "split", "parent_party",
"merger", "child_party", "successor", "predecessor")),
names_to = c(".value", "election", "Date", "Rename", "Reason", "Party_Title",
"alliance", "member_parties", "split", "parent_party",
"merger", "child_party", "successor", "predecessor"), names_sep = "_") %>%
select(-matches("election[1-9]"), -matches("Date[1-9]"), -matches("Rename[1-9]"),
-matches("Reason[1-9]"), -matches("alliance[1-9]"), -matches("member_parties[1-9]"),
-matches("split[1-9]"), -matches("parent_party[1-9]"), -matches("merger[1-9]"),
-matches("child_party[1-9]"), -matches("successor[1-9]"), -matches("predecessor[1-9]"),
-matches("Party_Title[1-9]"), -matches("election1[0-2]"), -matches("Date1[0-2]"), -matches("Rename1[0-2]"),
-matches("Reason1[0-2]"), -matches("alliance1[0-2]"), -matches("member_parties1[0-2]"),
-matches("split1[0-2]"), -matches("parent_party1[0-2]"), -matches("merger1[0-2]"),
-matches("child_party1[0-2]"), -matches("successor1[0-2]"), -matches("predecessor1[0-2]"),
-matches("Party_Title1[0-2]"))
Однако по какой-то причине я получаю много пропущенных значений и не могу привести данные к нужной мне форме. Буду признателен за любую подсказку, если у вас есть идея, как это сделать. Спасибо!
Обновление:
Я бы хотел, чтобы окончательный результат выглядел примерно так:
# Desired long result for the sample: a tibble with 15 rows
# (5 parties x 3 time points).
# The constant columns are repeated for each time point, the numeric suffix
# of the wide column names becomes the `time` column (1, 2, 3), and each
# time-varying stem (Date, Rename, ..., election) is a single column.
# Rows are ordered by `time` first, then by party.
# NOTE(review): `election` is POSIXct (tzone "UTC") here, whereas the wide
# sample stores election1..election3 with class "Date"; confirm which type
# is actually wanted.
# NOTE(review): member_parties, child_party and predecessor are written as
# plain NA here, while the wide sample uses NA_character_ for them.
structure(list(Party_Name_Short = c("LZJ-PS", "ZiZi", "MNR",
"MDP", "E200", "LZJ-PS", "ZiZi", "MNR", "MDP", "E200", "LZJ-PS",
"ZiZi", "MNR", "MDP", "E200"), Party_Name = c("Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200", "Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200", "Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200"), Country = c("SVN", "HRV", "FRA", "PRT",
"EST", "SVN", "HRV", "FRA", "PRT", "EST", "SVN", "HRV", "FRA",
"PRT", "EST"), Party_ID = c(1987, 2612, 1263, 1281, 2720, 1987,
2612, 1263, 1281, 2720, 1987, 2612, 1263, 1281, 2720), Party_in_orig_title = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), time = c(1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3), Date = c(2011, NA, 1999,
1987, NA, 2012, NA, NA, NA, NA, 2014, NA, NA, NA, NA), Rename = c("Lista Zorana Jankovica – Pozitivna Slovenija",
NA, "Mouvement national républicain", "ID", NA, "Pozitivna Slovenija",
NA, NA, NA, NA, "ZaAB", NA, NA, NA, NA), Reason = c("foundation",
NA, "split from FN", "split", NA, "renamed", NA, NA, NA, NA,
"split", NA, NA, NA, NA), Party_Title = c(0, NA, 0, 0, NA, 0,
NA, NA, NA, NA, 0, NA, NA, NA, NA), alliance = c(0, NA, 0, 0,
NA, 0, NA, NA, NA, NA, 0, NA, NA, NA, NA), member_parties = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), split = c(0,
NA, 1, 1, NA, 0, NA, NA, NA, NA, 1, NA, NA, NA, NA), parent_party = c(NA,
NA, "FN", "MDP", NA, NA, NA, NA, NA, NA, "LZJ-PS", NA, NA, NA,
NA), merger = c(0, NA, 0, 0, NA, 0, NA, NA, NA, NA, 0, NA, NA,
NA, NA), child_party = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), successor = c(0, NA, 0, 0, NA, 0, NA,
NA, NA, NA, 0, NA, NA, NA, NA), predecessor = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), election = structure(c(1322697600,
1446336000, 1022889600, 552096000, 1551398400, 1404172800, NA,
NA, NA, NA, 1412121600, NA, NA, NA, NA), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -15L), class = c("tbl_df",
"tbl", "data.frame"))
Примечание: столбец time добавлен заново. Обратите внимание, что это только пример с тремя моментами времени, тогда как в реальных данных их 11.