Я работаю в колл-центре, и эти данные - все соответствующие данные для нашей системы автодозвона. Начиная с 2019 года наша компания запустила несколько кампаний, чтобы охватить участников множеством возможных рецептов вместо 1.
В настоящее время мы сталкиваемся с тем, что у члена X будет потенциально 6 рецептов, с которыми мы пытаемся связаться с ними, и зачастую они дублируются. Отдел, собирающий все эти данные, просматривает столбцы IDCARD_MBR_ID и TARGET_DOC_ для уникальных значений, не обращая внимания на название рецептурного препарата.
Моя цель - удалить дубликаты отпускаемых по рецепту лекарств. и пополнить в любом свободном месте
Порядок приема лекарств также не всегда одинаков. Краткий пример этого может выглядеть следующим образом:
H0000000001 (member data columns) Target1, Target2
H0000000001 (same member data) Target3, Target1, Target2
я бы хотел видеть:
H0000000001 (member data columns) Target1, Target2,Target3
Я уже обработал свои данные с помощью сбора, удалил дубликаты, перенумеровал свои цели 1-6 (чтобы я мог создать новую строку, если найдено> 3) и распространил ее обратно. Моя проблема с моим распространением .. У меня есть нечто, похожее на это:
structure(list(IDCARD_MBR_ID = c("H0000000001", "H0000000001",
"H0000000001", "H0000000002", "H0000000002", "H0000000002", "H0000000003",
"H0000000003", "H0000000003", "H0000000004", "H0000000004", "H0000000004",
"H0000000005", "H0000000005", "H0000000005", "H0000000006", "H0000000006",
"H0000000006", "H0000000007", "H0000000007", "H0000000007", "H0000000008",
"H0000000008", "H0000000008", "H0000000009", "H0000000009", "H0000000009",
"H0000000010", "H0000000010", "H0000000010", "H0000000011", "H0000000011",
"H0000000011", "H0000000012", "H0000000012", "H0000000012"),
SDR_PERSON_ID = c("ID000000001", "ID000000001", "ID000000001",
"ID000000002", "ID000000002", "ID000000002", "ID000000003",
"ID000000003", "ID000000003", "ID000000004", "ID000000004",
"ID000000004", "ID000000005", "ID000000005", "ID000000005",
"ID000000006", "ID000000006", "ID000000006", "ID000000007",
"ID000000007", "ID000000007", "ID000000008", "ID000000008",
"ID000000008", "ID000000009", "ID000000009", "ID000000009",
"ID000000010", "ID000000010", "ID000000010", "ID000000011",
"ID000000011", "ID000000011", "ID000000012", "ID000000012",
"ID000000012"), FIRST_NAME = c("First1", "First1", "First1",
"First2", "First2", "First2", "First3", "First3", "First3",
"First4", "First4", "First4", "First5", "First5", "First5",
"First6", "First6", "First6", "First7", "First7", "First7",
"First8", "First8", "First8", "First9", "First9", "First9",
"First10", "First10", "First10", "First11", "First11", "First11",
"First12", "First12", "First12"), LAST_NAME = c("Last1",
"Last1", "Last1", "Last2", "Last2", "Last2", "Last3", "Last3",
"Last3", "Last4", "Last4", "Last4", "Last5", "Last5", "Last5",
"Last6", "Last6", "Last6", "Last7", "Last7", "Last7", "Last8",
"Last8", "Last8", "Last9", "Last9", "Last9", "Last10", "Last10",
"Last10", "Last11", "Last11", "Last11", "Last12", "Last12",
"Last12"), GENDER = c("F", "F", "F", "M", "M", "M", "M",
"M", "M", "F", "F", "F", "F", "F", "F", "F", "F", "F", "M",
"M", "M", "F", "F", "F", "F", "F", "F", "M", "M", "M", "F",
"F", "F", "F", "F", "F"), DOB = c("1901-01-01", "1901-01-01",
"1901-01-01", "1901-01-02", "1901-01-02", "1901-01-02", "1901-01-03",
"1901-01-03", "1901-01-03", "1901-01-04", "1901-01-04", "1901-01-04",
"1901-01-05", "1901-01-05", "1901-01-05", "1901-01-06", "1901-01-06",
"1901-01-06", "1901-01-07", "1901-01-07", "1901-01-07", "1901-01-08",
"1901-01-08", "1901-01-08", "1901-01-09", "1901-01-09", "1901-01-09",
"1901-01-10", "1901-01-10", "1901-01-10", "1901-01-11", "1901-01-11",
"1901-01-11", "1901-01-12", "1901-01-12", "1901-01-12"),
ADDRESS1 = c("123 Testing Way", "123 Testing Way", "123 Testing Way",
"456 Testing Way", "456 Testing Way", "456 Testing Way",
"789 Testing Way", "789 Testing Way", "789 Testing Way",
"1122 Testing Way", "1122 Testing Way", "1122 Testing Way",
"1455 Testing Way", "1455 Testing Way", "1455 Testing Way",
"1788 Testing Way", "1788 Testing Way", "1788 Testing Way",
"2121 Testing Way", "2121 Testing Way", "2121 Testing Way",
"2454 Testing Way", "2454 Testing Way", "2454 Testing Way",
"2787 Testing Way", "2787 Testing Way", "2787 Testing Way",
"3120 Testing Way", "3120 Testing Way", "3120 Testing Way",
"3453 Testing Way", "3453 Testing Way", "3453 Testing Way",
"3786 Testing Way", "3786 Testing Way", "3786 Testing Way"
), ADDRESS2 = c(NA, NA, NA, NA, NA, NA, "APT 302", "APT 302",
"APT 302", "APT 225", "APT 225", "APT 225", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), CITY = c("CLARKSVILLE", "CLARKSVILLE",
"CLARKSVILLE", "MONROE", "MONROE", "MONROE", "LOS ANGELES",
"LOS ANGELES", "LOS ANGELES", "TULSA", "TULSA", "TULSA",
"RED BUD", "RED BUD", "RED BUD", "MISSION VIEJO", "MISSION VIEJO",
"MISSION VIEJO", "MILLCREEK", "MILLCREEK", "MILLCREEK", "COLUMBIA",
"COLUMBIA", "COLUMBIA", "HOUSTON", "HOUSTON", "HOUSTON",
"APOPKA", "APOPKA", "APOPKA", "SEALY", "SEALY", "SEALY",
"SEWARD", "SEWARD", "SEWARD"), ST = c("IN", "IN", "IN", "MI",
"MI", "MI", "CA", "CA", "CA", "OK", "OK", "OK", "IL", "IL",
"IL", "CA", "CA", "CA", "UT", "UT", "UT", "MO", "MO", "MO",
"TX", "TX", "TX", "FL", "FL", "FL", "TX", "TX", "TX", "NE",
"NE", "NE"), ZIP = c(47129L, 47129L, 47129L, 48162L, 48162L,
48162L, 90028L, 90028L, 90028L, 74136L, 74136L, 74136L, 62278L,
62278L, 62278L, 92692L, 92692L, 92692L, 84124L, 84124L, 84124L,
65201L, 65201L, 65201L, 77057L, 77057L, 77057L, 32703L, 32703L,
32703L, 77474L, 77474L, 77474L, 68434L, 68434L, 68434L),
EMAIL_ADDRESS = c("Test-mail@test.com", "Test-mail@test.com",
"Test-mail@test.com", NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Test-mail@test.com", "Test-mail@test.com", "Test-mail@test.com",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Test-mail@test.com",
"Test-mail@test.com", "Test-mail@test.com", NA, NA, NA, NA,
NA, NA), PHONE1 = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L), ARG_CLIENT_NUM = c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L), FORMULARYID = c(19430L, 19430L, 19431L,
19431L, 19431L, 19430L, 19430L, 19430L, 19430L, 19430L, 19430L,
19430L, 19430L, 19430L, 19430L, 19431L, 19431L, 19431L, 19430L,
19430L, 19431L, 19431L, 19431L, 19430L, 19430L, 19430L, 19431L,
19430L, 19430L, 19430L, 19430L, 19430L, 19430L, 19430L, 19430L,
19431L), CAMPAIGN_TYPE = c("PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD"), LIS_INDICATOR = c("N",
"N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",
"Y", "N", "N", "N", "N", "Y", "Y", "N", "N", "N", "N", "N",
"N", "N", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y", "N"), MONTH = c("February / DIALER",
"February / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "February / DIALER",
"February / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "February / DIALER", "January / DIALER",
"January / DIALER", "February / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "February / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER"), Calltype1 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), Calltype2 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), Calltype3 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), TARGET_DOC_1 = c("A000000000025001",
NA, NA, "A000000000026001", NA, NA, "A000000000027001", NA,
NA, "A000000000028001", NA, NA, "A000000000029001", NA, NA,
"A000000000030001", NA, NA, "A000000000031001", NA, NA, "A000000000032001",
NA, NA, "A000000000033001", NA, NA, "A000000000034001", NA,
NA, "A000000000035001", NA, NA, "A000000000036001", NA, NA
), TARGET_DOC_2 = c(NA, "A000000000013001", NA, NA, "A000000000014001",
NA, NA, "A000000000015001", NA, NA, "A000000000016001", NA,
NA, "A000000000017001", NA, NA, "A000000000018001", NA, NA,
"A000000000019001", NA, NA, "A000000000020001", NA, NA, "A000000000021001",
NA, NA, "A000000000022001", NA, NA, "A000000000023001", NA,
NA, "A000000000024001", NA), TARGET_DOC_3 = c(NA, NA, "A000000000001001",
NA, NA, "A000000000002001", NA, NA, "A000000000003001", NA,
NA, "A000000000004001", NA, NA, "A000000000005001", NA, NA,
"A000000000006001", NA, NA, "A000000000007001", NA, NA, "A000000000008001",
NA, NA, "A000000000009001", NA, NA, "A000000000010001", NA,
NA, "A000000000011001", NA, NA, "A000000000012001"), TARGET_DRUG_ID_1 = c(57237014401,
NA, NA, 65862058301, NA, NA, 57237014301, NA, NA, 68180075701,
NA, NA, 65862058401, NA, NA, 55111033901, NA, NA, 65862058201,
NA, NA, 65862058405, NA, NA, 65862058601, NA, NA, 68180075801,
NA, NA, 65862058601, NA, NA, 68180076001, NA, NA), TARGET_DRUG_ID_2 = c(NA,
57237014401, NA, NA, 65862058301, NA, NA, 57237014301, NA,
NA, 68180075701, NA, NA, 65862058401, NA, NA, 55111033901,
NA, NA, 65862058201, NA, NA, 65862058405, NA, NA, 65862058601,
NA, NA, 68180075801, NA, NA, 65862058601, NA, NA, 68180076001,
NA), TARGET_DRUG_ID_3 = c(NA, NA, 65862058701, NA, NA, 65862058701,
NA, NA, 55111034001, NA, NA, 65862058601, NA, NA, 65862058701,
NA, NA, 55111034001, NA, NA, 65862058401, NA, NA, 65862058405,
NA, NA, 68180047301, NA, NA, 68180075801, NA, NA, 65862058701,
NA, NA, 65862058301), TARGET_NAME_1 = c("Target11", NA, NA,
"Target12", NA, NA, "Target13", NA, NA, "Target14", NA, NA,
"Target15", NA, NA, "Target16", NA, NA, "Target17", NA, NA,
"Target18", NA, NA, "Target19", NA, NA, "Target20", NA, NA,
"Target21", NA, NA, "Target22", NA, NA), TARGET_NAME_2 = c(NA,
"Target21", NA, NA, "Target22", NA, NA, "Target23", NA, NA,
"Target24", NA, NA, "Target25", NA, NA, "Target26", NA, NA,
"Target27", NA, NA, "Target28", NA, NA, "Target29", NA, NA,
"Target30", NA, NA, "Target31", NA, NA, "Target32", NA),
TARGET_NAME_3 = c(NA, NA, "Target31", NA, NA, "Target32",
NA, NA, "Target33", NA, NA, "Target34", NA, NA, "Target35",
NA, NA, "Target36", NA, NA, "Target37", NA, NA, "Target38",
NA, NA, "Target39", NA, NA, "Target40", NA, NA, "Target41",
NA, NA, "Target42")), class = "data.frame", row.names = c(NA,
-36L))}
Каждый набор целей находится на отдельной линии ... и я не могу понять, как их свернуть и удалить NA, чтобы я получил результат, который выглядит следующим образом:
structure(list(IDCARD_MBR_ID = c("H0000000001", "H0000000002",
"H0000000003", "H0000000004", "H0000000005", "H0000000006", "H0000000007",
"H0000000008", "H0000000009", "H0000000010", "H0000000011", "H0000000012"
), SDR_PERSON_ID = c("ID000000001", "ID000000002", "ID000000003",
"ID000000004", "ID000000005", "ID000000006", "ID000000007", "ID000000008",
"ID000000009", "ID000000010", "ID000000011", "ID000000012"),
FIRST_NAME = c("First1", "First2", "First3", "First4", "First5",
"First6", "First7", "First8", "First9", "First10", "First11",
"First12"), LAST_NAME = c("Last1", "Last2", "Last3", "Last4",
"Last5", "Last6", "Last7", "Last8", "Last9", "Last10", "Last11",
"Last12"), GENDER = c("F", "M", "M", "F", "F", "F", "M",
"F", "F", "M", "F", "F"), DOB = c("1/1/1901", "1/2/1901",
"1/3/1901", "1/4/1901", "1/5/1901", "1/6/1901", "1/7/1901",
"1/8/1901", "1/9/1901", "1/10/1901", "1/11/1901", "1/12/1901"
), ADDRESS1 = c("123 Testing Way", "456 Testing Way", "789 Testing Way",
"1122 Testing Way", "1455 Testing Way", "1788 Testing Way",
"2121 Testing Way", "2454 Testing Way", "2787 Testing Way",
"3120 Testing Way", "3453 Testing Way", "3786 Testing Way"
), ADDRESS2 = c(NA, NA, "APT 302", "APT 225", NA, NA, NA,
NA, NA, NA, NA, NA), CITY = c("CLARKSVILLE", "MONROE", "LOS ANGELES",
"TULSA", "RED BUD", "MISSION VIEJO", "MILLCREEK", "COLUMBIA",
"HOUSTON", "APOPKA", "SEALY", "SEWARD"), ST = c("IN", "MI",
"CA", "OK", "IL", "CA", "UT", "MO", "TX", "FL", "TX", "NE"
), ZIP = c(47129L, 48162L, 90028L, 74136L, 62278L, 92692L,
84124L, 65201L, 77057L, 32703L, 77474L, 68434L), EMAIL_ADDRESS = c("Test-mail@test.com",
NA, NA, NA, "Test-mail@test.com", NA, NA, NA, NA, "Test-mail@test.com",
NA, NA), PHONE1 = 1:12, ARG_CLIENT_NUM = c(6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), FORMULARYID = c(19430L,
19431L, 19430L, 19430L, 19430L, 19431L, 19430L, 19431L, 19430L,
19430L, 19430L, 19430L), CAMPAIGN_TYPE = c("PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD",
"PDP_Lean MAPD", "PDP_Lean MAPD", "PDP_Lean MAPD"), LIS_INDICATOR = c("N",
"N", "Y", "Y", "Y", "N", "Y", "N", "N", "Y", "Y", "Y"), MONTH = c("February / DIALER",
"January / DIALER", "February / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER", "January / DIALER",
"January / DIALER", "January / DIALER"), Calltype1 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), Calltype2 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), Calltype3 = c("Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings", "Cost Savings",
"Cost Savings", "Cost Savings", "Cost Savings"), TARGET_DOC_1 = c("A000000000025001",
"A000000000026001", "A000000000027001", "A000000000028001",
"A000000000029001", "A000000000030001", "A000000000031001",
"A000000000032001", "A000000000033001", "A000000000034001",
"A000000000035001", "A000000000036001"), TARGET_DOC_2 = c("A000000000013001",
"A000000000014001", "A000000000015001", "A000000000016001",
"A000000000017001", "A000000000018001", "A000000000019001",
"A000000000020001", "A000000000021001", "A000000000022001",
"A000000000023001", "A000000000024001"), TARGET_DOC_3 = c("A000000000001001",
"A000000000002001", "A000000000003001", "A000000000004001",
"A000000000005001", "A000000000006001", "A000000000007001",
"A000000000008001", "A000000000009001", "A000000000010001",
"A000000000011001", "A000000000012001"), TARGET_DRUG_ID_1 = c(57237014401,
65862058301, 57237014301, 68180075701, 65862058401, 55111033901,
65862058201, 65862058405, 65862058601, 68180075801, 65862058601,
68180076001), TARGET_DRUG_ID_2 = c(57237014401, 65862058301,
57237014301, 68180075701, 65862058401, 55111033901, 65862058201,
65862058405, 65862058601, 68180075801, 65862058601, 68180076001
), TARGET_DRUG_ID_3 = c(65862058701, 65862058701, 55111034001,
65862058601, 65862058701, 55111034001, 65862058401, 65862058405,
68180047301, 68180075801, 65862058701, 65862058301), TARGET_NAME_1 = c("Target11",
"Target12", "Target13", "Target14", "Target15", "Target16",
"Target17", "Target18", "Target19", "Target20", "Target21",
"Target22"), TARGET_NAME_2 = c("Target21", "Target22", "Target23",
"Target24", "Target25", "Target26", "Target27", "Target28",
"Target29", "Target30", "Target31", "Target32"), TARGET_NAME_3 = c("Target31",
"Target32", "Target33", "Target34", "Target35", "Target36",
"Target37", "Target38", "Target39", "Target40", "Target41",
"Target42")), class = "data.frame", row.names = c(NA, -12L
))
Я смотрел на подобные вопросы, такие как этот:
Свернуть дублированные строки
Свернуть дублированные строки с разными значениями в нескольких столбцах, используя R
Моя самая большая проблема в том, что число моих столбцов намного больше (по необходимости ... для целостности все строки должны оставаться вместе). Я очень быстро теряюсь.
РЕДАКТИРОВАТЬ 1: Добавление 'head (12)' DF (обратите внимание, что DF очень широкий и его трудно вставить для форматирования. Вот столбцы, затронутые проблемой) ... также, не уверен, почему в этом тестовые данные, некоторые из которых являются NA, а некоторые являются NA .. в моих реальных данных они являются истинными пропущенными значениями NA.
Calltype3 TARGET_DOC_1 TARGET_DOC_2 TARGET_DOC_3 TARGET_DRUG_ID_1 TARGET_DRUG_ID_2 TARGET_DRUG_ID_3
1 Cost Savings A000000000025001 <NA> <NA> 57237014401 NA NA
2 Cost Savings <NA> A000000000013001 <NA> NA 57237014401 NA
3 Cost Savings <NA> <NA> A000000000001001 NA NA 65862058701
4 Cost Savings A000000000026001 <NA> <NA> 65862058301 NA NA
5 Cost Savings <NA> A000000000014001 <NA> NA 65862058301 NA
6 Cost Savings <NA> <NA> A000000000002001 NA NA 65862058701
7 Cost Savings A000000000027001 <NA> <NA> 57237014301 NA NA
8 Cost Savings <NA> A000000000015001 <NA> NA 57237014301 NA
9 Cost Savings <NA> <NA> A000000000003001 NA NA 55111034001
10 Cost Savings A000000000028001 <NA> <NA> 68180075701 NA NA
11 Cost Savings <NA> A000000000016001 <NA> NA 68180075701 NA
12 Cost Savings <NA> <NA> A000000000004001 NA NA 65862058601
TARGET_NAME_1 TARGET_NAME_2 TARGET_NAME_3
1 Target11 <NA> <NA>
2 <NA> Target21 <NA>
3 <NA> <NA> Target31
4 Target12 <NA> <NA>
5 <NA> Target22 <NA>
6 <NA> <NA> Target32
7 Target13 <NA> <NA>
8 <NA> Target23 <NA>
9 <NA> <NA> Target33
10 Target14 <NA> <NA>
11 <NA> Target24 <NA>
12 <NA> <NA> Target34
РЕДАКТИРОВАТЬ 4: на основе предложения Humpelstielzchen я выполнил это:
Мой спред DF = spread1
spread2 <- spread1 %>%
group_by(IDCARD_MBR_ID) %>%
mutate(MONTH = MONTH[row_number()==1],
FORMULARYID = FORMULARYID[row_number()==1],
LIS_INDICATOR = LIS_INDICATOR[row_number() ==1]) %>%
group_by_at(vars(-starts_with("TARGET"))) %>%
summarise_at(vars(starts_with("TARGET")), list(. = ~.[which(!is.na(.))[1]]))
и, похоже, он работал правильно; У меня есть фрейм данных с более чем 1 миллионом начальных строк, это помогло мне сократить до ~ 600k; мне потребуется немного времени, чтобы проверить точность, но выглядит великолепно!