Извлечение строк по индексу совпадений (match) в R не работает - PullRequest
2 голоса
/ 13 апреля 2020

Я пытаюсь сохранить первую строку каждой группы с помощью функции match в R, а затем применить полученный индекс к исходному фрейму данных, чтобы выбрать только ту строку, в которой условие выполнилось впервые. После этого я собираюсь выполнить вычисления и логические проверки для этих продублированных значений в новых столбцах. Однако, получив правильный индекс совпадений и применив его к исходному фрейму данных, я получаю полностью перепутанный результат, а не первое значение, содержащееся в соответствующей строке.

# Extract first row's value: keep only the first row seen for each `Insider CIK`.
# NOTE(review): the bare `Insider CIK` inside `[` resolves as a column only if
# InsiderList3 is a data.table; for a plain data.frame it would have to be
# InsiderList3$`Insider CIK` -- confirm the class of InsiderList3.

DT1 <- InsiderList3[!duplicated(`Insider CIK`), ]

# Construct Index for Each Grouping (Insider CIK) most recent reported transaction date
# match(x, table) returns positions within `table`, so for every row of
# InsiderList3 index2 holds the row number of its key inside DT1.
index2 <- match(as.character(InsiderList3$`Insider CIK`),as.character(DT1$`Insider CIK`))
# index2 refers to rows of DT1, yet it is used here to subset InsiderList3, so
# the rows picked are rows 1..nrow(DT1) of InsiderList3 rather than the first
# row of each group.
dt3 <- InsiderList3[as.numeric(index2),]

Вот пример dput:

dput(head(InsiderList3[c('Insider CIK', 'Transaction Date', 'Issuer')], 75))
structure(list(`Insider CIK` = c("0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001008134", "0001008134", 
"0001008134", "0001008134", "0001008134", "0001009891", "0001012859", 
"0001012859", "0001012859", "0001012859"), `Transaction Date` = structure(c(18358, 
18358, 18101, 18065, 18065, 18039, 17729, 17700, 17674, 17674, 
17345, 17345, 17326, 17014, 17014, 17014, 17014, 17014, 17014, 
17001, 16964, 16964, 16598, 16590, 16582, 16582, 16409, 16288, 
16288, 16245, 16245, 16217, 16161, 16072, 16052, 15967, 15880, 
15869, 15771, 15710, 15710, 15687, 15603, 15523, 15354, 15354, 
15030, 14979, 14840, 14049, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 18358, 18358, 
18358, 18261), class = "Date"), Issuer = c("TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"SANDRIDGE ENERGY INC", "SANDRIDGE ENERGY INC", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "Seventy Seven Energy Inc.", 
"Seventy Seven Energy Inc.", "Seventy Seven Energy Inc.", "Seventy Seven Energy Inc.", 
"Seventy Seven Energy Inc.", "Seventy Seven Energy Inc.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "Seventy Seven Energy Inc.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"Seventy Seven Energy Inc.", "Seventy Seven Energy Inc.", "Seventy Seven Energy Inc.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", 
"CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", "TRANSATLANTIC PETROLEUM LTD.", 
"CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", 
"CHESAPEAKE ENERGY CORP", "TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "TRANSATLANTIC PETROLEUM LTD.", 
"TRANSATLANTIC PETROLEUM LTD.", "QUEST RESOURCE CORP", "QUEST RESOURCE CORP", 
"CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", 
"CHESAPEAKE ENERGY CORP", "CHESAPEAKE ENERGY CORP", "TRANSATLANTIC PETROLEUM LTD.", 
"CHESAPEAKE ENERGY CORP", "Seventy Seven Energy Inc.", "CHESAPEAKE OILFIELD OPERATING LLC", 
"TRANSATLANTIC PETROLEUM LTD.", "QUEST RESOURCE CORP", "CHESAPEAKE ENERGY CORP", 
"CHESAPEAKE ENERGY CORP", "CVR ENERGY INC", "CHESAPEAKE ENERGY CORP", 
"SANDRIDGE ENERGY INC", "TRANSATLANTIC PETROLEUM LTD.", "Seventy Seven Energy Inc.", 
"CHESAPEAKE ENERGY CORP", NA, "NATIONAL HEALTHCARE CORP", "NATIONAL HEALTHCARE CORP", 
"NATIONAL HEALTHCARE CORP", "NATIONAL HEALTHCARE CORP")), row.names = c(NA, 
75L), class = "data.frame")

Буду очень признателен за помощь.

Ответы [ 2 ]

1 голос
/ 13 апреля 2020

Мы можем использовать first, чтобы получить первое значение каждого столбца после группировки по Insider CIK

library(dplyr)

# For every `Insider CIK` group, overwrite each non-grouping column with the
# value taken from the group's first row. The number and order of rows are
# unchanged, and the result stays grouped by `Insider CIK`.
# `across()` is used instead of the superseded `mutate_all()`.
InsiderList3 %>%
  group_by(`Insider CIK`) %>%
  mutate(across(everything(), first))
# A tibble: 75 x 3
# Groups:   Insider CIK [3]
#   `Insider CIK` `Transaction Date` Issuer                      
#   <chr>         <date>             <chr>                       
# 1 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 2 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 3 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 4 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 5 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 6 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 7 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 8 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# 9 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
#10 0001008134    2020-04-06         TRANSATLANTIC PETROLEUM LTD.
# … with 65 more rows

Или с data.table

library(data.table)
# Convert InsiderList3 to a data.table by reference (no copy is made).
setDT(InsiderList3)
# Within each `Insider CIK` group, repeat the group's first row .N times, so
# every row of the group carries the first row's values.
InsiderList3[, .SD[rep(1L, .N)], by = .(`Insider CIK`)]
# Insider CIK Transaction Date                       Issuer
# 1:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 2:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 3:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 4:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 5:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 6:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 7:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 8:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# 9:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#10:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#11:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#12:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#13:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#14:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#15:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#16:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#17:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#18:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#19:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#20:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#21:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#22:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#23:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#24:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#25:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#26:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#27:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#28:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#29:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#30:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#31:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#32:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#33:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#34:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#35:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#36:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#37:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#38:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#39:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#40:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#41:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#42:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#43:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#44:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#45:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#46:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#47:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#48:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#49:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#50:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#51:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#52:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#53:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#54:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#55:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#56:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#57:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#58:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#59:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#60:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#61:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#62:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#63:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#64:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#65:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#66:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#67:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#68:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#69:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#70:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#71:  0001009891             <NA>                         <NA>
#72:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP
#73:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP
#74:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP
#75:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP

Если столбец Insider CIK заранее не упорядочен, то перед получением первой строки по группе мы создаём идентификатор строки (rid), чтобы затем восстановить исходный порядок строк

# Repeat the first row of every `Insider CIK` group while keeping the original
# row order. `.I` supplies each row's position in InsiderList3, so the row id
# lives only in the intermediate result and no helper column is left behind on
# InsiderList3. By default `.SD` holds all columns except the grouping one, so
# `Insider CIK` appears exactly once in the output.
setDT(InsiderList3)[, c(list(rid = .I), .SD[rep(1L, .N)]),
  by = .(`Insider CIK`)][order(rid)][, rid := NULL][]

В коде автора вопроса для этого нужно поменять аргументы match местами, а затем реплицировать элементы полученного индекса

# Row position in InsiderList3 of the first occurrence of every key kept in DT1.
keys_all <- as.character(InsiderList3$`Insider CIK`)
keys_first <- as.character(DT1$`Insider CIK`)
idx <- match(keys_first, keys_all)
# Group sizes in the same order as DT1. table() would order them by sorted key
# instead, which misaligns the counts with idx when the keys are not sorted.
counts <- tabulate(match(keys_all, keys_first), nbins = length(keys_first))
# Repeat each first row as many times as its group has rows.
out <- InsiderList3[rep(idx, counts), ]
row.names(out) <- NULL
out
#   Insider CIK Transaction Date                       Issuer
#1   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#2   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#3   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#4   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#5   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#6   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#7   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
#8   0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# ...
1 голос
/ 13 апреля 2020

Немного изменим данные, чтобы показать, что идентификатор строки сохраняется при создании второго набора данных.

Данные:

# Reassign the key of row 75 so that the first group also has a row at the end.
InsiderList3[["Insider CIK"]][75L] <- "0001008134"

Код:

library(data.table)
setDT(InsiderList3)
# First row of every `Insider CIK` group.
df2 <- InsiderList3[!duplicated(`Insider CIK`)]
# Work on a copy so the update by reference below leaves InsiderList3 intact.
InsiderList4 <- copy(InsiderList3)
# Update join: for each row of InsiderList4, take the values from the df2 row
# with the same key (columns prefixed with `i.` come from df2).
InsiderList4[df2, on = "Insider CIK",
  c("Transaction Date", "Issuer") := .(`i.Transaction Date`, i.Issuer)]

Вывод:

InsiderList4
#   Insider CIK Transaction Date                       Issuer
# 1:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 2:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 3:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 4:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 5:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 6:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 7:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 8:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 9:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 10:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 11:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 12:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 13:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 14:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 15:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 16:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 17:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 18:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 19:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 20:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 21:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 22:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 23:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 24:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 25:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 26:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 27:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 28:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 29:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 30:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 31:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 32:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 33:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 34:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 35:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 36:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 37:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 38:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 39:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 40:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 41:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 42:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 43:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 44:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 45:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 46:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 47:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 48:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 49:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 50:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 51:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 52:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 53:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 54:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 55:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 56:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 57:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 58:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 59:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 60:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 61:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 62:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 63:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 64:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 65:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 66:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 67:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 68:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 69:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 70:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 71:  0001008134       2019-12-31 TRANSATLANTIC PETROLEUM LTD.
# 72:  0001009891             <NA>                           NA
# 73:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP
# 74:  0001012859       2020-04-06     NATIONAL HEALTHCARE CORP
# 75:  0001008134       2020-04-06 TRANSATLANTIC PETROLEUM LTD.
# Insider CIK Transaction Date                       Issuer
...