Группировка с подсчётом количества, поиском первого не-NA значения и конкретного значения с помощью summarise в R - PullRequest
2 голосов
/ 05 апреля 2019

У меня есть этот образец набора данных (df)

structure(list(from = c("(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452"), to = c("(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542"), extension = c("9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls"), forwarded_to = c(NA, "(323) 457-2039", 
"(310) 598-1753", "(818) 900-0706", "(818) 539-7811", "(213) 344-4965", 
"(213) 458-7662", "(818) 208-3012", "(818) 293-0175", "(818) 284-4238", 
"(818) 206-9506", "(310) 299-7340", "(310) 405-0875", "(213) 260-9113", 
"(213) 805-5208", "(818) 887-3058", "(424) 271-2141", "(213) 218-6579", 
"(818) 638-9466", "(213) 784-7164", "(323) 457-2038", "(213) 805-6959", 
"(228) 285-7898", "(213) 341-1055", "(213) 568-0979", "(213) 344-4905", 
"(818) 459-3811", NA, "(323) 457-2039", "(310) 598-1753", "(818) 900-0706", 
"(818) 539-7811", "(213) 344-4965", "(213) 458-7662", "(818) 208-3012", 
"(818) 293-0175", "(818) 284-4238", "(818) 206-9506", "(310) 299-7340", 
"(310) 405-0875", "(213) 260-9113", "(213) 805-5208", "(818) 887-3058", 
"(424) 271-2141", "(213) 218-6579", "(818) 638-9466", "(213) 784-7164", 
"(323) 457-2038", "(213) 805-6959", "(228) 285-7898", "(213) 341-1055", 
"(213) 568-0979", "(213) 344-4905", "(818) 459-3811"), date = c("Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018"), time = structure(c(55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55500, 55500, 
55500, 55500, 55500, 55500, 55500, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55380, 55380, 
55380, 55380, 55380, 55380, 55380), class = c("hms", "difftime"
), units = "secs"), action = c("Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe"), action_result = c("Accepted", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "IP Phone Offline", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "Stopped", 
"Stopped", "IP Phone Offline", "Stopped", "Stopped", "Call connected", 
"Stopped", "Accepted", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"IP Phone Offline", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "Stopped", "Stopped", "IP Phone Offline", "Call connected", 
"Stopped", "Stopped", "Stopped"), result_description = c("The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
NA, NA, "The call connected to and was accepted by this number.", 
NA, "The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
"The call connected to and was accepted by this number.", NA, 
NA, NA), duration = structure(c(297, 52, 52, 51, 51, 51, 51, 
51, 51, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 9, 9, 0, 9, 
9, 236, 9, 71, 52, 52, 52, 51, 51, 51, 51, 51, 0, 51, 51, 51, 
51, 51, 51, 51, 51, 51, 51, 7, 7, 0, 13, 7, 7, 7), class = c("hms", 
"difftime"), units = "secs"), ID = c(19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L), CallConnected = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), who_answered = c("NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "(213) 344-4905", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "(213) 341-1055", "NA", 
"NA", "NA")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-54L))

Здесь показаны данные о звонках: номера телефонов, с которых звонили и на которые звонили. В полном наборе данных гораздо больше пар номеров, но для краткости я опубликовал только одну пару.

Я бы хотел для каждой пары номеров найти количество значений TRUE в CallConnected, первый номер телефона в who_answered и значение duration, соответствующее строке, где CallConnected равно TRUE. Я думал об использовании group_by и summarise, но не могу найти способ.

Желаемый результат будет выглядеть так:

from          | to           | CallConnected | WhoAnswered  | Duration
----------------------------------------------------------------------
(122) 212-3452|(700) 890-6542| 2             |(213) 344-4905| 00:03:56

Ответы [ 4 ]

2 голосов
/ 05 апреля 2019

Используя dplyr, мы можем посчитать число CallConnected, взяв sum от значений TRUE, найти первое не-NA значение столбца who_answered и значение duration, соответствующее первому TRUE в CallConnected.

library(dplyr)

# For every (from, to) pair, report how many calls connected, the first real
# number recorded in who_answered, and the duration of the first connected call.
df %>%
  group_by(from, to) %>%
  summarise(
    # CallConnected is logical, so its sum is the number of TRUE rows.
    count_call = sum(CallConnected),
    # who_answered stores the literal string "NA" for unanswered rows.
    # which(...)[1] yields NA when nothing matches, so a pair nobody answered
    # gets NA instead of silently falling back to row 1, which is what
    # which.max() does on an all-FALSE vector.
    who_answered = who_answered[which(who_answered != "NA")[1]],
    # Same guard: NA duration for pairs without a connected call, rather than
    # the duration of an unrelated first row.
    Duration = duration[which(CallConnected)[1]]
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # result is a plain, ungrouped table.
  ungroup()


#  from           to             count_call who_answered   Duration
#  <chr>          <chr>               <int> <chr>          <time>  
#1 (122) 212-3452 (700) 890-6542          2 (213) 344-4905 03'56" 
2 голосов
/ 05 апреля 2019

Я думаю, вы были довольно близки к решению. Вот моя версия:

# Keep only the connected calls, then collapse each (from, to) pair to a
# single row: the number of connected calls, plus who_answered and duration
# taken from the first connected row of the pair.
df %>%
  filter(CallConnected) %>%
  group_by(from, to) %>%
  summarise(
    CallConnected = n(),
    WhoAnswered = first(who_answered),
    Duration = first(duration)
  )
1 голос
/ 05 апреля 2019

Может, как-то так?

# One row per (from, to) pair, built from the connected calls only.
output <- df %>%
  filter(CallConnected) %>%
  group_by(from, to) %>%
  summarise(
    # Duration of the first connected call. Summing every connected call would
    # give 00:04:09 for the sample pair instead of the requested 00:03:56.
    duration = first(duration),
    firstWhoAnswered = first(who_answered),
    CallConnected = n()
  )
0 голосов
/ 05 апреля 2019

Вы также можете сделать это так:

> library(data.table)
> # Per (from, to) pair: count the connected calls (.N) and keep who_answered
> # and duration from the first connected row, so CallConnected is a count
> # rather than the constant TRUE.
> # NOTE(review): `dt` is presumably the question's `df` - confirm.
> setDT(dt)[CallConnected == TRUE,
+           .(CallConnected = .N,
+             who_answered = who_answered[1L],
+             duration = duration[1L]),
+           by = .(from, to)]

             from             to CallConnected   who_answered duration
1: (122) 212-3452 (700) 890-6542             2 (213) 344-4905 00:03:56
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...