Я хочу рассчитать текучесть кадров по каждому отделу за каждый месяц. Мои данные состоят из идентификатора сотрудника, даты найма, отдела, даты увольнения и HR_Status.
Мне нужно получить текучесть (Turnover) по месяцам в разрезе отделов (Division).
Leavers = количество уволившихся в текущем месяце, определяемое по TermDate
Текучесть за определённый месяц = Leavers текущего месяца / AVG(количество строк (численность) за предыдущий месяц и за текущий месяц)
Ранее я уже задавал этот вопрос, и на него ответили здесь, но в том ответе не было группировки по отделам.
Расчёт текучести кадров по месяцам
Данные для воспроизведения
structure(list(TerminationDate = structure(c(16921, 16921, 12814,
13028, 15392, 15160, 15186, NA, 17135, 12788, 14491, NA, 15166,
15126, 15113, 17060, 13283, 12916, NA, 17905, 15611, 17135, 13299,
17183, 17256, 12761, 17256, 17256, 14421, 14526, 12892, 17214,
14526, 14526, 15160, 12863, 12726, 14491, 13010, 16073, 16073,
13955, 15125, 15317, NA, 15429, 15474, 12843, 15237, 12921), class = "Date"),
HireDate = structure(c(759283200, 759283200, 766281600, 773020800,
781056000, 781056000, 783216000, 786240000, 787708800, 792201600,
792547200, 790732800, 796694400, 802828800, 813715200, 764985600,
828316800, 846374400, 848188800, 848361600, 848793600, 850003200,
861580800, 867715200, 870134400, 873072000, 875664000, 875664000,
876182400, 876700800, 878342400, 878342400, 878515200, 879120000,
879724800, 881193600, 881539200, 883612800, 883612800, 883612800,
883612800, 883612800, 883612800, 883612800, 883612800, 888710400,
888710400, 890697600, 893030400, 893376000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), HrstatusName = c("Resigned", "Resigned",
"Resigned", "Resigned", "Resigned", "Resigned", "Resigned",
"Regular", "Resigned", "Resigned", "Resigned", "Regular",
"Gross Misconduct", "Resigned", "Resigned", "Deceased", "Resigned",
"Resigned", "Regular", "Terminated", "Resigned", "Resigned",
"Resigned", "Resigned", "Resigned", "Resigned", "Resigned",
"Resigned", "Resigned", "Resigned", "Resigned", "Resigned",
"Resigned", "Resigned", "Resigned", "Resigned", "Resigned",
"Resigned", "Resigned", "Terminated", "Terminated", "Terminated",
"Retired", "Resigned", "Regular", "Resigned", "Resigned",
"Resigned", "Resigned", "Resigned"), EmployeeId = c("39bab084",
"39bab084", "5664d681", "520d0890", "20d65e2d", "eb8a8d88",
"1d30178f", "6dec15c8", "f67d57de", "76fb57f8", "3b33f6f3",
"a0a2b4e5", "9aa8d595", "dc820f73", "acace7da", "740ee9ec",
"35c81bd8", "f075debf", "5602d50b", "0d2d3f55", "5de0aee6",
"30cb76f7", "a9af8af5", "a95d601b", "32cc220d", "c476b80a",
"90772765", "90772765", "9c79745a", "ec579cf3", "f152ac4b",
"00041e9e", "b261e06b", "0efff3b5", "44db7a6c", "63d42ba4",
"38fbc1fa", "9960e29c", "48d52953", "051d8858", "051d8858",
"f44b3a8d", "3f17e928", "250c1bac", "68c4baa7", "7c3e5ee1",
"e7af1cf7", "cb4236d2", "f85f925b", "432da957"), Division = c("a60c5c5c",
"a60c5c5c", "3cc0c23b", "7e23b2d7", "3cc0c23b", "3cc0c23b",
"3cc0c23b", "eae5d36f", "c3abc225", "3cc0c23b", "7e23b2d7",
"eae5d36f", "3cc0c23b", "7e23b2d7", "eae5d36f", "a60c5c5c",
"7e23b2d7", "3cc0c23b", "3cc0c23b", "3cc0c23b", "c3abc225",
"c3abc225", "c3abc225", "5d980f59", "c3abc225", "eae5d36f",
"c3abc225", "c3abc225", "3cc0c23b", "a60c5c5c", "c3abc225",
"eae5d36f", "7e23b2d7", "a60c5c5c", "3cc0c23b", "3cc0c23b",
"3cc0c23b", "7e23b2d7", "3cc0c23b", "7e23b2d7", "7e23b2d7",
"7e23b2d7", "7e23b2d7", "3cc0c23b", "eae5d36f", "c3abc225",
"3cc0c23b", "216743cf", "3cc0c23b", "3cc0c23b")), class = c("data.table",
"data.frame"), row.names = c(NA, -50L), .internal.selfref = <pointer: 0x0000000006261ef0>)
Пробный код
library(data.table)
# Trial code: company-wide monthly turnover. Nothing below groups by Division.
#   Turnover = leavers in the month /
#              mean(presences in the previous month, presences in this month)

# setDT() converts df in place and returns df itself, not a copy.
setDT(df)
# The sample data names the termination column "TerminationDate", while the
# rest of this script expects "TermDate"; reconcile the two if needed.
if ("TerminationDate" %in% names(df) && !"TermDate" %in% names(df)) {
  setnames(df, "TerminationDate", "TermDate")
}
df[, `:=`(
  TermDate = as.Date(as.character(TermDate)),
  HireDate = as.Date(as.character(HireDate))
)]

# Leavers per calendar month. Grouping on a formatted key (rather than
# overwriting TermDate with `:=`) leaves df$TermDate as a Date.
df_leavers <- df[
  !is.na(TermDate),
  .(Leavers = .N),
  by = .(YearMonth = format(TermDate, "%Y-%m"))
]

# Presences per calendar month: expand every row into one entry per month
# between HireDate and TermDate, then count entries per month. Rows with no
# termination date are treated as present up to the latest known TermDate.
df_presences <- copy(df)
df_presences[, maxTerm := max(TermDate, na.rm = TRUE)]
df_presences[is.na(TermDate), TermDate := maxTerm]
df_presences <- df_presences[
  , .(YearMonth = format(seq(HireDate, TermDate, by = "month"), "%Y-%m")),
  by = .(row_id = seq_len(nrow(df_presences)))
][, .(Presences = .N), by = YearMonth]

# Keep every month that has presences; months without leavers get Leavers = 0.
df_final <- df_leavers[df_presences, on = "YearMonth"]
df_final[is.na(Leavers), Leavers := 0L]
setorder(df_final, YearMonth)

# The first month has no predecessor, so its previous-month presences are 0.
df_final[, AvgPresences := (Presences + shift(Presences, fill = 0L)) / 2]
df_final[, Turnover := round(Leavers / AvgPresences, 2)]
Этот код даёт правильный результат, за исключением того, что результат не разбит по отделам (Division).
Желаемый результат
Date Turnover Division
2019-01 0.23 XYC
2019-01 0.02 ZYV