Замените значение строки суммой других строк при выполнении условий - PullRequest
1 голос
/ 08 октября 2019

Я пытаюсь выяснить, как суммировать строки на основе нескольких условий, но сохраняю все строки в моем фрейме данных, поэтому суммируемое значение имеют только те, которые соответствуют условию, все остальные имеют исходное значение (из столбца с именем iresult_posPPP). У меня есть данные для нескольких стран, разных лет, отделов и т. Д. Если в столбце ISO указано «ВСЕ», мне нужно, чтобы это значение было суммой всех строк в iresult_posPPP, имеющих одинаковые значения для DataYear, Division, ДалееDetails1 и ДалееDetails2.

Я прикрепил dput моего фрейма данных ниже:

   Data <- structure(list(ID.x = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = c("Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre"), IndicatorID = c(98L, 
98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 
98L, 98L, 98L, 98L, 98L, 98L, 98L), InputA = c(3762.134507, 4447.309316, 
4293.110921, 3862.76676, 4071.175351, 4323.016371, 4036.130293, 
4450.854575, 4224.807119, 4563, 4888.173223, 4473.037451, 4136.032594, 
4429.615323, 4972.768468, 4910.636192, 4947.585664, 4119.657378, 
4809.477176, 4135.606089, 4935.381334), InputAName = c("Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym"
), InputAUnit = c("#", "#", "#", "#", "#", "#", "#", "#", "#", 
"#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#"), 
    OutputCode = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), DataYear = c(2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L), Country.x = c("Uzbekistan", "Vanuatu", "Venezuela, RB", 
    "Vietnam", "Virgin Islands (U.S.)", "West Bank and Gaza", 
    "Yemen, Rep.", "Zambia", "Zimbabwe", "ALL", "Afghanistan", 
    "Albania", "Algeria", "American Samoa", "Andorra", "Angola", 
    "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia"
    ), ISO = c("UZB", "VUT", "VEN", "VNM", "VIR", "PSE", "YEM", 
    "ZMB", "ZWE", "ALL", "AFG", "ALB", "DZA", "ASM", "AND", "AGO", 
    "ATG", "ARG", "ARM", "ABW", "AUS"), Division = c("Two", "Two", 
    "Two", "Two", "Two", "Two", "Two", "Two", "Two", "One", "One", 
    "One", "One", "One", "One", "One", "One", "One", "One", "One", 
    "One"), FurtherDetails1 = c("fd1b", "fd1b", "fd1b", "fd1b", 
    "fd1b", "fd1b", "fd1b", "fd1b", "fd1b", "fd1a", "fd1a", "fd1a", 
    "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", 
    "fd1a"), FurtherDetails2 = c("fd2b", "fd2b", "fd2b", "fd2b", 
    "fd2b", "fd2b", "fd2b", "fd2b", "fd2b", "fd2a", "fd2a", "fd2a", 
    "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", 
    "fd2a"), ID.y = c(168L, 168L, 168L, 168L, 168L, 168L, 168L, 
    168L, 168L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 
    189L, 189L, 189L, 189L), Code.x = c("GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"
    ), CoefficientYear = c(2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L), Percent = c(-2.69, 
    -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CoeffqInflation = c(-0.0269, 
    -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, 
    -0.0269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Method = c("", 
    "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
    "", "", "", "", ""), ID.x.x = c(248L, 248L, 248L, 248L, 248L, 
    248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 
    248L, 248L, 248L, 248L, 248L, 248L), InputCode = c("GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP"), CoeffqFactor = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Year = c(2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L), Type = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ID.y.y = c(212L, 
    213L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 221L, 1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), Country.y = c("Uzbekistan", 
    "Vanuatu", "Venezuela, RB", "Vietnam", "Virgin Islands (U.S.)", 
    "West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe", 
    "ALL", "Afghanistan", "Albania", "Algeria", "American Samoa", 
    "Andorra", "Angola", "Antigua and Barbuda", "Argentina", 
    "Armenia", "Aruba", "Australia"), Currency = c("GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP"), CoeffqPPP = c(0.184436278, 0.184436278, 0.367527677, 
    0.184436278, 0.994525717, 0.184436278, 0.184436278, 0.184436278, 
    0.112411792, 0, 0.112411792, 0.347344672, 0.367527677, 0.367527677, 
    0.994525717, 0.264750217, 0.994525717, 0.389806873, 0.188019865, 
    0.994525717, 0.994525717), Step = c(3, 3, 3, 3, 3, 3, 3, 
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), Coeffq = c(2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717
    ), CoeffqYear = c("2017", "2017", "2017", "2017", "2017", 
    "2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017", 
    "2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017"
    ), CoeffqUnit = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), Step2iresult = c(1.32904727554592, 
    1.57109861939866, 1.51662503362256, 1.36459757855418, 1.43822197171544, 
    1.52718971619107, 1.42584162253612, 1.57235105117809, 1.49249538546973, 
    1.61196860639412, 1.72684237959508, 1.58018758409756, 1.46113405578585, 
    1.56484787181204, 1.75672729723484, 1.73477786082112, 1.74783098133106, 
    1.45534919185534, 1.69904146852532, 1.46098338458917, 1.74351956410071
    ), iresult_pos = c(3564.46552325877, 4213.64007550875, 4067.54352800449, 
    3659.81038551155, 3857.2688325682, 4095.8776944954, 3824.06510187511, 
    4216.9990605352, 4002.82852462449, 4442.76248384841, 4759.36721667692, 
    4355.17049663636, 4027.04589975837, 4312.8925651774, 4841.73333125022, 
    4781.23827430327, 4817.21410774609, 4011.10217955023, 4682.744853048, 
    4026.63063339563, 4805.33136843763), iresult_posPPP = c(675.590128629298, 
    798.631273618818, 1536.26022396863, 693.661294511865, 3942.19818248036, 
    776.311208628047, 724.793273270513, 799.267918050159, 462.403789458181, 
    0, 535.008997612704, 1512.74526765823, 1480.05082471057, 
    1585.10738563022, 4815.22831278442, 1265.8338706505, 4790.8433144487, 
    1563.55519789396, 880.449055099529, 4004.58771777195, 4779.02562461803
    ), iresult_pos_m = c(0.00356446552325877, 0.00421364007550875, 
    0.00406754352800449, 0.00365981038551155, 0.0038572688325682, 
    0.0040958776944954, 0.00382406510187511, 0.0042169990605352, 
    0.00400282852462449, 0.00444276248384841, 0.00475936721667692, 
    0.00435517049663636, 0.00402704589975837, 0.0043128925651774, 
    0.00484173333125022, 0.00478123827430327, 0.00481721410774609, 
    0.00401110217955023, 0.004682744853048, 0.00402663063339563, 
    0.00480533136843763), ID = c(NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    StepName = c("Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia"), ImpactID = c(10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L), Allocation = c(1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Polarity = c("Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive"
    ), Waterfall = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
    "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
    "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), Int_Ext = c("External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External"
    ), ImpactCode = c("GDF", "GDF", "GDF", "GDF", "GDF", "GDF", 
    "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", 
    "GDF", "GDF", "GDF", "GDF", "GDF", "GDF"), ImpactName = c("Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial"
    ), Name = c("Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres"), Capital = c("Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured"), Flow = c("TBC", 
    "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", 
    "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", 
    "TBC", "TBC"), iresult = c(3564.46552325877, 4213.64007550875, 
    4067.54352800449, 3659.81038551155, 3857.2688325682, 4095.8776944954, 
    3824.06510187511, 4216.9990605352, 4002.82852462449, 4442.76248384841, 
    4759.36721667692, 4355.17049663636, 4027.04589975837, 4312.8925651774, 
    4841.73333125022, 4781.23827430327, 4817.21410774609, 4011.10217955023, 
    4682.744853048, 4026.63063339563, 4805.33136843763), iresult_m = c(0.00356446552325877, 
    0.00421364007550875, 0.00406754352800449, 0.00365981038551155, 
    0.0038572688325682, 0.0040958776944954, 0.00382406510187511, 
    0.0042169990605352, 0.00400282852462449, 0.00444276248384841, 
    0.00475936721667692, 0.00435517049663636, 0.00402704589975837, 
    0.0043128925651774, 0.00484173333125022, 0.00478123827430327, 
    0.00481721410774609, 0.00401110217955023, 0.004682744853048, 
    0.00402663063339563, 0.00480533136843763)), row.names = 1760:1780, class = "data.frame")

Это суть кода, который я сейчас пытаюсь запустить, в настоящее время думая о подходе оператора if:

Data %>% 
 mutate(iresult_pos = ifelse(ISO= 'ALL', (sum of the rows in iresult_posPPP where DataYear, Division, FurtherDetails1 and FutherDetails2 are the same), iresult_posPPP)

Есть ли аккуратный способ сделать это, или, возможно, пропущенный шаг, так как я ломал голову / разбирался с переполнением стека и не мог придумать хороший способ сделать это. Любая помощь будет принята с благодарностью!

Спасибо!

Ответы [ 2 ]

3 голосов
/ 08 октября 2019

Может быть, нам нужна группа по сумме

library(dplyr)
Data %>% 
   group_by(DataYear, Division, FurtherDetails1, FurtherDetails2) %>% 
   mutate(iresult_pos =sum(iresult_posPPP))
1 голос
/ 08 октября 2019

Еще одна опция, использующая фиктивный набор данных, чтобы показать, как он работает:

# create dummy data
data <- data.frame(replicate(10,sample(0:1,1000,rep=TRUE)))

# sum X2, X3, X4, X9 where X1 == 1, store as X11
data <- data %>% mutate(
        X11 = rowSums(select_(.,'X2','X3','X4','X9')))
...