Желание суммировать несколько столбцов в R, но не суммировать правильно - PullRequest
0 голосов
/ 29 июня 2018

Я хочу обобщить мои данные в конечном итоге за квартал. что я могу сделать с помощью следующего кода, но поскольку INV_ITEM_ID в приведенном ниже коде не является числовым типом, он не позволяет мне добавить его в список.

temptable[ ,lapply(.SD, sum), by = quarter(INVOICE_DT), 
    .SDcols = c("INV_ITEM_ID","Ext Sale", "Ext Total Cost", "CE100", "CE110","CE120","QTY_SOLD","PACKSLIP_WHSL")]

При запуске скрипта я получаю следующую ошибку

Error in gsum(INV_ITEM_ID) : 
  Type 'character' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)

Ниже я предоставил подмножество моих данных (первые 30 строк).

library(data.table)
temptable = setDT(structure(list(INVOICE_DT = c("2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", "2016-01-29", 
"2016-01-29", "2016-01-29"), BRANCH_CODE = c("AK001", "AK001", 
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001", 
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001", 
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001", 
"AK001", "AK001", "AK001", "AK001", "AK001", "AK001", "AK001"
), INVOICE_NO = c("AK0015522", "AK0015529", "AK0015515", "AK0015529", 
"AK0015514", "AK0015513", "AK0015519", "AK0015519", "AK0015520", 
"AK0015520", "AK0015520", "AK0015522", "AK0015520", "AK0015520", 
"AK0015520", "AK0015518", "AK0015525", "AK0015520", "AK0015519", 
"AK0015520", "AK0015525", "AK0015518", "AK0015529", "AK0015519", 
"AK0015525", "AK0015519", "AK0015519", "AK0015522", "AK0015522", 
"AK0015529"), INV_SEQ_NO = c(6L, 6L, 3L, 3L, 3L, 3L, 148L, 149L, 
91L, 45L, 162L, 34L, 125L, 39L, 159L, 56L, 64L, 127L, 160L, 93L, 
45L, 34L, 5L, 138L, 46L, 38L, 161L, 35L, 28L, 4L), INV_ITEM_ID = c("11239190", 
"11239190", "55692", "400050-131280", "0700535", "0800758", "69970", 
"0445050", "0819092", "0115160", "0115160", "0703316", "55692", 
"0115157", "0115157", "0115157", "440704-131280", "55618", "0703318", 
"0205428", "0702119", "0115157", "465278", "465278", "0702121", 
"15166", "0703315", "0703315", "0715718", "0243945"), ITEM_DESCR = c("Au-Ve-Co# 20898", 
"Au-Ve-Co# 20898", "MM HSS T TAP 16X1.5", "3/16\"\"\"\"H05703-100R Hs", 
"18/2 BLK SOOW", "3x1x1/4 180G FW", "BF SPEC SER HV1-6C", "1/4MP External Seat", 
"1-1/2xNHConditonDisc", "HCS 7/16-14x1 3/4 YZ8", "HCS 7/16-14x1 3/4 YZ8", 
"25AMP MINI AUTO FUSE", "MM HSS T TAP 16X1.5", "HCS 7/16-14x1.25 YZ8", 
"HCS 7/16-14x1.25 YZ8", "HCS 7/16-14x1.25 YZ8", "BPOLY MELBW 1/4TX1/8", 
"MM HSS P TAP 5 X 0.8", "MAXI-BLADE 20 AMP", "1-1/2\"\"\"\" T3 VF SURF", 
"16ga Red Wire/PPF", "HCS 7/16-14x1.25 YZ8", "80I 45 ELBOW 1/4 BLK", 
"80I 45 ELBOW 1/4 BLK", "16ga Blk Wire/PPF", "HCS 7/16-14x3.25 YZ8", 
"20AMP MINI AUTO FUSE", "20AMP MINI AUTO FUSE", "3/0-3/8\"\"\"\"HvyWalCprLug", 
"97053 Pink Marker"), STD_ITEM = c("Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), PRIVATE_LABEL = c("N", 
"N", "Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "Y", "N", 
"N", "N", "N", "Y", "N", "N", "N", "N", "Y", "Y", "N", "N", "N", 
"N", "N", "N"), CATEGORY_PATH1 = c("08.5-Fleet & Automotive", 
"08.5-Fleet & Automotive", "07-Cutting Tools & Metalworking", 
"08-Hydraulics & Pneumatics", "09-Electrical", "05-Abrasives", 
"08-Hydraulics & Pneumatics", "07.3-Plumbing", "05-Abrasives", 
"01-Fasteners", "01-Fasteners", "08.5-Fleet & Automotive", "07-Cutting Tools & Metalworking", 
"01-Fasteners", "01-Fasteners", "01-Fasteners", "08-Hydraulics & Pneumatics", 
"07-Cutting Tools & Metalworking", "08.5-Fleet & Automotive", 
"05-Abrasives", "08.5-Fleet & Automotive", "01-Fasteners", "07.3-Plumbing", 
"07.3-Plumbing", "08.5-Fleet & Automotive", "01-Fasteners", "08.5-Fleet & Automotive", 
"08.5-Fleet & Automotive", "08.5-Fleet & Automotive", "11-Chemicals & Paints"
), CATEGORY_PATH2 = c("03-Batteries & Accessories", "03-Batteries & Accessories", 
"04-Threading and Tapping", "02-Hose and Hose Reels", "05-Electrical Wire and Accessories", 
"03-Coated and Non-Woven Abrasives", "07-Valves & Accessories", 
"04-Valves & Valve Accessories", "03-Coated and Non-Woven Abrasives", 
"01-Bolts", "01-Bolts", "02-Fleet Electrical Products", "04-Threading and Tapping", 
"01-Bolts", "01-Bolts", "01-Bolts", "01-Fittings", "04-Threading and Tapping", 
"02-Fleet Electrical Products", "03-Coated and Non-Woven Abrasives", 
"02-Fleet Electrical Products", "01-Bolts", "01-Pipe Fittings", 
"01-Pipe Fittings", "02-Fleet Electrical Products", "01-Bolts", 
"02-Fleet Electrical Products", "02-Fleet Electrical Products", 
"02-Fleet Electrical Products", "11-Paints, Marking, and Accessories"
), CUST_ID = c("AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001", "AK0010001", "AK0010001", "AK0010001", "AK0010001", 
"AK0010001"), CUSTOMER_TIER = c("Tier 3", "Tier 3", "Tier 3", 
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", 
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", 
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", 
"Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3", "Tier 3"), 
    IS_VENDING = c("No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No"), SALE_PRICE = c(10.53, 10.53, 23.0175, 
    2.0251, 0.4755, 6.4725, 15.8235, 1.468, 1.0275, 0.3425, 0.3425, 
    0.3726, 23.0175, 0.2513, 0.2513, 0.2513, 2.7412, 5.7675, 
    2.226, 1.5579, 0.1418, 0.2513, 12.0835, 12.0835, 0.1297, 
    0.625, 0.3726, 0.3726, 3.514, 4.1112), TOTAL_COST = c(6.877449, 
    6.877449, 9.399562, 0.924873, 0.245128, 2.953287, 6.227787, 
    0.703494, 0.52035, 0.083454, 0.083454, 0.134285, 9.399562, 
    0.062686, 0.062686, 0.062686, 1.3493, 2.386274, 0.77503, 
    0.524996, 0.051595, 0.062686, 4.47782, 4.47782, 0.051628, 
    0.164998, 0.125893, 0.125893, 2.183983, 1.469989), POS_COST = c(6.877449, 
    6.877449, 9.399562, 0.924873, 0.245128, 2.953287, 6.227787, 
    0.703494, 0.52035, 0.083454, 0.083454, 0.134285, 9.399562, 
    0.062686, 0.062686, 0.062686, 1.3493, 2.386274, 0.77503, 
    0.524996, 0.051595, 0.062686, 4.47782, 4.47782, 0.051628, 
    0.164998, 0.125893, 0.125893, 2.183983, 1.469989), CE100 = c(6.55, 
    6.55, 9.25, 0.95, 0.23785, 2.81, 5.93127, 1.17, 0.508, 0.074946, 
    0.074946, 0.12, 9.25, 0.058626, 0.058626, 0.058626, 1.31, 
    2.35, 0.61, 0.5, 0.05172, 0.058626, 4.2844, 4.2844, 0.05172, 
    0.12255, 0.12, 0.12, 2.08, 1.43), CE110 = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), CE120 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), CE200 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CORP_PRICE = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), QTY_SOLD = c(15L, 
    10L, 6L, 50L, 250L, 10L, 10L, 20L, 50L, 25L, 25L, 50L, 6L, 
    25L, 25L, 25L, 10L, 6L, 10L, 30L, 100L, 25L, 15L, 10L, 100L, 
    20L, 50L, 50L, 4L, 36L), PACKSLIP_WHSL = c(NA, NA, 30.69, 
    2.63, 0.7315, 8.63, 20.55, 1.96, 1.37, 0.685, 0.685, 0.5323, 
    30.69, 0.5025, 0.5025, 0.5025, 3.56, 7.69, 3.18, 1.82, 0.2145, 
    0.5025, 18.59, 18.59, 0.1961, 1.25, 0.5323, 0.5323, 5.02, 
    5.71), PRICING_GROUP = c("PGB", "PGB", "PGB", "PGB", "PGB", 
    "PGB", "PGG", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", 
    "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", 
    "PGB", "PGB", "PGB", "PGB", "PGB", "PGB", "PGB"), PGG_MIN_PRICE = c(257.9043375, 
    171.936225, 184.14, 131.5, 175.56, 86.3, 154.125, 39.2, 68.5, 
    15.07, 15.07, 26.615, 184.14, 11.055, 11.055, 11.055, 35.6, 
    46.14, 31.8, 54.6, 21.45, 11.055, 253.7535, 185.9, 19.61, 
    22, 26.615, 26.615, 20.08, 205.56), PGY_MIN_PRICE = c(206.32347, 
    137.54898, 171.2502, 128.87, 170.07375, 80.259, 147.96, 36.456, 
    63.705, 13.3575, 13.3575, 24.75195, 171.2502, 9.79875, 9.79875, 
    9.79875, 33.108, 45.2172, 29.574, 50.778, 19.9485, 9.79875, 
    245.388, 163.592, 18.2373, 19.5, 24.75195, 24.75195, 18.6744, 
    191.1708), PGR_MIN_PRICE = c(171.936225, 114.62415, 162.0432, 
    124.925, 166.41625, 75.944, 141.795, 35.672, 60.28, 9.9325, 
    9.9325, 24.21965, 162.0432, 7.28625, 7.28625, 7.28625, 32.396, 
    43.833, 28.938, 48.048, 19.5195, 7.28625, 239.811, 159.874, 
    17.8451, 14.5, 24.21965, 24.21965, 18.2728, 180.8928), `Ext Sale` = c(157.95, 
    105.3, 138.105, 101.255, 118.875, 64.725, 158.235, 29.36, 
    51.375, 8.5625, 8.5625, 18.63, 138.105, 6.2825, 6.2825, 6.2825, 
    27.412, 34.605, 22.26, 46.737, 14.18, 6.2825, 181.2525, 120.835, 
    12.97, 12.5, 18.63, 18.63, 14.056, 148.0032), `Ext Total Cost` = c(103.161735, 
    68.77449, 56.397372, 46.24365, 61.282, 29.53287, 62.27787, 
    14.06988, 26.0175, 2.08635, 2.08635, 6.71425, 56.397372, 
    1.56715, 1.56715, 1.56715, 13.493, 14.317644, 7.7503, 15.74988, 
    5.1595, 1.56715, 67.1673, 44.7782, 5.1628, 3.29996, 6.29465, 
    6.29465, 8.735932, 52.919604), cust_part = c("AK001000111239190", 
    "AK001000111239190", "AK001000155692", "AK0010001400050-131280", 
    "AK00100010700535", "AK00100010800758", "AK001000169970", 
    "AK00100010445050", "AK00100010819092", "AK00100010115160", 
    "AK00100010115160", "AK00100010703316", "AK001000155692", 
    "AK00100010115157", "AK00100010115157", "AK00100010115157", 
    "AK0010001440704-131280", "AK001000155618", "AK00100010703318", 
    "AK00100010205428", "AK00100010702119", "AK00100010115157", 
    "AK0010001465278", "AK0010001465278", "AK00100010702121", 
    "AK001000115166", "AK00100010703315", "AK00100010703315", 
    "AK00100010715718", "AK00100010243945")), .Names = c("INVOICE_DT", 
"BRANCH_CODE", "INVOICE_NO", "INV_SEQ_NO", "INV_ITEM_ID", "ITEM_DESCR", 
"STD_ITEM", "PRIVATE_LABEL", "CATEGORY_PATH1", "CATEGORY_PATH2", 
"CUST_ID", "CUSTOMER_TIER", "IS_VENDING", "SALE_PRICE", "TOTAL_COST", 
"POS_COST", "CE100", "CE110", "CE120", "CE200", "CORP_PRICE", 
"QTY_SOLD", "PACKSLIP_WHSL", "PRICING_GROUP", "PGG_MIN_PRICE", 
"PGY_MIN_PRICE", "PGR_MIN_PRICE", "Ext Sale", "Ext Total Cost", 
"cust_part"), row.names = c(NA, 
30L), class = c("data.table", "data.frame")))

Я также пытался запустить скрипт по INV_ITEM_ID, но скрипт не возвращает только отдельные значения.

Мне не удалось подвести итог по INV_ITEM_ID.

1 Ответ

0 голосов
/ 03 июля 2018

Я бы посоветовал вам сначала создать еще один столбец в ваших данных, чтобы представить четверть INVOICE_DT:

temptable[,INVOICE_Q := quarter(INVOICE_DT)]

Затем выполните операцию суммирования с помощью INVOICE_Q и INV_ITEM_ID:

temptable[,lapply(X = .SD,FUN = sum), by = c("INV_ITEM_ID","INVOICE_Q"), 
           .SDcols = c("Ext Sale", "Ext Total Cost", "CE100", "CE110","CE120","QTY_SOLD","PACKSLIP_WHSL")]

Затем вы можете удалить созданный столбец INVOICE_Q, если требуется:

temptable[,INVOICE_Q := NULL]

Надеюсь, это поможет!

...