Я хочу посчитать частоту всех парных комбинаций item по group, то есть число групп, в которых оба элемента пары встречаются вместе.
# Sample data: one row per (group, item) membership.
# The items are listed per group and then flattened into long format; local()
# keeps the intermediate list out of the global environment.
have <- local({
  items_by_group <- list(
    a = c("apple", "banana", "black cherry"),
    b = c("apple", "black cherry"),
    c = "orange",
    d = c("banana", "black cherry"),
    e = c("banana", "black cherry"),
    f = c("apple", "banana", "black cherry")
  )
  data.frame(
    group = rep(names(items_by_group), times = lengths(items_by_group)),
    item = unlist(items_by_group, use.names = FALSE)
  )
})
have
#    group         item
# 1      a        apple
# 2      a       banana
# 3      a black cherry
# 4      b        apple
# 5      b black cherry
# 6      c       orange
# 7      d       banana
# 8      d black cherry
# 9      e       banana
# 10     e black cherry
# 11     f        apple
# 12     f       banana
# 13     f black cherry
# Count, for every unordered pair of distinct items, the number of groups that
# contain both items.
#
# A self-join of the data on `group` (https://stackoverflow.com/a/38335011/841405)
# is not enough for this: it returns every pair twice (x/y and y/x) and can only
# produce pairs that share at least one group, so pairs with a count of zero
# never appear. Instead, all pairs are enumerated up front with combn() and the
# counts are looked up in the item-by-item co-occurrence matrix.
#
# Args:
#   data: a data frame with columns `group` and `item`.
#
# Returns:
#   A data frame with one row per unordered item pair, in alphabetical order:
#     item.x, item.y - the two items of the pair (item.x sorts before item.y)
#     n_groups       - number of distinct groups containing both items
#                      (0 when the items never occur together)
#     item           - "item.x, item.y" label
#   Zero rows are returned when `data` holds fewer than two distinct items.
count_item_pairs <- function(data) {
  # One row per (group, item): repeated rows must not inflate the counts.
  membership <- unique(data.frame(
    group = as.character(data$group),
    item = as.character(data$item)
  ))
  items <- sort(unique(membership$item))

  # combn() needs at least two elements to choose from.
  if (length(items) < 2L) {
    return(data.frame(
      item.x = character(0),
      item.y = character(0),
      n_groups = integer(0),
      item = character(0)
    ))
  }

  # 0/1 incidence matrix (groups in rows, items in columns).
  incidence <- unclass(table(membership$group, membership$item))
  # shared[i, j] = number of groups that contain both item i and item j.
  shared <- crossprod(incidence)

  # Each row of `pairs` is one unordered pair; used below as a character
  # matrix index into `shared` (matched against its dimnames).
  pairs <- t(combn(items, 2L))

  data.frame(
    item.x = pairs[, 1L],
    item.y = pairs[, 2L],
    n_groups = as.integer(shared[pairs]),
    item = paste(pairs[, 1L], pairs[, 2L], sep = ", ")
  )
}

# What I really want: each pair once, including pairs that never co-occur.
count_item_pairs(have)
#         item.x       item.y n_groups                 item
# 1        apple       banana        2        apple, banana
# 2        apple black cherry        3  apple, black cherry
# 3        apple       orange        0        apple, orange
# 4       banana black cherry        4 banana, black cherry
# 5       banana       orange        0       banana, orange
# 6 black cherry       orange        0 black cherry, orange