У меня длинная таблица с повторяющимися комбинациями `area` и `cluster`.
# Sample of the long table used throughout the question.
# Columns: age, area, cluster, and norm.to.area (numeric).
# In the rows shown here, norm.to.area is identical across the repeated rows
# of each area/cluster pair; only `age` differs between those rows.
counts <- tibble::tribble(
~age, ~area, ~cluster, ~norm.to.area,
"gw_25", "cingulate", "cluster_1", 0.03,
"gw_20", "cingulate", "cluster_1", 0.03,
"gw_18", "hippocampus", "cluster_1", 0.02,
"gw_25", "insula", "cluster_1", 0.01,
"gw_20", "motor", "cluster_1", 0.01,
"gw_22", "motor", "cluster_1", 0.01,
"gw_25", "motor", "cluster_1", 0.01,
"gw_14", "motor", "cluster_1", 0.01,
"gw_18", "motor", "cluster_1", 0.01,
"gw_19", "motor", "cluster_1", 0.01,
"gw_17", "motor", "cluster_1", 0.01,
"gw_20", "occipital", "cluster_1", 0.01,
"gw_17", "occipital", "cluster_1", 0.01,
"gw_18", "occipital", "cluster_1", 0.01,
"gw_19", "occipital", "cluster_1", 0.01,
"gw_22", "occipital", "cluster_1", 0.01,
"gw_14", "occipital", "cluster_1", 0.01,
"gw_22", "parietal", "cluster_1", 0,
"gw_25", "parietal", "cluster_1", 0,
"gw_17", "parietal", "cluster_1", 0,
"gw_19", "parietal", "cluster_1", 0,
"gw_20", "parietal", "cluster_1", 0,
"gw_20", "PFC", "cluster_1", 0.01,
"gw_22", "PFC", "cluster_1", 0.01,
"gw_25", "PFC", "cluster_1", 0.01
)
Я хочу создать новую переменную `sum.norm.to.area`, которая представляет собой сумму `norm.to.area` для каждого `cluster`, причём значение `norm.to.area` должно учитываться только ОДИН РАЗ для каждой комбинации `area` / `cluster` (в исходных данных столбец `cluster` называется `subcluster.merge`).
Я пробовал `group_by(cluster)`, но так значение суммируется столько раз, сколько раз встречается данная комбинация.
# Naive attempt: sums norm.to.area over every row of the cluster, so a value
# that is repeated for the same area/cluster pair is counted once per row.
counts %>%
  group_by(cluster) %>%
  mutate(sum.norm.to.area = sum(norm.to.area))
Спасибо за ваш совет.
ОБНОВЛЕНИЕ 1:
Попробовал использовать `summarize`, как предложено ниже, но получается то же самое (разве что результат, разумеется, не добавляется как новый столбец):
# Per area/cluster sum: still adds norm.to.area once per repeated row.
# Grouping uses `cluster`, the column name in the sample `counts`
# (presumably `subcluster.merge` in the original, unabridged data).
counts %>%
  group_by(cluster, area) %>%
  summarize(sum(norm.to.area))
# Output of the summarize() attempt, written as a tribble literal.
# The third column holds the per area/cluster sum over all rows.
tibble::tribble(
~cluster, ~area, ~sum.norm.to.area.,
"cluster_1", "PFC", 0.06,
"cluster_1", "somatosensory", 0.05,
"cluster_1", "motor", 0.07,
"cluster_1", "parietal", 0,
"cluster_1", "temporal", 0.03,
"cluster_1", "occipital", 0.06,
"cluster_1", "hippocampus", 0.02,
"cluster_1", "insula", 0.01,
"cluster_1", "cingulate", 0.06,
"cluster_10-34", "PFC", 0.42,
"cluster_10-34", "somatosensory", 0.35,
"cluster_10-34", "motor", 0.48,
"cluster_10-34", "parietal", 0.36,
"cluster_10-34", "temporal", 0.28,
"cluster_10-34", "occipital", 0.4,
"cluster_10-34", "hippocampus", 0.12,
"cluster_10-34", "insula", 0,
"cluster_10-34", "cingulate", 0,
"cluster_11", "PFC", 0.18,
"cluster_11", "somatosensory", 0.15,
"cluster_11", "motor", 0.14,
"cluster_11", "parietal", 0.12,
"cluster_11", "temporal", 0.04,
"cluster_11", "occipital", 0.18,
"cluster_11", "hippocampus", 0.02
)
ОБНОВЛЕНИЕ 2
Это тот результат, который мне нужен, но способ, которым я его получаю, слишком запутан. Я хотел бы найти более простой способ — с помощью `mutate` и без `join`.
# Workaround via a lookup table: keep one row per area/cluster pair, sum
# norm.to.area within each cluster, then join the sum back onto every row.
tmp <- counts %>%
  distinct(area, cluster, .keep_all = TRUE) %>%
  add_count(cluster, wt = norm.to.area, name = "sum.norm.to.area") %>%
  # Keep only the join keys and the new sum; otherwise `age` and
  # `norm.to.area` come back from the join duplicated with .x/.y suffixes.
  select(cluster, area, sum.norm.to.area)
counts %>% left_join(tmp, by = c("cluster", "area"))
Желаемый вывод:
`sum.norm.to.area` получается сложением значений `norm.to.area` (каждое — только один раз) по всем уникальным комбинациям `area` и `cluster`:
# Desired result: every row carries sum.norm.to.area, the per-cluster sum of
# norm.to.area taken once per distinct area/cluster pair
# (0.03 + 0.02 + 0.01 * 6 + 0 = 0.11 over the nine areas listed here).
# NOTE(review): this table contains somatosensory, temporal and extra PFC rows
# that are not in the sample `counts` above; on that sample alone the same
# calculation would give 0.09.
tibble::tribble(
~age, ~area, ~cluster, ~norm.to.area, ~sum.norm.to.area,
"gw_25", "cingulate", "cluster_1", 0.03, 0.11,
"gw_20", "cingulate", "cluster_1", 0.03, 0.11,
"gw_18", "hippocampus", "cluster_1", 0.02, 0.11,
"gw_25", "insula", "cluster_1", 0.01, 0.11,
"gw_20", "motor", "cluster_1", 0.01, 0.11,
"gw_22", "motor", "cluster_1", 0.01, 0.11,
"gw_25", "motor", "cluster_1", 0.01, 0.11,
"gw_14", "motor", "cluster_1", 0.01, 0.11,
"gw_18", "motor", "cluster_1", 0.01, 0.11,
"gw_19", "motor", "cluster_1", 0.01, 0.11,
"gw_17", "motor", "cluster_1", 0.01, 0.11,
"gw_20", "occipital", "cluster_1", 0.01, 0.11,
"gw_17", "occipital", "cluster_1", 0.01, 0.11,
"gw_18", "occipital", "cluster_1", 0.01, 0.11,
"gw_19", "occipital", "cluster_1", 0.01, 0.11,
"gw_22", "occipital", "cluster_1", 0.01, 0.11,
"gw_14", "occipital", "cluster_1", 0.01, 0.11,
"gw_22", "parietal", "cluster_1", 0, 0.11,
"gw_25", "parietal", "cluster_1", 0, 0.11,
"gw_17", "parietal", "cluster_1", 0, 0.11,
"gw_19", "parietal", "cluster_1", 0, 0.11,
"gw_20", "parietal", "cluster_1", 0, 0.11,
"gw_20", "PFC", "cluster_1", 0.01, 0.11,
"gw_22", "PFC", "cluster_1", 0.01, 0.11,
"gw_25", "PFC", "cluster_1", 0.01, 0.11,
"gw_18", "PFC", "cluster_1", 0.01, 0.11,
"gw_19", "PFC", "cluster_1", 0.01, 0.11,
"gw_17", "PFC", "cluster_1", 0.01, 0.11,
"gw_22", "somatosensory", "cluster_1", 0.01, 0.11,
"gw_20", "somatosensory", "cluster_1", 0.01, 0.11,
"gw_25", "somatosensory", "cluster_1", 0.01, 0.11,
"gw_18", "somatosensory", "cluster_1", 0.01, 0.11,
"gw_19", "somatosensory", "cluster_1", 0.01, 0.11,
"gw_25", "temporal", "cluster_1", 0.01, 0.11,
"gw_19", "temporal", "cluster_1", 0.01, 0.11,
"gw_20", "temporal", "cluster_1", 0.01, 0.11
)