У меня есть следующий фрейм данных
a <-
structure(list(Sample_1 = structure(c(Bacteria_A = 1L, Bacteria_B = 2L,
Bacteria_C = 3L, `4` = 1L, `5` = 2L, `6` = 2L, `7` = 3L, `8` = 1L
), .Label = c("12", "23", "25", "soil"), class = "factor"), Sample_2 = structure(c(Bacteria_A = 3L,
Bacteria_B = 2L, Bacteria_C = 1L, `4` = 3L, `5` = 2L, `6` = 2L,
`7` = 1L, `8` = 3L), .Label = c("10", "12", "23", "soil"), class = "factor"),
Sample_3 = structure(c(Bacteria_A = 2L, Bacteria_B = 1L,
Bacteria_C = 3L, `4` = 2L, `5` = 1L, `6` = 1L, `7` = 3L,
`8` = 2L), .Label = c("33", "45", "50", "soil"), class = "factor"),
Sample_4 = structure(c(Bacteria_A = 1L, Bacteria_B = 3L,
Bacteria_C = 2L, `4` = 1L, `5` = 3L, `6` = 3L, `7` = 2L,
`8` = 1L), .Label = c("32", "38", "44", "soil"), class = "factor"),
Sample_5 = structure(c(Bacteria_A = 2L, Bacteria_B = 3L,
Bacteria_C = 1L, `4` = 2L, `5` = 3L, `6` = 3L, `7` = 1L,
`8` = 2L), .Label = c(" 3", "34", "55", "soil"), class = "factor"),
Sample_6 = structure(c(Bacteria_A = 1L, Bacteria_B = 2L,
Bacteria_C = 3L, `4` = 1L, `5` = 2L, `6` = 2L, `7` = 3L,
`8` = 1L), .Label = c(" 0", " 3", "34", "soil"), class = "factor"),
Genus = c("Bacteria_A", "Bacteria_B", "Bacteria_C", "Bacteria_A",
"Bacteria_B", "Bacteria_B", "Bacteria_C", "Bacteria_A"),
Group = c("Soil", "Soil", "Soil", "Water", "Water", "Water",
"Water", "Water")), row.names = c(NA, 8L), class = "data.frame")
> a
Sample_1 Sample_2 Sample_3 Sample_4 Sample_5 Sample_6 Genus Group
1 12 23 45 32 34 0 Bacteria_A Soil
2 23 12 33 44 55 3 Bacteria_B Soil
3 25 10 50 38 3 34 Bacteria_C Soil
4 12 23 45 32 34 0 Bacteria_A Water
5 23 12 33 44 55 3 Bacteria_B Water
6 23 12 33 44 55 3 Bacteria_B Water
7 25 10 50 38 3 34 Bacteria_C Water
8 12 23 45 32 34 0 Bacteria_A Water
Я хочу сравнить эффект обработки для каждой бактерии в почве против воды. Например, wilcox.test для BActeria_A в почве против воды. Как я могу это сделать ??
До сих пор я пытался развернуть более широкий диапазон данных, чтобы бактерии имели имена столбцов
nms <- colnames(a)[1:(ncol(a)-2)]
> nms
[1] "Sample_1" "Sample_2" "Sample_3" "Sample_4" "Sample_5" "Sample_6"
d <- a %>%
pivot_wider(names_from = Genus, values_from=nms )
group_by(name) %>%
summarise(mean_Soil = mean(value[Group == "Soil"]),
mean_Water= mean(value[Group == "Water"]),
pvalue = wilcox.test(value ~ Group)$p.value)
Error in group_by(name) : object 'name' not found
Ожидаемый результат будет выглядеть примерно так (поддельные значения в это пример). Это просто для иллюстрации желаемого результата.
#> # A tibble: 3 x 4
#> name mean_soil mean_water pvalue
#> <chr> <dbl> <dbl> <dbl>
#> 1 Bacteria_A 24.3 24 0.936
#> 2 Bacteria_B 28.3 29 0.873
#> 3 Bacteria_C 26.7 23.8 0.748