Использование group_by из библиотеки tidyverse
library(tidyverse)
# Example input table (a stand-in for your own data set): ten regions, each
# assigned to one of four groups, with one mid_pop value per region.
df <- tibble(
  region = seq_len(10),
  group = c(2, 4, 3, 1, 1, 2, 3, 4, 3, 1),
  mid_pop = c(
    1146, 1682, 2891, 7654, 3289,
    1128, 2121, 3217, 1616, 1717
  )
)

# One weight per row of df, in the same row order.
weight <- c(0.66, 2, 2, 0.7, 0.2, 0.3, 0.7, 0.75, 0.33, 0.16)
# Second data set: attach the weights as a column, scale mid_pop by them, and
# take the plain arithmetic mean of the scaled values within each group.
df_wt <- df %>%
  add_column(weight = weight) %>%
  mutate(weighted = mid_pop * weight) %>%
  group_by(group) %>%
  summarise(pop = mean(weighted))
## > df_wt
## # A tibble: 4 x 2
## group pop
## <dbl> <dbl>
## 1 1 2097.
## 2 2 547.
## 3 3 2600.
## 4 4 2888.
Функция outer с операцией "-" даёт попарные разности:
# Per-group means as a plain numeric vector, in the row order of df_wt.
wt_pop <- pull(df_wt, pop)

# Entry [i, j] is wt_pop[i] - wt_pop[j], so the diagonal is zero and the
# matrix is antisymmetric (m[j, i] == -m[i, j]).
outer(wt_pop, wt_pop, FUN = "-")
## [,1] [,2] [,3] [,4]
## [1,] 0.0000 1549.393 -503.2200 -791.6017
## [2,] -1549.3933 0.000 -2052.6133 -2340.9950
## [3,] 503.2200 2052.613 0.0000 -288.3817
## [4,] 791.6017 2340.995 288.3817 0.0000
В качестве альтернативы вы можете применить outer непосредственно в цепочке вызовов.
Результат необходимо преобразовать во фрейм данных с помощью такой функции, как as.data.frame().
# Pairwise differences of the per-group means, computed in a single pipeline
# and returned as a tibble. Row i / column j holds pop[i] - pop[j], with groups
# in the order produced by summarise().
df %>%
  bind_cols(weight = weight) %>%
  mutate(weighted = mid_pop * weight) %>%
  group_by(group) %>%
  summarise(pop = mean(weighted)) %>%
  # with() evaluates outer() against the summarised columns; this replaces the
  # superseded dplyr::do().
  with(outer(pop, pop, "-")) %>%
  # outer() returns a matrix without column names; name the columns V1, V2, ...
  # explicitly rather than relying on as_tibble()'s deprecated compatibility
  # naming, which emits a warning since tibble 2.0.0.
  as_tibble(.name_repair = ~ paste0("V", seq_along(.x)))
## # A tibble: 4 x 4
## V1 V2 V3 V4
## <dbl> <dbl> <dbl> <dbl>
## 1 0 1549. -503. -792.
## 2 -1549. 0 -2053. -2341.
## 3 503. 2053. 0 -288.
## 4 792. 2341. 288. 0