Подход на базовом R:
# Build an 8 x 8 indicator matrix: rows are sources, columns are sinks.
mat <- matrix(0, nrow = 8, ncol = 8)
# Index with a two-column (row, column) matrix. Selecting the key columns by
# name keeps this correct when df carries extra columns (e.g. particle): a
# matrix with any other number of columns is treated as plain linear indices.
mat[as.matrix(df[, c("source", "sink")])] <- 1
Вывод
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 0 0 1 0 0 0 0
[2,] 1 0 1 0 0 1 0 0
[3,] 0 0 1 1 0 0 1 0
[4,] 0 0 0 1 1 1 0 0
[5,] 0 0 1 1 0 0 1 0
[6,] 0 0 1 0 0 0 0 1
[7,] 1 0 1 0 1 0 0 0
[8,] 0 0 0 0 1 0 0 0
Или с помощью tidyverse:
library(tidyverse)

# Mark every observed (source, sink) pair with 1.
df$unique <- 1

# Start from the full 8 x 8 grid so pairs that never occur are kept and can
# be filled with 0, then spread the sinks into columns Sink1..Sink8.
expand.grid(source = 1:8, sink = 1:8) %>%
  # Deduplicate on the key columns only: any extra column in df (e.g.
  # particle) would otherwise leave several rows per pair and multiply the
  # grid rows in the join.
  left_join(
    distinct(df, source, sink, unique),
    by = c("source", "sink")
  ) %>%
  replace_na(list(unique = 0)) %>%
  pivot_wider(
    id_cols = source,
    names_from = sink,
    names_prefix = "Sink",
    values_from = unique
  )
Вывод
# A tibble: 8 x 9
# Groups: source [8]
source Sink1 Sink2 Sink3 Sink4 Sink5 Sink6 Sink7 Sink8
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 0 0 1 0 0 0 0
2 2 1 0 1 0 0 1 0 0
3 3 0 0 1 1 0 0 1 0
4 4 0 0 0 1 1 1 0 0
5 5 0 0 1 1 0 0 1 0
6 6 0 0 1 0 0 0 0 1
7 7 1 0 1 0 1 0 0 0
8 8 0 0 0 0 1 0 0 0
Правка:
Вот пересмотренный подход, который подсчитывает количество различных частиц с одинаковыми sink и source.
# Count the distinct particles for every (source, sink) pair and show the
# result as a source x sink table; pairs that never occur get 0.
# Starting from the full grid keeps rows and columns in 1..8 order even for
# sinks/sources that are absent from df.
expand.grid(source = 1:8, sink = 1:8) %>%
  left_join(
    df %>%
      group_by(source, sink) %>%
      summarise(unique = n_distinct(particle)) %>%
      # summarise() drops only the last grouping level; remove the rest so
      # the join and the reshape work on an ungrouped table.
      ungroup(),
    by = c("source", "sink")
  ) %>%
  replace_na(list(unique = 0)) %>%
  pivot_wider(
    id_cols = source,
    names_from = sink,
    names_prefix = "Sink",
    values_from = unique
  )
Вывод
# A tibble: 8 x 9
source Sink1 Sink2 Sink3 Sink4 Sink5 Sink6 Sink7 Sink8
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 0 0 1 0 0 0 0
2 2 1 0 1 0 0 1 0 0
3 3 0 0 1 1 0 0 2 0
4 4 0 0 0 1 1 1 0 0
5 5 0 0 1 1 0 0 1 0
6 6 0 0 1 0 0 0 0 1
7 7 1 0 1 0 1 0 0 0
8 8 0 0 0 0 1 0 0 0
Правка:
С исправленными данными можно также рассмотреть следующий вариант на базовом R:
# Turn both key columns into factors with the full 1..8 level set, so the
# cross-tabulation below has a row/column for every level, observed or not.
df[c("source", "sink")] <- lapply(
  df[c("source", "sink")],
  factor,
  levels = 1:8
)

# Number of distinct particles per observed (source, sink) combination.
df2 <- aggregate(
  particle ~ source + sink,
  data = df,
  FUN = function(ids) length(unique(ids))
)

# Spread the counts into a source x sink contingency table.
xtabs(particle ~ source + sink, data = df2)
Данные
# Example data: one row per observation of a particle moving from a source
# to a sink (23 rows).
df <- data.frame(
  particle = c(rep(1:7, times = 2), 1:9),
  source = c(rep(1:7, times = 2), 1:8, 3),
  sink = c(
    1, 1, 4, 6, 7, 8, 5,
    4, 6, 7, 5, 4, 8, 3,
    1, 3, 3, 4, 3, 3, 1, 5, 7
  )
)