Возможно, это было бы проще сделать с помощью пакета вроде igraph, но вот способ сделать это в базовом R:
# Example edge list: each row links the node in column X to the node in Y.
df <- data.frame(X = c("A", "B", "D"), Y = c("B", "C", "E"),
                 stringsAsFactors = FALSE)

# Assign every node that appears in an edge list to a linked group.
#
# edges: data frame with columns X and Y; row i links X[i] to Y[i].
#
# Returns a data frame with one row per unique node (in order of first
# appearance in c(X, Y)) and two columns: `node` (character) and `group`
# (integer). Nodes connected directly or through a chain of links share the
# same `group` value; the value is the smallest starting index in the group.
group_linked_nodes <- function(edges) {
  from <- as.character(edges$X)
  to <- as.character(edges$Y)

  # Set of all unique values, each starting in its own group.
  # seq_len() keeps this valid for an empty edge list, where 1:0 would not.
  nodes <- data.frame(node = unique(c(from, to)), stringsAsFactors = FALSE)
  nodes$group <- seq_len(nrow(nodes))

  for (i in seq_along(from)) {
    # Groups currently holding the two endpoints of link i.
    linked <- nodes$group[nodes$node %in% c(from[i], to[i])]
    # Merge the WHOLE groups, not just the two endpoints: relabelling only
    # the pair would strand nodes merged into one of these groups earlier
    # (e.g. links A-B, C-D, B-D would leave C behind in its old group).
    nodes$group[nodes$group %in% linked] <- min(linked)
  }
  nodes
}

nodes <- group_linked_nodes(df)
# End result is nodes$group containing a unique value for each linked group
nodes
node group
1 A 1
2 B 1
3 D 3
4 C 1
5 E 3
# The groups can be pulled out as a list keyed by group id:
split(nodes[["node"]], f = nodes[["group"]])
$`1`
[1] "A" "B" "C"
$`3`
[1] "D" "E"