С dplyr и tidyr:
# Example data: one row per person, with an integer id and age.
df1 <- data.frame(
  id = 1:5,
  age = c(14L, 23L, 52L, 41L, 32L)
)
# Example data: pairs of ids (id1, id2); the values match ids in df1.
df2 <- data.frame(
  id1 = c(1L, 1L, 3L, 5L),
  id2 = c(2L, 5L, 4L, 2L)
)
# Attach each package with its own library() call. require(dplyr, tidyr)
# passes tidyr as the `lib.loc` argument, so tidyr is never attached, and
# require() only warns (returning FALSE) where library() stops with an error.
library(dplyr)
library(tidyr)

# Build one row per id from df1, with the ids paired with it in df2 spread
# across friend1, friend2, ... columns plus a count of those pairings.
dff <- df1 %>%
  # An id can appear in either column of df2, so join once on each column to
  # pick up the partner id from the other side.
  left_join(df2, by = c(id = "id1")) %>%
  left_join(df2, by = c(id = "id2")) %>%
  # Stack the two partner columns into a single friend_id column.
  gather("friend", "friend_id", id1, id2) %>%
  # Drop the NA placeholders left by the side of each join that did not match.
  # Note: an id with no match on either side loses all its rows here.
  filter(!is.na(friend_id)) %>%
  group_by(id) %>%
  # Within each id, label the partners friend1, friend2, ... and count them.
  mutate(
    friend = paste0("friend", row_number()),
    connections = n()
  ) %>%
  # One column per friend label; ids with fewer partners get NA in the rest.
  # The result is still grouped by id.
  spread(friend, friend_id)
# A tibble: 5 x 5
# Groups:   id [5]
     id   age connections friend1 friend2
  <int> <int>       <int>   <int>   <int>
1     1    14           2       2       5
2     2    23           2       1       5
3     3    52           1       4      NA
4     4    41           1       3      NA
5     5    32           2       1       2