Если я правильно понял ваш вопрос, вы можете попробовать это.
library(dplyr)
df <- pattern_df %>%
left_join(subject_df, "seq") %>%
group_by(width.x, seq, names.x) %>%
summarise(subject_names = paste(names.y, collapse=",")) %>%
`colnames<-`(c("width", "seq", "names", "subject_names"))
Вывод:
> df
width seq names subject_names
1 1000 GGTAAGAGTTTCTTAACAGATCTCAACATTTGCTATATAC...AGATTATTTGTCCTTTGAGATAAAATTACCAC P1 NA
2 1000 GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA P4 S5,S6
3 1000 GGTGAGTGTATGATTGATAACTAATCTCTTAGATTAACCA...CATGATATGAAATGGTTCCTAAAGATCCAGAC P3 NA
4 1000 TGTAAGTAATACTTAATGGTAATTTTTGTTTTCTCTTTCA...AGAAGCAAGGAGACCCGTTAGAGGAAGCATCC P2 NA
Пример данных: ( примечание , которое у меня естьдобавил еще две строки в конце в subject_df
)
pattern_df <- structure(list(width = c(1000L, 1000L, 1000L, 1000L), seq = c("GGTAAGAGTTTCTTAACAGATCTCAACATTTGCTATATAC...AGATTATTTGTCCTTTGAGATAAAATTACCAC",
"TGTAAGTAATACTTAATGGTAATTTTTGTTTTCTCTTTCA...AGAAGCAAGGAGACCCGTTAGAGGAAGCATCC",
"GGTGAGTGTATGATTGATAACTAATCTCTTAGATTAACCA...CATGATATGAAATGGTTCCTAAAGATCCAGAC",
"GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA"
), names = c("P1", "P2", "P3", "P4")), .Names = c("width", "seq",
"names"), class = "data.frame", row.names = c(NA, -4L))
# width seq names
#1 1000 GGTAAGAGTTTCTTAACAGATCTCAACATTTGCTATATAC...AGATTATTTGTCCTTTGAGATAAAATTACCAC P1
#2 1000 TGTAAGTAATACTTAATGGTAATTTTTGTTTTCTCTTTCA...AGAAGCAAGGAGACCCGTTAGAGGAAGCATCC P2
#3 1000 GGTGAGTGTATGATTGATAACTAATCTCTTAGATTAACCA...CATGATATGAAATGGTTCCTAAAGATCCAGAC P3
#4 1000 GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA P4
subject_df <- structure(list(width = c(1000L, 1000L, 1000L, 1000L, 1000L, 1000L
), seq = c("GTAGGTACCTGGGAATTCACAAATTAAGACTTTTGAATA...TTCTTATTCAACCGTAGTAACATTAGATGAATA",
"GTGAGCGCTGCTGCCCAAGCCGCCTGGCTATGCTCGATT...AGATGGCCTTTTCTCTCAGCCCACTGTGACCTA",
"GTAAGTACAGGCTGAAAGTTACATGCTCTCCAAGGGTGA...ACATAGTAATGAATAGACTTTCAGACACAGCAT",
"GTAAGTTGCTTGTTTCTTAAATGTTAGGATCTATTACTT...AACAATATAGGTAAGTCTAGCCCTCAAGGCGCT",
"GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA",
"GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA"
), names = c("S1", "S2", "S3", "S4", "S5", "S6")), .Names = c("width",
"seq", "names"), class = "data.frame", row.names = c(NA, -6L))
# width seq names
#1 1000 GTAGGTACCTGGGAATTCACAAATTAAGACTTTTGAATA...TTCTTATTCAACCGTAGTAACATTAGATGAATA S1
#2 1000 GTGAGCGCTGCTGCCCAAGCCGCCTGGCTATGCTCGATT...AGATGGCCTTTTCTCTCAGCCCACTGTGACCTA S2
#3 1000 GTAAGTACAGGCTGAAAGTTACATGCTCTCCAAGGGTGA...ACATAGTAATGAATAGACTTTCAGACACAGCAT S3
#4 1000 GTAAGTTGCTTGTTTCTTAAATGTTAGGATCTATTACTT...AACAATATAGGTAAGTCTAGCCCTCAAGGCGCT S4
#5 1000 GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA S5
#6 1000 GGTGAGCAAAATCAAGCAATGCATTGTTTGTTTTGGAGGG...CTATTTATGTACTACCTTTTTTTTTTAGAAAA S6