В ответ на предложение Julian_Hn вот dput моего набора данных:
dput(Proteome)
structure(list(Protein.name = structure(c(1L, 1L, 1L, 1L, 2L,
3L), .Label = c("HCTF", "IFT", "ROSF"), class = "factor"), X..Proteins = c(5L,
5L, 5L, 5L, 3L, 7L), X..PSMs = c(3L, 1L, 6L, 2L, 2L, 4L), Previous.5.amino.acids = structure(c(4L,
5L, 4L, 2L, 3L, 1L), .Label = c("CWYAT", "FCLKP", "MGCPT", "NCTMY",
"TMYFC"), class = "factor"), Sequence = structure(c(5L, 1L, 4L,
2L, 3L, 6L), .Label = c("FCLKPGCNFHAESTRGYR", "GCNFHAESTR", "GFGFNWPHAVR",
"GHFCLKPGCNFHAESTR", "GHFCLKPGCNFHAESTRGYR", "GNFSVKLMNR"), class = "factor")), .Names = c("Protein.name",
"X..Proteins", "X..PSMs", "Previous.5.amino.acids", "Sequence"
), class = "data.frame", row.names = c(NA, -6L))