У меня есть данные следующего вида:
# Example input: six rows, seven columns. Text columns are factors, built here
# from integer codes plus the label table. UniID and Refseq_IDs may hold
# several identifiers packed into one ":"-separated string.
df <- data.frame(
  Division = factor(
    c(1L, 1L, 1L, 2L, 2L, 3L),
    levels = 1:3,
    labels = c("Main data", "Second data", "Third data")
  ),
  Gene = factor(
    1:6,
    levels = 1:6,
    labels = c("ABI3BP", "ADIPOQ", "AEBP1", "AGRN", "AMBN", "AMELX")
  ),
  IDs = c(17265L, 13633L, 303L, 329L, 452L, 461L),
  IDs.Links = c(17265L, 13633L, 303L, 329L, 452L, 461L),
  UniID = factor(
    c(1L, 4L, 2L, 3L, 6L, 5L),
    levels = 1:6,
    labels = c(
      "B4DSV9:D3YTG3:E9PPR9:E9PRB5:H0Y897",
      "C9JLQ8:H7C0W8:H7C1J5",
      "H0Y5U1:O00468",
      "Q15848",
      "Q99217",
      "Q9NP70"
    )
  ),
  Refseq_IDs = factor(
    c(4L, 3L, 1L, 6L, 5L, 2L),
    levels = 1:6,
    labels = c(
      "NP_001120.3",
      "NP_001133.1:NP_872621.1:NP_872622.1",
      "NP_001171271.1:NP_004788.1",
      "NP_056244.2:XP_005247340.1",
      "NP_057603.1",
      "NP_940978.2:XP_005244806.1:XP_006710696.1"
    )
  ),
  Orthology = factor(
    1:6,
    levels = 1:6,
    labels = c(
      "Mouse:Abi3bp|", "Mouse:Adipoq|", "Mouse:Aebp1|",
      "Mouse:Agrn|", "Mouse:Ambn|", "Mouse:Amelx|"
    )
  )
)
В одном из столбцов, UniID, в каждой ячейке может находиться несколько идентификаторов, разделённых двоеточием (`:`). Я хочу поместить каждый из них в отдельную строку, повторив при этом значения остальных столбцов.
Желаемый результат выглядит так:
# Desired output: one row per individual UniID (13 rows), with the other
# columns carried along. Factors are built from integer codes plus labels.
# NOTE(review): rows 8-9 have Gene "AEBP2"/"AEBP3" and row 11 has a Refseq_IDs
# value ending in ".2"; these differ from a pure repeat of the corresponding
# input row. They are reproduced here exactly as given -- confirm intent.
df2 <- data.frame(
  Division = factor(
    c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L),
    levels = 1:3,
    labels = c("Main data", "Second data", "Third data")
  ),
  Gene = factor(
    c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 5L, 6L, 6L, 7L, 8L),
    levels = 1:8,
    labels = c(
      "ABI3BP", "ADIPOQ", "AEBP1", "AEBP2", "AEBP3", "AGRN", "AMBN", "AMELX"
    )
  ),
  IDs = c(
    17265L, 17265L, 17265L, 17265L, 17265L, 13633L, 303L,
    303L, 303L, 329L, 329L, 452L, 461L
  ),
  IDs.Links = c(
    17265L, 17265L, 17265L, 17265L, 17265L, 13633L, 303L,
    303L, 303L, 329L, 329L, 452L, 461L
  ),
  UniID = factor(
    c(1L, 3L, 4L, 5L, 7L, 11L, 2L, 8L, 9L, 6L, 10L, 13L, 12L),
    levels = 1:13,
    labels = c(
      "B4DSV9", "C9JLQ8", "D3YTG3", "E9PPR9", "E9PRB5", "H0Y5U1", "H0Y897",
      "H7C0W8", "H7C1J5", "O00468", "Q15848", "Q99217", "Q9NP70"
    )
  ),
  Refseq_IDs = factor(
    c(4L, 4L, 4L, 4L, 4L, 3L, 1L, 1L, 1L, 6L, 7L, 5L, 2L),
    levels = 1:7,
    labels = c(
      "NP_001120.3",
      "NP_001133.1:NP_872621.1:NP_872622.1",
      "NP_001171271.1:NP_004788.1",
      "NP_056244.2:XP_005247340.1",
      "NP_057603.1",
      "NP_940978.2:XP_005244806.1:XP_006710696.1",
      "NP_940978.2:XP_005244806.1:XP_006710696.2"
    )
  ),
  Orthology = factor(
    c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 4L, 4L, 5L, 6L),
    levels = 1:6,
    labels = c(
      "Mouse:Abi3bp|", "Mouse:Adipoq|", "Mouse:Aebp1|",
      "Mouse:Agrn|", "Mouse:Ambn|", "Mouse:Amelx|"
    )
  )
)
Я нашёл другой пост и попробовал применить решение оттуда, но безуспешно:
# Attempt 1: split every UniID entry on ":"; `s` is a list holding one
# character vector per row of `df`.
s <- strsplit(as.character(df$UniID), ":", fixed = TRUE)
# One output row per individual identifier, with IDs repeated to line up.
# The identifier column is named UniID (it was "director", a leftover name
# from the post this snippet was adapted from).
# Only these two columns are built; the other columns of `df` are not
# carried over.
mydf <- data.frame(UniID = unlist(s), IDs = rep(df$IDs, lengths(s)))
Это даёт мне только два столбца — IDs и значения UniID, а остальные столбцы теряются.
# Attempt 2: intended to split every column on ":" per IDs group and then
# keep only the rows where UniID is not NA.
# NOTE(review): `.SD`, `tstrsplit()` and `by =` look like data.table syntax,
# but `df` has class "data.frame", so the call dispatches to `[.data.frame`,
# which has no `by` argument -- hence the "unused argument" error below.
# Presumably `df` would first have to be converted to a data.table; confirm.
mydf<- df[, lapply(.SD, function(x) unlist(tstrsplit(x, ":", fixed=TRUE))), by = IDs][!is.na(UniID)]
Error in `[.data.frame`(df, , lapply(.SD, function(x) unlist(tstrsplit(x, :
unused argument (by = IDs)
Этот вариант тоже завершается ошибкой:
# Attempt 3: intended to split UniID on ":" per (IDs, UniID) group and keep
# the split value (V1) as UniID alongside IDs.
# The console continuation prompt ("+") that had been pasted at the start of
# the second line is removed so the snippet parses as a script; the call is
# now exactly the one echoed in the error message below.
# NOTE(review): `by =` and `.()` look like data.table syntax, but `df` has
# class "data.frame", so `[.data.frame` rejects `by` -- this still fails with
# the "unused argument" error shown below, as the post describes.
mydf <- df[, strsplit(as.character(UniID), ":", fixed = TRUE),
           by = .(IDs, UniID)][, .(UniID = V1, IDs)]
Error in `[.data.frame`(df, , strsplit(as.character(UniID), ":", fixed = TRUE), :
unused argument (by = .(IDs, UniID))