Допустим, ваша таблица аннотаций выглядит следующим образом:
# Annotation table: one row per probe, pairing a probe ID with a gene symbol.
# Both columns are factors whose levels are listed explicitly, and the row
# names repeat the probe IDs in the same order.
ann_table <- local({
  probe_ids <- c(
    "200005_at", "200004_at", "208010_s_at", "200002_at", "200000_s_at",
    "200008_s_at", "1557770_at", "200006_at", "200001_at", "200009_at",
    "1557557_at", "200007_at", "1557256_a_at", "200003_s_at", "200011_s_at",
    "235668_at", "200010_at"
  )
  gene_symbols <- c(
    "EIF3D", "EIF4G2", "PTPN22", "RPL35", "PRPF8", "GDI2", "IPO11", "PARK7",
    "CAPNS1", "GDI2", "MATN1-AS1", "SRP14", "GABRB1", "RPL28", "ARF3",
    "PRDM1", "RPL11"
  )
  data.frame(
    ID = factor(
      probe_ids,
      levels = c(
        "1557256_a_at", "1557557_at", "1557770_at", "200000_s_at",
        "200001_at", "200002_at", "200003_s_at", "200004_at", "200005_at",
        "200006_at", "200007_at", "200008_s_at", "200009_at", "200010_at",
        "200011_s_at", "208010_s_at", "235668_at"
      )
    ),
    `Gene Symbol` = factor(
      gene_symbols,
      levels = c(
        "ARF3", "CAPNS1", "EIF3D", "EIF4G2", "GABRB1", "GDI2", "IPO11",
        "MATN1-AS1", "PARK7", "PRDM1", "PRPF8", "PTPN22", "RPL11", "RPL28",
        "RPL35", "SRP14"
      )
    ),
    row.names = probe_ids,
    # Keep the column name "Gene Symbol" verbatim (with the space).
    check.names = FALSE
  )
})
И матрица, которую вы показали:
# Example expression table: 12 probes (row names) by 5 numeric columns C1..C5.
mydata <- data.frame(
  C1 = c(
    9.372446, 11.777132, 12.199002, 12.962001, 11.233249, 10.133455,
    11.004593, 11.070449, 10.289446, 11.408603, 12.081858, 10.28021
  ),
  C2 = c(
    9.299193, 11.692039, 12.544633, 12.445636, 10.961737, 10.43836,
    10.584082, 10.949878, 10.304524, 11.114905, 11.927526, 10.026668
  ),
  C3 = c(
    10.017991, 12.013986, 12.365026, 12.441083, 11.588765, 10.897712,
    11.182127, 11.643648, 10.381671, 11.442766, 12.161929, 11.108151
  ),
  C4 = c(
    9.181135, 11.9361, 12.026991, 12.809544, 11.206607, 10.653804,
    11.375084, 11.581889, 10.136443, 11.44824, 12.299117, 10.371077
  ),
  C5 = c(
    9.734582, 11.832065, 12.439448, 12.622457, 11.674372, 11.359433,
    11.182136, 11.730024, 11.101463, 12.008252, 12.303224, 10.770123
  ),
  row.names = c(
    "200000_s_at", "200001_at", "200002_at", "200003_s_at", "200004_at",
    "200005_at", "200006_at", "200007_at", "200008_s_at", "200009_at",
    "200010_at", "200011_s_at"
  )
)
Генные символы для соответствующих имен строк:
# For each row name of mydata, find its position in ann_table$ID and return
# the matching "Gene Symbol" as a character vector (NA where no ID matches).
as.character(
  ann_table[["Gene Symbol"]][match(rownames(mydata), ann_table$ID)]
)
[1] "PRPF8" "CAPNS1" "RPL35" "RPL28" "EIF4G2" "EIF3D"
[7] "PARK7" "SRP14" "GDI2" "GDI2" "RPL11" "ARF3"
Или можно воспользоваться аннотационным пакетом hgu133plus2.db:
# Attach the hgu133plus2.db annotation package.
library(hgu133plus2.db)
# Look up the SYMBOL column for every row name of mydata, treating the row
# names as PROBEID keys.
mapIds(
  hgu133plus2.db,
  keys = rownames(mydata),
  column = "SYMBOL",
  keytype = "PROBEID"
)
Но прежде чем заменять имена строк матрицы генными символами, проверьте, нет ли среди полученных символов NA и дубликатов (в примере выше GDI2 встречается дважды).