Надеюсь, у тебя все хорошо. После почти длительного кодирования на R с использованием нескольких пакетов, поверх которых была Limma, я сумел объединить два набора данных и удалить из них пакетный эффект. Однако при выполнении кодов R для поиска дифференциально экспрессируемых генов в этих наборах данных я столкнулся с некоторыми ошибками.
Все мои коды и ошибки / предупреждения следующие:
> data1 <- read.delim("GSE21222_series_matrix.txt", comment.char = "!")
> data2 <- read.delim("GSE46872_series_matrix.txt", comment.char = "!")
> dim(data1)
[1] 54675 19
> dim(data2)
[1] 33252 13
> max(data1[,-1])
[1] 73226.37
> min(data1[,-1])
[1] 0.07260976
> max(data2[,-1])
[1] 14.6658
> min(data2[,-1])
[1] 2.604358
> data1[,-1] <- log2(1+data1[,-1])
> library(data.table)
> annot1 <- fread("GPL570-55999.txt", data.table=F)
> annot2 <- fread("GPL6244-17930 -.txt", data.table=F)
> dim(annot1)
[1] 54675 16
> dim(annot2)
[1] 33297 3
> colnames(annot1)
[1] "ID" "GB_ACC"
[3] "SPOT_ID" "Species Scientific Name"
[5] "Annotation Date" "Sequence Type"
[7] "Sequence Source" "Target Description"
[9] "Representative Public ID" "Gene Title"
[11] "Gene Symbol" "ENTREZ_GENE_ID"
[13] "RefSeq Transcript ID" "Gene Ontology Biological Process"
[15] "Gene Ontology Cellular Component" "Gene Ontology Molecular Function"
> colnames(annot2)
[1] "ID" "Symbol" "ENTREZ_GENE_ID"
> annot1$'Gene Symbol' <- sub(" .*", "", annot1$'Gene Symbol')
> annot1$'ENTREZ_GENE_ID' <- sub(" .*", "", annot1$'ENTREZ_GENE_ID')
> annot1 <- annot1[,c("ID", "Gene Symbol", "ENTREZ_GENE_ID")]
> colnames(annot1)[2] <- "Symbol"
> library(limma)
> length(intersect(annot1$Symbol, annot2$Symbol))
[1] 18035
> length(intersect(annot1$ENTREZ_GENE_ID, annot2$ENTREZ_GENE_ID))
[1] 17432
> rownames(annot1) <- annot1$ID
> rownames(annot2) <- annot2$ID
> data1$Entrez <- annot1[as.character(data1$ID_REF), "ENTREZ_GENE_ID"]
> data2$Entrez <- annot2[as.character(data2$ID_REF), "ENTREZ_GENE_ID"]
> colnames(data1)
[1] "ID_REF" "GSM530601" "GSM530602" "GSM530603" "GSM530604" "GSM530605" "GSM530606" "GSM530607"
[9] "GSM530608" "GSM530609" "GSM530610" "GSM530611" "GSM530612" "GSM530613" "GSM530614" "GSM530615"
[17] "GSM530616" "GSM530617" "GSM530618" "Entrez"
> colnames(data2)
[1] "ID_REF" "GSM1139484" "GSM1139485" "GSM1139486" "GSM1139487" "GSM1139488" "GSM1139489"
[8] "GSM1139490" "GSM1139491" "GSM1139492" "GSM1139493" "GSM1139494" "GSM1139495" "Entrez"
> data1 <- data1[,-1]
> data2 <- data2[,-1]
> data1 <- aggregate(.~Entrez, data1, mean)
> data2 <- aggregate(.~Entrez, data2, mean)
> dim(data1)
[1] 21181 19
> dim(data2)
[1] 19358 13
> all <- merge(data1, data2, by="Entrez")
> all <- subset(all, Entrez !="")
> rownames(all) <- all$Entrez
> all <- subset(all, select= -Entrez)
> max(all)
[1] 16.03965
> min(all)
[1] 0.3357633
> batch <- factor(c(rep(1, ncol(data1)-1), rep(2, ncol(data2)-1)))
> gr <- c(rep("Primed", 12), rep("Naive", 10), "Primed", "Naive", "Primed", rep("Naive", 4), "Primed")
> library(ggplot2)
> library(sva)
> allc <- ComBat(as.matrix(all), batch = batch)
> allm <- allc - rowMeans(allc)
> pc <- prcomp(allm)
> pcr <- data.frame(pc$r[,1:3], batch)
> allc <- as.data.frame(allc)
> allc$description <- factor(gr)
Error in `$<-.data.frame`(`*tmp*`, description, value = c(2L, 2L, 2L, :
replacement has 30 rows, data has 17431
> allc <- ComBat(as.matrix(all), batch = batch)
> allc$description <- factor(gr)
Warning message:
In allc$description <- factor(gr) : Coercing LHS to a list```