У меня есть две матрицы, такие как
Я хочу получить пересечение
> head(small[1:3,])
Chrom1 Start1 End1 Strand1 Chrom2 Start2 End2
1 1 28677074 28677079 + 1 28706324 28706329
2 1 186383731 186383738 + 1 186383731 186383732
3 1 245902589 245902590 + 1 246007384 246007385
Strand2
1 -
2 -
3 -
>
> dim(small)
[1] 1594 8
https://www.dropbox.com/s/mkplrg1236f8qtd/111.txt?dl=0
И большая матрица
> head(big[1:3,])
Ensembl_Gene_ID Chromosome_Name Gene_Start Gene_End
1 ENSG00000233440 13 23708313 23708703
2 ENSG00000207157 13 23726725 23726825
3 ENSG00000229483 13 23743974 23744736
Associated_Gene_Name X5UTR_Start X5UTR_End X3UTR_Start X3UTR_End
1 HMGA1P6 NA NA NA NA
2 RNY3P4 NA NA NA NA
3 LINC00362 NA NA NA NA
Transcript_count Gene_Biotype Exon_Chr_Start Exon_Chr_End
1 1 pseudogene 23708313 23708703
2 1 misc_RNA 23726725 23726825
3 1 lincRNA 23744691 23744736
Exon_Rank_in_Transcript Ensembl_Transcript_ID Transcript_Start
1 1 ENST00000418454 23708313
2 1 ENST00000384428 23726725
3 1 ENST00000414345 23743974
Transcript_End strand
1 23708703 1
2 23726825 -1
3 23744736 -1
> dim(big)
[1] 1048575 18
>
https://www.dropbox.com/s/bit4iw2ne19td63/big.txt?dl=0
Мне нужно что-то вроде
Chrom1 Start1 End1 Strand1 Chrom2 Start2 End2 Strand2 GeneName.node1 GeneName.node2
chr1 14480603 14481217 + chr1 14747789 14748719 - - -
chr1 16169956 16170596 + chr1 16217823 16218463 - RP11-169K16.9 SPEN
У меня есть сценарий R вроде
#### Determining breakpoint locations: and adding to table
small$breakpoint1 <- apply(small[,c("Strand1","Start1","End1")], 1,
function(x) ifelse(x[1] == "+",as.numeric(x[3]),
as.numeric(x[2])))
small$breakpoint2 <- apply(small[,c("Strand2","Start2","End2")], 1,
function(x) ifelse(x[1] == "+",as.numeric(x[3]),
as.numeric(x[2])))
svinfo$breakpoint1.ordered <- apply(svinfo[,c("breakpoint1","breakpoint2")],1,
function(x) min(as.numeric(x[1]),as.numeric(x[2])))
svinfo$breakpoint2.ordered <- apply(svinfo[,c("breakpoint1","breakpoint2")],1,
function(x) max(as.numeric(x[1]),as.numeric(x[2])))
#######
### Start SV annotation:
gr.hg19.gene <- GRanges(
seqnames = Rle(hg19$Chromosome_Name),
ranges = IRanges(start=as.numeric(hg19$Gene_Start), end = as.numeric(hg19$Gene_End)),
EnsemblGeneID = hg19$Ensembl_Gene_ID,
GeneName = hg19$Associated_Gene_Name,
TranscriptCount = hg19$Transcript_count,
ExonChrStart = hg19$Exon_Chr_Start,
ExonChrEnd = hg19$Exon_Chr_End,
ExonRankInTranscript = hg19$Exon_Rank_in_Transcript,
EnsemblTranscriptID = hg19$Ensembl_Transcript_ID,
TranscriptStart = hg19$Transcript_Start,
TranscriptEnd = hg19$Transcript_End,
GeneStrand = hg19$Strand,
ExonID = paste(hg19$Exon_Chr_Start,hg19$Exon_Chr_End,sep=".")
)
gr.svinfo.node1 <- GRanges(
seqnames = Rle(svinfo$Chrom1),
ranges = IRanges(start=as.numeric(svinfo$breakpoint1), end = as.numeric(svinfo$breakpoint1)),
Type = svinfo$Type,
Node1.Strand = svinfo$Strand1,
ID = svinfo$ID
)
gr.svinfo.node2 <- GRanges(
seqnames = Rle(svinfo$Chrom2),
ranges = IRanges(start=as.numeric(svinfo$breakpoint2), end = as.numeric(svinfo$breakpoint2)),
Type = svinfo$Type,
Node2.Strand = svinfo$Strand2,
ID = svinfo$ID
)
Но я не знаю, как получить гены, связанные с каждой частью маленькой матрицы
Может ли кто-нибудь помочь мне?