Проверка корректности итогового фрейма данных (data frame) для пометки нерегулярных вызовов - PullRequest
0 голосов
/ 28 января 2020

Итак, у меня есть код (см. ниже), который берёт файлы аллельной дискриминации из программного обеспечения BioRad и обрабатывает их, чтобы получить итоговую переменную генотипа.

Все лунки должны иметь итоговый генотип, кроме A01, A02, B01, B02, C01, C02, D01 и D02 (они указаны как «NA»).

В приложенном наборе данных есть образец в лунке A10, который считывается как пустой и имеет пропущенное значение ''. Мне нужно добавить фрагмент кода, создающий новую переменную ('flag'), которая помечает все лунки без значения.

###Partial Code for converting call to allele variants###


###G1-1###

##Load in G1-1 file from PCR###

# Read the G1-1 allelic discrimination results exported from the PCR run.
APOL_1_Allelic_Discrimination_G1_1 <- read.csv("admin_2019-03-17 08-04-00_BR007717_PLATE6_G1-1_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv")

# No attach() here: mutate() below looks up `Call` inside the data frame, and
# attaching several tables with identical column names only masks them on the
# search path.

# Discard the columns that are not used for genotype calling.
# drop = FALSE keeps the result a data frame even if one column remains.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
G1_1 <- APOL_1_Allelic_Discrimination_G1_1[
  ,
  !(names(APOL_1_Allelic_Discrimination_G1_1) %in% drops),
  drop = FALSE
]

# Translate each call into two allele columns (G1_1_1, G1_1_2):
#   "Allele 1"     -> G1^{S342G} / G1^{S342G}
#   "Allele 2"     -> + / +
#   "Heterozygote" -> G1^{S342G} / +
#   "No Call"      -> Blank / Blank
# case_when() has no fallback branch, so any other Call value (an empty
# string, NA, an unexpected label) produces NA in both columns.
G1_1 <- G1_1 %>%
  mutate(
    G1_1_1 = case_when(
      Call == "Allele 1" ~ "G1^{S342G}",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "G1^{S342G}",
      Call == "No Call" ~ "Blank"
    ),
    G1_1_2 = case_when(
      Call == "Allele 1" ~ "G1^{S342G}",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

# The raw call is no longer needed once the allele columns exist.
G1_1$Call <- NULL

###G1-2###

##Load in G1-2 file from PCR###

# Read the G1-2 allelic discrimination results exported from the PCR run.
APOL_1_Allelic_Discrimination_G1_2 <- read.csv("admin_2019-03-17 04-59-11_BR007717_PLATE5_G1-2_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv")

# No attach() here: mutate() below looks up `Call` inside the data frame, and
# attaching several tables with identical column names only masks them on the
# search path.

# Discard the columns that are not used for genotype calling.
# drop = FALSE keeps the result a data frame even if one column remains.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
G1_2 <- APOL_1_Allelic_Discrimination_G1_2[
  ,
  !(names(APOL_1_Allelic_Discrimination_G1_2) %in% drops),
  drop = FALSE
]

# Translate each call into two allele columns (G1_2_1, G1_2_2):
#   "Allele 1"     -> + / +
#   "Allele 2"     -> G1^{I384M} / G1^{I384M}
#   "Heterozygote" -> G1^{I384M} / +
#   "No Call"      -> Blank / Blank
# case_when() has no fallback branch, so any other Call value (an empty
# string, NA, an unexpected label) produces NA in both columns.
G1_2 <- G1_2 %>%
  mutate(
    G1_2_1 = case_when(
      Call == "Allele 1" ~ "+",
      Call == "Allele 2" ~ "G1^{I384M}",
      Call == "Heterozygote" ~ "G1^{I384M}",
      Call == "No Call" ~ "Blank"
    ),
    G1_2_2 = case_when(
      Call == "Allele 1" ~ "+",
      Call == "Allele 2" ~ "G1^{I384M}",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

# The raw call is no longer needed once the allele columns exist.
G1_2$Call <- NULL

###G2###

##Load in G2 file from PCR###

# Read the G2 allelic discrimination results exported from the PCR run.
APOL_1_Allelic_Discrimination_G2 <- read.csv("admin_2019-03-17 01-41-46_BR007717_PLATE4_G2_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv")

# No attach() here: mutate() below looks up `Call` inside the data frame, and
# attaching several tables with identical column names only masks them on the
# search path.

# Discard the columns that are not used for genotype calling.
# drop = FALSE keeps the result a data frame even if one column remains.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
G2 <- APOL_1_Allelic_Discrimination_G2[
  ,
  !(names(APOL_1_Allelic_Discrimination_G2) %in% drops),
  drop = FALSE
]

# Translate each call into two allele columns (G2_1, G2_2):
#   "Allele 1"     -> G2 / G2
#   "Allele 2"     -> + / +
#   "Heterozygote" -> G2 / +
#   "No Call"      -> Blank / Blank
# case_when() has no fallback branch, so any other Call value (an empty
# string, NA, an unexpected label) produces NA in both columns.
G2 <- G2 %>%
  mutate(
    G2_1 = case_when(
      Call == "Allele 1" ~ "G2",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "G2",
      Call == "No Call" ~ "Blank"
    ),
    G2_2 = case_when(
      Call == "Allele 1" ~ "G2",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

# The raw call is no longer needed once the allele columns exist.
G2$Call <- NULL

###Merge G1-1, G1-2 and G2 together###

# Combine the per-assay allele tables on the shared "Well" column:
# first G1-1 with G1-2, then that intermediate result with G2.
G1 <- G1_1 %>%
  join(G1_2, by = "Well")

G1_G2 <- G1 %>%
  join(G2, by = "Well")


Набор данных (dput)


# dput() dump of a 10-row data.frame covering wells A01-A10.
# Columns: Well, the six allele columns (G1_1_1 ... G2_2), and
# Final.genotype.of.APOL1 plus three *.APOL1.Risk.Alleles indicator columns.
# NOTE(review): the final-genotype and risk-allele columns are presumably
# produced by a later step that is not part of the code shown above.
# Rows of interest: A01 and A02 have NA as final genotype; A10 has the empty
# level "" as final genotype while its G1_2_1 / G1_2_2 values are "Blank".
structure(list(Well = structure(1:10, .Label = c("A01", "A02", 
"A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10"), class = "factor"), 
    G1_1_1 = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 
    1L), .Label = c("+", "Blank", "G1^{S342G}"), class = "factor"), 
    G1_1_2 = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = c("+", "Blank"), class = "factor"), G1_2_1 = structure(c(2L, 
    2L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 2L), .Label = c("+", "Blank", 
    "G1^{I384M}"), class = "factor"), G1_2_2 = structure(c(2L, 
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("+", "Blank"
    ), class = "factor"), G2_1 = structure(c(2L, 2L, 1L, 1L, 
    1L, 1L, 3L, 3L, 1L, 1L), .Label = c("+", "Blank", "G2"), class = "factor"), 
    G2_2 = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("+", "Blank"), class = "factor"), Final.genotype.of.APOL1 = structure(c(NA, 
    NA, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 1L), .Label = c("", "G0/G0", 
    "G1^{GM}/G2"), class = "factor"), no.APOL1.Risk.Alleles = c(NA, 
    NA, 1L, 1L, 1L, 1L, NA, NA, 1L, NA), X1.APOL1.Risk.Alleles = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), X2.APOL1.Risk.Alleles = c(NA, 
    NA, NA, NA, NA, NA, 1L, 1L, NA, NA)), row.names = c(NA, 10L
), class = "data.frame")

...