Согласно этой теме, я могу заменять шаблоны в столбцах. Мне нужно заранее знать, в каком столбце выполнять замену. Вот начало (head) моего dataframe:
# Reproducible sample (dput-style literal) of the clinical data frame:
# six rows, one row per patient sample.
# - Every text column is stored as a factor; `Patients` carries a level set
#   far larger than the six rows present here.
# - `DFS_STATUS` has an empty-string level ("") that is used by rows 1, 2
#   and 4; these are the values the later Spark step tries to turn into NA.
# - Numeric columns (DFS_MONTHS, KARNOFSKY_PERFORMANCE_SCORE, MUTATION_COUNT)
#   already contain real NA values.
clinicalData <- structure(list(Patients = structure(c(187L, 63L, 167L, 88L, 173L,
139L), .Label = c("TCGA.02.0001.01", "TCGA.02.0003.01", "TCGA.02.0004.01",
"TCGA.02.0006.01", "TCGA.02.0007.01", "TCGA.02.0009.01", "TCGA.02.0010.01",
"TCGA.02.0011.01", "TCGA.02.0014.01", "TCGA.02.0015.01", "TCGA.02.0016.01",
"TCGA.02.0021.01", "TCGA.02.0023.01", "TCGA.02.0024.01", "TCGA.02.0025.01",
"TCGA.02.0026.01", "TCGA.02.0027.01", "TCGA.02.0028.01", "TCGA.02.0033.01",
"TCGA.02.0034.01", "TCGA.02.0037.01", "TCGA.02.0038.01", "TCGA.02.0039.01",
"TCGA.02.0043.01", "TCGA.02.0046.01", "TCGA.02.0047.01", "TCGA.02.0048.01",
"TCGA.02.0052.01", "TCGA.02.0054.01", "TCGA.02.0055.01", "TCGA.02.0057.01",
"TCGA.02.0058.01", "TCGA.02.0060.01", "TCGA.02.0064.01", "TCGA.02.0068.01",
"TCGA.02.0069.01", "TCGA.02.0070.01", "TCGA.02.0071.01", "TCGA.02.0074.01",
"TCGA.02.0075.01", "TCGA.02.0079.01", "TCGA.02.0080.01", "TCGA.02.0083.01",
"TCGA.02.0084.01", "TCGA.02.0085.01", "TCGA.02.0086.01", "TCGA.02.0087.01",
"TCGA.02.0089.01", "TCGA.02.0099.01", "TCGA.02.0102.01", "TCGA.02.0104.01",
"TCGA.02.0106.01", "TCGA.02.0107.01", "TCGA.02.0111.01", "TCGA.02.0113.01",
"TCGA.02.0114.01", "TCGA.02.0115.01", "TCGA.02.0116.01", "TCGA.02.0258.01",
"TCGA.02.0260.01", "TCGA.02.0266.01", "TCGA.02.0269.01", "TCGA.02.0271.01",
"TCGA.02.0281.01", "TCGA.02.0285.01", "TCGA.02.0289.01", "TCGA.02.0290.01",
"TCGA.02.0317.01", "TCGA.02.0321.01", "TCGA.02.0324.01", "TCGA.02.0325.01",
"TCGA.02.0326.01", "TCGA.02.0330.01", "TCGA.02.0332.01", "TCGA.02.0333.01",
"TCGA.02.0337.01", "TCGA.02.0338.01", "TCGA.02.0339.01", "TCGA.02.0422.01",
"TCGA.02.0430.01", "TCGA.02.0432.01", "TCGA.02.0439.01", "TCGA.02.0440.01",
"TCGA.02.0446.01", "TCGA.02.0451.01", "TCGA.02.0456.01", "TCGA.06.0122.01",
"TCGA.06.0124.01", "TCGA.06.0125.01", "TCGA.06.0126.01", "TCGA.06.0127.01",
"TCGA.06.0128.01", "TCGA.06.0129.01", "TCGA.06.0130.01", "TCGA.06.0132.01",
"TCGA.06.0133.01", "TCGA.06.0137.01", "TCGA.06.0138.01", "TCGA.06.0139.01",
"TCGA.06.0141.01", "TCGA.06.0143.01", "TCGA.06.0145.01", "TCGA.06.0146.01",
"TCGA.06.0147.01", "TCGA.06.0148.01", "TCGA.06.0149.01", "TCGA.06.0152.01",
"TCGA.06.0154.01", "TCGA.06.0156.01", "TCGA.06.0157.01", "TCGA.06.0158.01",
"TCGA.06.0162.01", "TCGA.06.0164.01", "TCGA.06.0166.01", "TCGA.06.0168.01",
"TCGA.06.0169.01", "TCGA.06.0171.01", "TCGA.06.0173.01", "TCGA.06.0174.01",
"TCGA.06.0175.01", "TCGA.06.0176.01", "TCGA.06.0177.01", "TCGA.06.0178.01",
"TCGA.06.0179.01", "TCGA.06.0182.01", "TCGA.06.0184.01", "TCGA.06.0185.01",
"TCGA.06.0187.01", "TCGA.06.0188.01", "TCGA.06.0189.01", "TCGA.06.0190.01",
"TCGA.06.0194.01", "TCGA.06.0195.01", "TCGA.06.0197.01", "TCGA.06.0201.01",
"TCGA.06.0206.01", "TCGA.06.0208.01", "TCGA.06.0209.01", "TCGA.06.0210.01",
"TCGA.06.0211.01", "TCGA.06.0213.01", "TCGA.06.0214.01", "TCGA.06.0219.01",
"TCGA.06.0221.01", "TCGA.06.0237.01", "TCGA.06.0238.01", "TCGA.06.0241.01",
"TCGA.06.0394.01", "TCGA.06.0397.01", "TCGA.06.0402.01", "TCGA.06.0409.01",
"TCGA.06.0410.01", "TCGA.06.0412.01", "TCGA.06.0413.01", "TCGA.06.0414.01",
"TCGA.06.0644.01", "TCGA.06.0645.01", "TCGA.06.0646.01", "TCGA.06.0648.01",
"TCGA.08.0244.01", "TCGA.08.0246.01", "TCGA.08.0344.01", "TCGA.08.0345.01",
"TCGA.08.0346.01", "TCGA.08.0347.01", "TCGA.08.0348.01", "TCGA.08.0349.01",
"TCGA.08.0350.01", "TCGA.08.0351.01", "TCGA.08.0352.01", "TCGA.08.0353.01",
"TCGA.08.0354.01", "TCGA.08.0355.01", "TCGA.08.0356.01", "TCGA.08.0357.01",
"TCGA.08.0358.01", "TCGA.08.0359.01", "TCGA.08.0360.01", "TCGA.08.0373.01",
"TCGA.08.0375.01", "TCGA.08.0380.01", "TCGA.08.0385.01", "TCGA.08.0386.01",
"TCGA.08.0389.01", "TCGA.08.0390.01", "TCGA.08.0392.01", "TCGA.08.0509.01",
"TCGA.08.0510.01", "TCGA.08.0511.01", "TCGA.08.0512.01", "TCGA.08.0514.01",
"TCGA.08.0516.01", "TCGA.08.0517.01", "TCGA.08.0518.01", "TCGA.08.0520.01",
"TCGA.08.0521.01", "TCGA.08.0522.01", "TCGA.08.0524.01", "TCGA.08.0525.01",
"TCGA.08.0529.01", "TCGA.08.0531.01", "TCGA.12.0615.01", "TCGA.12.0616.01",
"TCGA.12.0618.01", "TCGA.12.0619.01", "TCGA.12.0620.01"), class = "factor"),
ACGH_DATA = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NO",
"YES"), class = "factor"), CANCER_TYPE = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Glioma", class = "factor"),
CANCER_TYPE_DETAILED = structure(c(1L, 1L, 1L, 1L, 1L, 1L
), .Label = "Glioblastoma Multiforme", class = "factor"),
COMPLETE_DATA = structure(c(1L, 1L, 2L, 2L, 2L, 2L), .Label = c("NO",
"YES"), class = "factor"), DFS_MONTHS = c(NA, NA, 3.024657534,
NA, 17.0630137, 2.202739726), DFS_STATUS = structure(c(1L,
1L, 3L, 1L, 3L, 3L), .Label = c("", "DiseaseFree", "Recurred"
), class = "factor"), FRACTION_GENOME_ALTERED = c(0.168880099,
0.126393857, 0.254657044, 0.174506086, 0.182024694, 0.154967319
), KARNOFSKY_PERFORMANCE_SCORE = c(60L, NA, NA, NA, 100L,
NA), MRNA_DATA = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NO",
"YES"), class = "factor"), MUTATION_COUNT = c(NA, NA, NA,
4L, NA, 0L), ONCOTREE_CODE = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = "GBM", class = "factor"), OS_MONTHS = c(12.55890411,
14.46575342, 9.797260274, 20.35068493, 24.55890411, 7.397260274
), OS_STATUS = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("DECEASED",
"LIVING"), class = "factor"), PRETREATMENT_HISTORY = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("NO", "YES"), class = "factor"),
PRIOR_GLIOMA = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("NO",
"YES"), class = "factor"), SEQUENCED = structure(c(1L, 1L,
2L, 2L, 2L, 2L), .Label = c("NO", "YES"), class = "factor"),
SEX = structure(c(2L, 2L, 2L, 2L, 1L, 1L), .Label = c("Female",
"Male"), class = "factor"), TREATMENT_STATUS = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Treated", "Untreated"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
а затем:
# Copy the sample data frame into a local Spark session and blank out
# empty DFS_STATUS values.
# NOTE(review): assumes sparklyr and dplyr are already attached
# (spark_connect, %>% and mutate are used unqualified) -- confirm.
sc <- spark_connect(master = "local")
clinicalData_tbl <- dplyr::copy_to(sc, clinicalData, overwrite = TRUE)

# regexp_replace() can only substitute one string for another, so passing
# "NA" stores the literal two-character text "NA", not a missing value.
# A conditional is needed instead: dbplyr translates ifelse(cond, NA, x)
# into a SQL CASE expression that yields NULL, which is read back into R
# as a real NA. The set c("", " ") covers the same two values the original
# pattern "^$|^ $" targeted (empty string or a single space).
clinicalData_tbl %>%
  mutate(DFS_STATUS = ifelse(DFS_STATUS %in% c("", " "), NA, DFS_STATUS))
Я пробовал указать NA без кавычек (вместо "NA"), но это не работает.