Как построить график корреляции для выбранных столбцов - PullRequest
0 голосов
/ 17 апреля 2020

Я хочу построить корреляционный график «ДНК против РНК» для образцов из моей таблицы данных. Как выбрать один диапазон столбцов (ДНК) для оси X, а остальные столбцы (РНК) — для оси Y, и построить такой график?

Пример данных

structure(list(RNA_T_0d = c(123.3540196, 2555.009918, 372.8928872, 
197.7139756, 197.7558796, 1819.467639, 268.3754862, 536.6923745, 
976.0457375, 138.3840469, 223.7599509, 2382.639138, 488.8051605, 
685.0873021, 1256.741858, 362.0016761, 594.8892136, 132.6928877, 
1795.045952, 1312.656731, 220.9514818, 199.7937582, 121.8956089, 
296.962662, 111.5437485, 91.49534944, 303.4654742, 2878.341776, 
724.1387546, 3536.103853), RNA_T_1d = c(117.5074379, 3943.561728, 
887.5236056, 221.1421939, 359.3336419, 3564.17523, 848.1434556, 
1083.683449, 1487.090929, 542.0256969, 1150.100385, 5479.159632, 
1592.822826, 1121.73857, 3638.131461, 449.145443, 1137.435616, 
281.2855817, 2861.01719, 1421.758321, 396.6180692, 334.9768095, 
289.1637032, 2140.133988, 323.967185, 326.0042706, 1608.011919, 
3899.463032, 2029.823549, 3567.352097), RNA_T_3d = c(78.21990313, 
2128.191955, 457.5990029, 142.1411258, 144.260197, 1370.550602, 
553.8705814, 582.8125092, 952.2426289, 319.5651801, 979.6899674, 
2589.87576, 1307.944492, 710.0937228, 1347.522779, 296.7750163, 
633.6116157, 223.0734314, 2083.399127, 818.454619, 327.9847552, 
147.8355813, 179.6407278, 345.3795587, 222.8163538, 132.413236, 
518.1713516, 2581.849746, 788.8571771, 4439.522485), DNA_T_0d = c(18.5197361, 
22.48748109, 17.30833213, 17.39705118, 19.65769504, 19.23334671, 
18.47105779, 17.12006055, 22.16447603, 17.67585286, 18.36241911, 
18.06198826, 17.1475032, 19.8023706, 18.03942203, 17.19291065, 
18.60999758, 16.60287792, 19.60039002, 18.88762536, 17.14796753, 
17.73058287, 17.63243595, 18.96199242, 19.82596306, 20.56516148, 
19.24498589, 20.39999086, 17.37720695, 16.73154943), DNA_T_1d = c(18.41103526, 
22.39731654, 17.51583348, 16.39611206, 20.0420798, 19.02785828, 
18.69372361, 16.98508666, 22.00918126, 17.80095289, 17.47914196, 
17.6440272, 16.50984611, 19.53184167, 18.03181892, 17.09650671, 
18.45576309, 16.37290268, 20.24872315, 18.95208285, 17.64948995, 
17.49009935, 17.62233167, 19.44940724, 19.745438, 20.26202155, 
19.47730469, 20.54058357, 17.51398145, 17.0738139), DNA_T_3d = c(18.33936118, 
21.9538277, 18.03783281, 17.37835619, 19.99321507, 19.33580894, 
18.66694717, 17.10261065, 22.21049676, 18.06614098, 17.15583499, 
17.85371195, 16.64347165, 19.63602607, 18.17779781, 17.20633149, 
18.57368257, 16.36700381, 20.86229019, 18.78072815, 17.96488883, 
17.67358244, 17.52922917, 19.86236758, 19.55200672, 20.53369, 
19.58410373, 20.64849751, 17.5768534, 17.38971165), RNA_GG_0d = c(763.902803, 
71.28114855, 282.5478626, 1273.997394, 111.4687471, 1971.259626, 
1123.225868, 665.7043636, 94.70746812, 183.0318776, 116.8872057, 
266.6888394, 1923.014278, 781.0785497, 730.180374, 384.7962839, 
6466.158344, 383.8703667, 237.3403603, 233.0806013, 125.7504427, 
143.3956123, 172.4963304, 4611.24168, 281.5220002, 278.3019207, 
3.189744425, 41.27814364, 159.9384171, 1118.431292), RNA_GG_1d = c(3206.954511, 
110.4901081, 957.0795116, 5677.86599, 38.43426105, 14319.77858, 
3662.382147, 800.8511256, 291.9302287, 118.8570351, 259.4204537, 
437.3050957, 2893.915506, 1688.519778, 1054.418644, 837.2192159, 
2290.232314, 657.7923798, 83.31465527, 665.1735093, 201.7525953, 
625.7547112, 748.3700911, 7275.687713, 178.5244712, 1.2e-05, 
10.92176424, 21.88103184, 402.1959977, 3648.145259), RNA_GG_3d = c(1365.048736, 
63.81152564, 602.6847251, 4148.107571, 13.21089439, 11692.20939, 
2056.48316, 398.9283092, 142.8421016, 40.59709886, 138.5344311, 
281.8745032, 1409.150293, 839.198448, 707.151687, 360.8733746, 
426.0317257, 403.814109, 33.94733367, 290.8023806, 106.339672, 
298.57277, 393.9230186, 3609.15776, 47.11419139, 6.355568708, 
3.898950035, 6.12439752, 229.5002768, 2963.852575), DNA_GG_0d = c(17.74264391, 
18.78622688, 17.31567316, 18.5300959, 17.53626049, 23.4083551, 
23.4083551, 18.36736594, 17.3961252, 20.59070989, 16.99664427, 
17.91561686, 17.86876493, 18.06518818, 20.02540506, 18.80040131, 
23.56615244, 17.50699849, 20.50728306, 17.88399108, 18.47140515, 
17.15416607, 16.63485535, 17.23915126, 18.8487439, 18.8487439, 
17.37631322, 18.91172944, 19.28626643, 17.38361384), DNA_GG_1d = c(17.38101803, 
18.74424077, 17.45345672, 18.54762957, 17.64259287, 23.83917576, 
23.83917576, 18.30361872, 17.40391715, 20.32669439, 16.75204675, 
18.69352166, 17.76204756, 17.80620654, 19.88638342, 19.03176766, 
23.19947185, 17.69576627, 20.55351553, 18.05121622, 18.47807982, 
17.14561921, 16.86968238, 17.67597089, 18.80336653, 18.80336653, 
17.48383262, 19.09133577, 19.37869807, 17.53038535), DNA_GG_3d = c(17.63636096, 
18.90456833, 17.65100405, 19.41912124, 17.87797922, 24.26044627, 
24.26044627, 18.21567535, 17.91138779, 18.56787336, 16.85100845, 
17.92233089, 17.76060124, 17.88418887, 19.8191119, 18.51562285, 
20.52678918, 17.64198173, 19.46948411, 18.02202304, 18.44294852, 
17.64296595, 16.82472985, 17.54445657, 18.90968462, 18.90968462, 
17.2907738, 18.50082286, 19.14152028, 17.23431189), RNA_Y_0d = c(143.8018766, 
458.7124982, 924.7944867, 742.5636139, 1839.382769, 172.5607016, 
904.6285007, 153.6970875, 32.10028817, 193.0183416, 2401.787717, 
627.4480898, 856.4517849, 63.74806861, 446.2231105, 248.3540763, 
607.3457109, 175.2556586, 301.208638, 907.8543529, 625.5716115, 
275.1555674, 143.4196077, 373.8474474, 3422.61105, 53.11832773, 
1014.107372, 1115.293607, 132.2232729, 41932.41901), RNA_Y_1d = c(207.2915943, 
863.5978194, 2427.749626, 131.0831647, 2709.355621, 534.5089069, 
521.2040501, 541.8789647, 18.02034673, 314.486768, 2657.507902, 
907.5766384, 1117.800585, 232.6733285, 866.1264018, 135.357359, 
311.7850956, 276.5000752, 1369.453723, 1685.9948, 1309.0819, 
669.153273, 411.1273995, 521.0307303, 7562.217031, 111.7514039, 
1392.407576, 5634.422902, 365.9884541, 173539.6463), RNA_Y_3d = c(101.61647, 
501.9694617, 1189.309416, 152.601451, 10105.15118, 336.2775719, 
168.4625971, 257.8603935, 10.74989468, 171.5812541, 1646.485419, 
388.109237, 904.14826, 125.731772, 405.3612055, 109.6546839, 
126.0603418, 174.6835395, 1018.461237, 992.7367435, 772.2618988, 
395.7452458, 193.8411187, 294.3837539, 3975.450177, 68.76204778, 
576.0749873, 6004.46658, 171.2346175, 214825.6364), DNA_Y_0d = c(16.3104489, 
18.14611979, 18.45159295, 18.23340003, 16.85004092, 19.06636475, 
19.38528831, 16.03115936, 17.91530679, 17.44738648, 21.1976527, 
17.85971013, 18.45103602, 17.0352191, 17.78996497, 17.93768166, 
20.16611662, 18.71704883, 18.54834843, 18.12726791, 18.20726348, 
19.98724892, 18.71282573, 16.99263842, 18.43049539, 17.25184042, 
17.18022024, 19.66546515, 16.38818276, 18.35685639), DNA_Y_1d = c(16.44855687, 
18.1387936, 18.69329598, 17.97859029, 16.59207225, 19.09133577, 
19.38806547, 15.91930376, 19.09133577, 17.40370608, 21.26677284, 
18.08116184, 18.70960232, 17.00312701, 18.36781108, 18.27773097, 
20.43592485, 18.56269678, 17.30661982, 17.94492081, 18.3400548, 
20.32336004, 18.88182702, 16.9030293, 18.84613963, 16.87193179, 
17.5418742, 20.17021863, 17.58005993, 18.46428149), DNA_Y_3d = c(16.4366845, 
18.16445377, 18.59010017, 18.10539093, 17.12735857, 19.11913328, 
18.88262651, 16.23147663, 19.11913328, 17.71477424, 21.13511584, 
17.81204488, 18.37882001, 17.20483597, 17.98293255, 17.80531718, 
20.53201543, 18.58028662, 17.62013935, 17.83281483, 18.08485124, 
20.13764126, 18.98661182, 16.98740873, 18.7330765, 16.92317076, 
17.92168561, 20.87855547, 18.43531021, 18.9803887)), class = "data.frame", row.names = c(NA, 
-30L))

Это мой текущий рабочий скрипт, но я не знаю, как выбрать только нужные столбцы для каждой оси.

сценарий

library(corrplot)

# Full correlation matrix of every column of `df` against every other column.
# NOTE(review): the original snippet used `m` without defining it; cor(df) is
# assumed here because the p-values below are computed from the same `df`.
m <- cor(df)

# Plain correlation plot, clustered with Ward's method.
corrplot(
  m,
  method = "square",
  type = "upper",
  tl.col = "black",
  tl.cex = 1.4,
  cl.cex = 1.4,
  col = colorRampPalette(c("blue", "red"))(5),
  order = "hclust",
  hclust.method = "ward.D2"
)

# Add some statistical significance and plot:
# cor.mtest() tests every pair of columns of the raw data; its `p` element is
# the matrix of p-values that matches `m`.
p_mat <- cor.mtest(df)
corrplot(
  m,
  title = "Dummy name here",
  method = "circle",
  type = "upper",
  tl.col = "black",
  col = colorRampPalette(c("blue", "red"))(5),
  order = "hclust",
  hclust.method = "ward.D2",
  tl.cex = 1.2,
  cl.cex = 1.2,
  outline = TRUE,
  mar = c(0, 0, 4, 5),  # extra top margin so the title is not clipped
  sig.level = 0.05,
  p.mat = p_mat$p
)

Например, я хочу построить график такого рода. Это одни и те же образцы, но для них измерены разные значения — РНК и ДНК.

Буду признателен, если вы поможете мне с этим.

Ответы [ 2 ]

2 голосов
/ 17 апреля 2020

Если ваш набор данных имеет одинаковое количество переменных РНК и переменных ДНК, это будет работать.

library(corrplot)

# Sort the columns by name so that all DNA_* columns come first, followed by
# the RNA_* columns, with the samples listed in the same order in both groups.
sortdata <- data[sort(colnames(data))]

# Pick each side by its name prefix instead of assuming an exact half/half
# split of the columns.
dna_idx <- which(startsWith(colnames(sortdata), "DNA_"))
rna_idx <- which(startsWith(colnames(sortdata), "RNA_"))

# The clustered ordering requested below is only meaningful for a square
# block, i.e. the same number of DNA and RNA columns.
stopifnot(length(dna_idx) > 0, length(dna_idx) == length(rna_idx))

# Cross-correlation block: RNA columns as rows, DNA columns as columns.
m <- cor(sortdata[rna_idx], sortdata[dna_idx])

# Significance tests have to be run on the measurements themselves, not on the
# correlation matrix `m`. Test all pairs once, then keep the RNA x DNA block so
# the p-values line up cell by cell with `m`. Positional indexing is used so
# this does not depend on the p-value matrix carrying dimnames.
p_mat <- cor.mtest(sortdata)$p[rna_idx, dna_idx, drop = FALSE]

# Plot the RNA-vs-DNA correlation block together with its p-values.
corrplot(
  m,
  title = "Dummy name here",
  method = "circle",
  type = "full",  # the block is not symmetric, so show every cell
  tl.col = "black",
  col = colorRampPalette(c("blue", "red"))(5),
  order = "hclust",
  hclust.method = "ward.D2",
  tl.cex = 1.2,
  cl.cex = 1.2,
  outline = TRUE,
  mar = c(0, 0, 4, 5),  # extra top margin so the title is not clipped
  sig.level = 0.05,
  p.mat = p_mat
)

enter image description here

2 голосов
/ 17 апреля 2020

Думаю, это должно сработать:

library(corrplot)

# Create a new data frame with the columns ordered alphabetically, so the
# DNA_* columns precede the RNA_* columns and samples line up in both groups.
df_new <- df[, order(colnames(df))]

# Locate each group by its name prefix rather than by hard-coded positions.
dna_cols <- grep("^DNA_", colnames(df_new))
rna_cols <- grep("^RNA_", colnames(df_new))

# Correlation of every column with every other column.
cor_all <- cor(df_new)

# Keep only the RNA (rows) x DNA (columns) block of the correlation matrix.
cor_block <- cor_all[rna_cols, dna_cols, drop = FALSE]

# Matching block of p-values; without a p.mat the sig.level argument below
# would have no effect. Positional indexing keeps it aligned with cor_block.
p_block <- cor.mtest(df_new)$p[rna_cols, dna_cols, drop = FALSE]

# Run the plot.
corrplot(
  cor_block,
  title = "Dummy name here",
  method = "circle",
  type = "full",  # the block is not symmetric, so show every cell
  tl.col = "black",
  order = "hclust",
  hclust.method = "ward.D2",
  tl.cex = 1.2,
  cl.cex = 1.2,
  outline = TRUE,
  mar = c(0, 0, 4, 5),  # extra top margin so the title is not clipped
  sig.level = 0.05,
  p.mat = p_block
)

Результат:

enter image description here

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...