Как построить коробчатую диаграмму числовых данных, исключив случаи, отмеченные как «0» в другом столбце? - PullRequest
0 голосов
/ 06 мая 2020

Я сделал коробчатую диаграмму для одного фактора следующим образом:

# Draws one boxplot of Survival.One.Year: x = factor(0) is a constant, so every
# value falls into a single box.
# NOTE(review): y is read from RPSdata via `$` rather than from a column of
# `data`; the `...` inside geom_boxplot() appears to stand in for arguments the
# asker left out -- confirm before running.
ggplot(data = dataframe2, aes(x=factor(0), y = RPSdata$Survival.One.Year)) + geom_boxplot(...)

Фрейм данных просто:

# Builds a one-column data frame from the Survival.One.Year column of RPSdata.
dataframe2 <- data.frame(RPSdata$Survival.One.Year)

Я хотел бы построить такую же коробчатую диаграмму, но включающую только те случаи, у которых в столбце RPSdata$Survival.Complete.Sense стоит код «1» (в приведённом ниже образце данных этот столбец называется Survival.Compelete.Sense).

Большое спасибо! Я новичок в R, поэтому буду благодарен за любую помощь.

Образец данных:

> dput(head(RPSdata, 5))
structure(list(ID.Rank = 1:5, ID.Participant = c("8571762481", 
"7351340719", "7396795819", "3790978753", "6450996320"), Population.Risk = structure(c(1L, 
2L, 3L, 2L, 2L), .Label = c("1", "2", "3", "4", "5", "6"), class = "factor"), 
    Personal.Risk = c(50, 60, 30, 40, 10), Comparative.Risk.Age = structure(c(2L, 
    NA, 3L, 4L, 3L), .Label = c("1", "2", "3", "4", "5"), class = "factor"), 
    Comparative.Risk.Current = structure(c(NA, 3L, 3L, NA, NA
    ), .Label = c("1", "2", "3", "4", "5"), class = "factor"), 
    Comparative.Risk.Ex = structure(c(2L, 3L, NA, NA, 3L), .Label = c("1", 
    "2", "3", "4", "5"), class = "factor"), Score.Exposure = structure(c(1L, 
    1L, 1L, 2L, 1L), .Label = c("1", "2", "4", "5"), class = "factor"), 
    RF.Age = structure(c(1L, NA, 1L, 1L, 2L), .Label = c("0", 
    "1", "2"), class = "factor"), RF.Pollution = structure(c(1L, 
    NA, 3L, 2L, 2L), .Label = c("0", "1", "2"), class = "factor"), 
    RF.Asbestos = structure(c(1L, NA, 1L, 1L, 1L), .Label = c("1", 
    "2"), class = "factor"), RF.Asthma = structure(c(2L, NA, 
    3L, 2L, 1L), .Label = c("0", "1", "2"), class = "factor"), 
    RF.BMI = structure(c(2L, NA, 1L, 2L, 3L), .Label = c("0", 
    "1", "2"), class = "factor"), RF.Gene = structure(c(2L, NA, 
    3L, 3L, 3L), .Label = c("0", "1", "2"), class = "factor"), 
    RF.COPD = structure(c(2L, NA, 2L, 2L, 2L), .Label = c("0", 
    "1", "2"), class = "factor"), RF.History = structure(c(2L, 
    NA, 1L, 1L, 2L), .Label = c("0", "1", "2"), class = "factor"), 
    RF.Diet = structure(c(3L, NA, 1L, 2L, 3L), .Label = c("0", 
    "1", "2"), class = "factor"), RF.Radon = structure(c(2L, 
    NA, 1L, 3L, 3L), .Label = c("0", "1", "2"), class = "factor"), 
    RF.Smoking = structure(c(2L, NA, 2L, 2L, 2L), .Label = c("0", 
    "1", "2"), class = "factor"), RF.Second.Smoke = structure(c(3L, 
    NA, 1L, 3L, 2L), .Label = c("0", "1", "2"), class = "factor"), 
    Survival.One.Year = c(80, 20, NA, NA, 90), Survival.Five.Year = c(60, 
    50, NA, 30, 50), Survival.Ten.Year = c(40, 20, NA, NA, 2), 
    Worry.Frequency = structure(c(1L, 3L, 1L, 1L, 1L), .Label = c("1", 
    "2", "3", "4"), class = "factor"), Worry.Intensity = structure(c(1L, 
    2L, 2L, 2L, 1L), .Label = c("1", "2", "3", "4"), class = "factor"), 
    Mental.Health.One = structure(c(1L, 3L, 2L, 1L, 1L), .Label = c("0", 
    "1", "2", "3"), class = "factor"), Mental.Health.Two = structure(c(1L, 
    2L, 2L, 1L, 1L), .Label = c("0", "1", "2", "3"), class = "factor"), 
    Mental.Health.Three = structure(c(1L, 1L, 1L, 1L, 1L), .Label = c("0", 
    "1", "2", "3"), class = "factor"), Mental.Health.Four = structure(c(2L, 
    2L, 1L, 1L, 1L), .Label = c("0", "1", "2", "3"), class = "factor"), 
    PHQ.4 = structure(c(2L, 5L, 3L, 1L, 1L), .Label = c("0", 
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
    "12"), class = "factor"), PHQ4.Anx = structure(c(1L, 4L, 
    3L, 1L, 1L), .Label = c("0", "1", "2", "3", "4", "5", "6"
    ), class = "factor"), PHQ4.Dep = structure(c(2L, 2L, 1L, 
    1L, 1L), .Label = c("0", "1", "2", "3", "4", "5", "6"), class = "factor"), 
    PHQ4.Bin = structure(c(1L, 2L, 1L, 1L, 1L), .Label = c("0", 
    "1", "2", "3"), class = "factor"), Dep.Bin = structure(c(1L, 
    1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    Anx.Bin = structure(c(1L, 2L, 1L, 1L, 1L), .Label = c("0", 
    "1"), class = "factor"), Survival.Compelete.Sense = structure(c(2L, 
    1L, 1L, 1L, 2L), .Label = c("0", "1"), class = "factor"), 
    Survival.Semi.Sense = c(1L, 0L, 0L, 1L, 1L)), row.names = c(NA, 
5L), class = "data.frame")
> 

1 Ответ

1 голос
/ 06 мая 2020

Судя по описанию проблемы, второй data.frame не нужен — достаточно одного RPSdata. Задача решается выборкой тех строк, в которых условный столбец равен 1.

library(ggplot2)

# Boxplot of one-year survival restricted to the rows flagged "1".
# The flag column is spelled `Survival.Compelete.Sense` in the sample data, so
# that exact name is used here; `Survival.Complete.Sense` does not exist in
# RPSdata and would fail with "object not found".
# The flag is a factor, hence the comparison against the label "1".
ggplot(
  data = subset(RPSdata, Survival.Compelete.Sense == "1"),
  mapping = aes(x = Survival.Compelete.Sense, y = Survival.One.Year)
) +
  # Survival.One.Year contains NA; drop those values without a warning.
  geom_boxplot(na.rm = TRUE)

Другой вариант — с пакетом dplyr: сначала примените filter, а затем передайте результат в ggplot. Я также преобразую столбец оси x в фактор.

library(dplyr)
library(ggplot2)

# Same plot built as a dplyr pipeline: keep the rows flagged "1", then plot.
# The flag column is spelled `Survival.Compelete.Sense` in the sample data, so
# that exact name is used here; `Survival.Complete.Sense` does not exist in
# RPSdata and would fail with "object not found".
RPSdata %>%
  # The flag is a factor, hence the comparison against the label "1".
  filter(Survival.Compelete.Sense == "1") %>%
  # Re-creating the factor drops the now-unused "0" level.
  mutate(Survival.Compelete.Sense = factor(Survival.Compelete.Sense)) %>%
  ggplot(aes(x = Survival.Compelete.Sense, y = Survival.One.Year)) +
  # Survival.One.Year contains NA; drop those values without a warning.
  geom_boxplot(na.rm = TRUE)
...