Как воспроизвести график в ggplot, основанный на нелинейных регрессиях из R-пакета Growthrates? - PullRequest
1 голос
/ 27 января 2020

Я работаю с пакетом Growthrates для получения оценок параметров кривых темпов роста для моих данных. Я сделал регрессии и посмотрел на созданные графики, и я доволен данными, но я хотел бы воспроизвести следующие графики в ggplot2.

Рисунок 1: Мультиплот регрессий для каждой комбинации «группа : обработка»

Я хотел бы получить мультиплот с линиями регрессии для каждой комбинации «группа : обработка», но так, чтобы на нём были все регрессии, которые я выполнил (т. е. logistic, gompertz, gompertz2 и т. д.). Пока что у меня есть:

library(growthrates)

#### Logistic growth model, fitted separately for every sample/treatment group

# Start values and lower/upper bounds for the parameters y0, mumax and K
p     <- c(y0 = 1,   mumax = 0.5, K = 200)
lower <- c(y0 = 0,   mumax = 0,   K = 20)
upper <- c(y0 = 100, mumax = 5,   K = 400)

# One fit per sample x treatment combination (grouping after the `|`)
many_logistics <- all_growthmodels(
  y_data ~ grow_logistic(total_time_days, parms) | sample + treatment,
  data  = Alldata,
  p     = p,
  lower = lower,
  upper = upper,
  log   = "y"
)
pp <- coef(many_logistics)

# Default plots of all fits on a grid of 5 rows x 3 columns
par(mfrow = c(5, 3), mar = c(2.5, 4, 2, 1))
plot(many_logistics)

# Compare selected columns of the results table across treatments,
# one panel per sample
many_logistics_results <- results(many_logistics)
xyplot(mumax ~ treatment | sample,
       data = many_logistics_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample,
       data = many_logistics_results, layout = c(3, 1))
xyplot(K ~ treatment | sample,
       data = many_logistics_results, layout = c(3, 1))

# Predictions at the observed time points (data for drawing the curves)
curve_logistics <- predict(many_logistics)

# Predictions on a user-defined time grid (inter-/extrapolation)
new_times <- data.frame(time = seq(0, 1, by = 0.1))
est_logistics <- predict(many_logistics, newdata = new_times)


#### Using Gompertz regression to fit the data across multiple groups

# Start values and lower/upper bounds for the parameters y0, mumax and K
p     <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0,   K = 20)
upper <- c(y0 = 100, mumax = 5,   K = 400)

# One fit per sample x treatment combination (grouping after the `|`).
# The response must be the `y_data` column, the same one used for the
# logistic fit, so both models are fitted to identical observations.
many_gompertz <- all_growthmodels(y_data ~
                                    grow_gompertz(total_time_days, parms) | sample + treatment,
                                   data = Alldata,
                                   p = p,
                                   lower = lower,
                                   upper = upper)
pp   <- coef(many_gompertz)

# Default plots of all fits on a grid of 5 rows x 3 columns
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_gompertz)

# Compare selected columns of the results table across treatments,
# one panel per sample
many_gompertz_results <- results(many_gompertz)
xyplot(mumax ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))

curve_gompertz <- predict(many_gompertz) # Prediction for given data (data for curve)
est_gompertz <- predict(many_gompertz, newdata = data.frame(time = seq(0, 1, 0.1))) # Extrapolation/interpolation from curve

# Prepare the data frames

# Stack the per-group predictions into one long table. The element names
# end up in `src` and are split at ":" into the two grouping columns; the
# `regression` column records which model produced the rows.
curve_logistics2 <- map_df(curve_logistics, as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "logistic")

curve_gompertz2 <- map_df(curve_gompertz, as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "gompertz")

# Observed data, reduced to the same column names (time, y) as the
# predictions and labelled "none" because no model is involved.
alldata2 <- Alldata %>%
  select(sample, treatment, time = total_time_days, y = y_data) %>%
  mutate(regression = "none")

# Predictions of both models plus the raw observations in one data frame
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2, alldata2)

# Plot helper: builds the comparison plot for one subset of `comp_reg`.
#
# x: data frame with the columns time, y and regression, where regression
#    is "none" for observations and "gompertz"/"logistic" for predictions.
# Returns the ggplot object; nothing is printed here.
REGRESSION_LINE_PLOT <- function(x) {
  observed     <- subset(x, regression %in% c("none"))
  model_curves <- subset(x, regression %in% c("gompertz", "logistic"))

  # Panel styling: no grid lines, black axis lines, no axis titles,
  # bold strip labels, untitled legend.
  # Disabled alternatives kept for reference:
  #   axis.title.x / axis.title.y = element_text(size=14, colour = "black")
  #   axis.text.x / axis.text.y   = element_text(size=14, colour = "black")
  #   plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm")
  panel_style <- theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(0, "cm"),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size = 12, colour = "black", face = "bold"),
    legend.text = element_text(size = 12, colour = "black"),
    legend.title = element_blank(),
    text = element_text(size = 12, family = "Arial")
  )

  # Further disabled options:
  #   scale_colour_manual(values = cbbPalette)   # custom colour palette
  #   scale_x_continuous(limits = c(-1,14))      # fixed time range
  #   scale_y_continuous(limits = c(-1,350), breaks = seq(0, 360, 90))
  #                                              # same y scale in all panels
  ggplot(
    data = x,
    aes(x = time, y = y, colour = regression, linetype = regression)
  ) +
    geom_point(size = 2.5, data = observed) +
    # NOTE(review): stat_smooth() fits its own smoother through the
    # predicted points, so the drawn lines are not the model curves
    # themselves -- confirm whether that is intended.
    stat_smooth(data = model_curves) +
    theme_bw() +
    panel_style +
    ylab("") +
    xlab("")
}

# One row per sample/treatment combination: `data` holds that group's rows
# of comp_reg and `plots` the ggplot built from them.
comp_reg_nested <- comp_reg %>%
  group_by(sample, treatment) %>%
  nest() %>%
  mutate(plots = map(data, REGRESSION_LINE_PLOT))

# Plot belonging to the first row of the nested table
fo_ad_line <- comp_reg_nested$plots[[1]]

Однако я не думаю, что регрессионные линии правильно представлены в ggplot2. Есть ли лучший способ сделать это?

1 Ответ

0 голосов
/ 27 января 2020

Я создал пример данных, более или менее схожий с вашей структурой данных, из встроенных данных пакета и немного упростил код, исключив стандартные функции построения графиков. Мне очень понравился ваш метод построения фрейма данных с map_df, спасибо. Затем я добавил простой ggplot, который, конечно, можно расширить и адаптировать к вашим потребностям.

library(growthrates)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)

## Example data: a subset of the package's built-in `bactgrowth` data set,
## reshaped so that it matches the column names used by the rest of the script
data(bactgrowth)

# keep only rows with conc < 1 from replicate 1
Alldata <- bactgrowth[with(bactgrowth, (conc < 1) & replicate == 1), ]
names(Alldata) <- c(
  "sample", "replicate", "treatment", "total_time_days", "y_data"
)

# NOTE(review): the factor 1000 presumably brings the values into the range
# of the start parameters and bounds used below -- confirm.
Alldata[["y_data"]] <- Alldata[["y_data"]] * 1000

# treatment as character
Alldata[["treatment"]] <- as.character(Alldata[["treatment"]])

#### Logistic growth model, fitted separately for every sample/treatment group

# Start values and lower/upper bounds for the parameters y0, mumax and K
p     <- c(y0 = 1,   mumax = 0.5, K = 200)
lower <- c(y0 = 0,   mumax = 0,   K = 20)
upper <- c(y0 = 100, mumax = 5,   K = 400)

many_logistics <- all_growthmodels(
  y_data ~ grow_logistic(total_time_days, parms) | sample + treatment,
  data  = Alldata,
  p     = p,
  lower = lower,
  upper = upper
)

# Results table of the fits and the predicted curves for the given data
many_logistics_results <- results(many_logistics)
curve_logistics        <- predict(many_logistics)

#### Gompertz growth model, fitted separately for every sample/treatment group

# Uses the p, lower and upper vectors already present in the session.
many_gompertz <- all_growthmodels(
  y_data ~ grow_gompertz(total_time_days, parms) | sample + treatment,
  data  = Alldata,
  p     = p,
  lower = lower,
  upper = upper
)

# Results table of the fits and the predicted curves for the given data
many_gompertz_results <- results(many_gompertz)
curve_gompertz        <- predict(many_gompertz)

# Prepare the data frames

# Stack the per-group predictions into one long table. The element names
# end up in `src` and are split at ":" into the two grouping columns; the
# `regression` column records which model produced the rows.
curve_logistics2 <- map_df(curve_logistics, as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "logistic")

curve_gompertz2 <- map_df(curve_gompertz, as_tibble, .id = "src") %>%
  separate(src, into = c("sample", "treatment"), sep = ":") %>%
  mutate(regression = "gompertz")

# Observations with the same column names (time, y) as the predictions
alldata2 <- Alldata %>%
  rename(time = total_time_days, y = y_data)

## both model curves in one joint data frame
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2)

## Plot: observations as points, predicted curves as lines coloured by
## model, one panel per treatment (rows) x sample (columns)
ggplot(data = comp_reg, mapping = aes(x = time, y = y)) +
  geom_point(data = alldata2) +
  geom_line(mapping = aes(colour = regression)) +
  facet_grid(rows = vars(treatment), cols = vars(sample))
...