Пчелиный теплый участок с кругами разного размера в R - PullRequest
0 голосов
/ 13 июня 2018

Я хочу сделать график пчелиного тепла в R, но размер точек будет связан с третьей переменной.Что-то вроде этого beerwarn plot from the NYT

Но просто, в стиле R, без описаний и тому подобное.Я уже посмотрел на пакет beeswarm, но, похоже, в нем нет этого варианта.

Это то, что я пробовал, но кружки застряли

library(beeswarm)

myData <- list(x = rnorm(n = 10, sd = 1, mean = 10), 
               y = rnorm(n = 10, sd = 3, mean = 7))
df <- as.data.frame(myData)
df$size <- 1:10

bee_info <- beeswarm(myData, col = c("red", "blue"))

plot(bee_info$x, bee_info$y, type = "n", axes = FALSE, xlab = "", ylab = "")
symbols(x = bee_info$x, y = bee_info$y, 
        circles = rep(df$size, 2), 
        bg = adjustcolor(bee_info$col, 0.5),
        xpd = TRUE, add = TRUE, 
        inches = 0.5)
axis(1, at = seq_along(myData), labels = names(myData), lwd = 0)
axis(2, las = 2)

first try

Играя с spacing в beeswarm и inches в symbol, я могу сделать сюжет ближе к тому, что хочу.

library(beeswarm)

myData <- list(x = rnorm(n = 10, sd = 1, mean = 10), 
               y = rnorm(n = 10, sd = 3, mean = 7))
df <- as.data.frame(myData)
df$size <- 1:10

bee_info <- beeswarm(myData, col = c("red", "blue"), spacing = 5)

plot(bee_info$x, bee_info$y, type = "n", axes = FALSE, xlab = "", ylab = "")
symbols(x = bee_info$x, y = bee_info$y, 
        circles = rep(df$size, 2), 
        bg = adjustcolor(bee_info$col, 0.5),
        xpd = TRUE, add = TRUE, 
        inches = 0.2)
axis(1, at = seq_along(myData), labels = names(myData), lwd = 0)
axis(2, las = 2)

enter image description here

Но я бы хотел, чтобы круги были ближе и автоматизировали это, поскольку мне нужно будет генерировать сотни таких графиков.

Обновление:

Используя функции, которые я создал ниже, я получил то, что хотел.Но он использует грубую силу для оценки положения точек, что является очень неэффективным решением.

library(dplyr)
library(magrittr)
library(purrr)
library(sp)
library(rgeos)
library(raster)
library(truncnorm)
library(scales)
library(RColorBrewer)
library(magick)

# from https://stat.ethz.ch/pipermail/r-sig-geo/2012-November/016632.html
get_circle_coord <- function(x, y, r = 1, lo = 100){
  pts <- seq(0, 2 * pi, length.out = lo)
  coords <- cbind(x + r * sin(pts), y + r * cos(pts))
  return(coords)
}

make_poly_circles <- function(x, y, r = 1, lo = 100, id = "x"){
  circle <- get_circle_coord(x, y, r, lo)  %>% 
    as.data.frame %>%
    magrittr::set_colnames(c("x", "y")) %>% 
    as.matrix %>% 
    Polygon %>% 
    list(.) %>% 
    Polygons(ID = id) %>% 
    list(.) %>% 
    SpatialPolygons
  return(circle)
}

make_poly_rect <- function(xMin, xMAx, yMin, yMax, id = "x"){
  rect <- data.frame(x = c(xMAx, xMin, xMin, xMAx, xMAx), 
                     y = c(yMin, yMin, yMax, yMax, yMin)) %>% 
    as.matrix %>% 
    Polygon %>% 
    list(.) %>% 
    Polygons(ID = id) %>% 
    list(.) %>% 
    SpatialPolygons
  return(rect)
}

get_area_in_intersection <- function(shp1, shp2){
  int <- rgeos::gIntersection(shp1, shp2)
  int_area <- ifelse(is.null(int), 0, rgeos::gArea(int))
  return(int_area)
}

get_side_move <- function(circle, other_circles, x){
  y_circle <- sp::bbox(circle)["y",]
  bb_others <- sp::bbox(other_circles)

  maxX <- bb_others["x", "max"]
  minX <- bb_others["x", "min"]
  maxY <- y_circle["min"]
  minY <- y_circle["max"]

  square_left <- make_poly_rect(xMin = minX, xMAx = x, 
                                yMin = minY, yMax = maxY)
  square_right <- make_poly_rect(xMin = x, xMAx = maxX, 
                                 yMin = minY, yMax = maxY)
  left_area <- get_area_in_intersection(other_circles, square_left)
  right_area <- get_area_in_intersection(other_circles, square_right)
  areas <- c("left" = left_area, "right" = right_area)

  same_area <- all.equal(areas[[1]], areas[[2]]) == "TRUE"
  move_side <- ifelse(same_area,
                      yes = sample(names(areas), 1), 
                      no = names(which.min(areas)))
  return(move_side)
}

# y: y position of the circles
# x: x inicial position of the circles
# size: size of the circles
# col: collor of the circles (one character or a vector of the size of y)
# rescFactor: factor to rescale size. 
# lo: who many points will the circles coords will have
# step_x: number by with the the circle will move in the x axis each loop
# progress_bar: if show a progress bar
make_linear_circle_packing <- function(y,
                                       x,
                                       size,
                                       range_y = range(y), 
                                       col = "white",
                                       rescFactor = 0.025,
                                       lo = 100,
                                       step_x = quantile(y * rescFactor, 
                                                         0.1),
                                       progress_bar = TRUE){

  # for the loop
  n <- length(y)
  indexes <- seq_len(n)

  # Will store the shape files of the circles can't use `vector`
  # function because `do.call(rbind, circle_packing) will return error 
  # with NULL values
  circle_packing <- list()

  # mix the order, so we have a different plot each time
  s <- sample(indexes)
  y <- y[s]
  size <- size[s]
  size <- scales::rescale(x = size,
                          to = c(0, diff(range_y) * rescFactor))

  if(progress_bar){
    pb <- txtProgressBar(min = 1, max = n, style = 3)
  }

  for(i in indexes){

    # Make the circle
    circle <- make_poly_circles(x = x, y = y[i], 
                                r = size[i], 
                                lo = lo, id = i)

    if(progress_bar){
      setTxtProgressBar(pb, i)
    }

    # If is the first circle, as will 
    # not have anything to  overlap with
    # we just store it
    if(i == 1) {
      circle_packing[[i]] <- circle
      next
    }

    # circles that have already been processed
    other_circles <- do.call(rbind, circle_packing)

    # This is te new x position of the circle 
    # (it can decrease or increase in the loop depenging 
    # of the density in each area)
    new_x <- x
    first_repetition <- TRUE

    repeat{
      check_intersection <- rgeos::gIntersects(other_circles, circle)
      if(!check_intersection) break
      if(first_repetition){
        move_side <- get_side_move(circle = circle, 
                                   other_circles = other_circles, 
                                   x = x)
        first_repetition <- FALSE
      }
      move <- ifelse(test = move_side == "left", yes = -step_x , no = step_x)
      new_x <- new_x + move 
      circle <- make_poly_circles(new_x, y[i], size[i], lo = lo, id = i)
    } 
    circle_packing[[i]] <- circle
  }

  # Make Spatial Polygon Data Frame in the order the user supply
  circle_packing <- do.call(rbind, circle_packing)
  df <- data.frame(id = s, col = col[s],size = size,
                   y = y, x = x,
                   stringsAsFactors = FALSE)
  circle_packing <- sp::SpatialPolygonsDataFrame(circle_packing, df)
  return(circle_packing)
}


multiples_linear_circle_packing <- function(df, 
                                     space_btw = NULL, 
                                     step_x = quantile(df$y, 0.001)){

  space_btw = NULL

  per_cl <- split(df, df$class)
  classes <- names(per_cl)
  nClass <- length(classes) 

  if(is.null(space_btw)){
    space_btw <- nClass/2.5^nClass
  }

  circle_packings <- vector("list", length = nClass)

  for(i in seq_len(nClass)){
    class <- classes[[i]]
    cat("Class", class, "|", i, "of", nClass, "\n")
    circle_packings[[i]] <- make_linear_circle_packing(y = per_cl[[i]]$y, 
                                                       x = 0, 
                                                       size = per_cl[[i]]$size, 
                                                       col = per_cl[[i]]$col, 
                                                       range_y = range(df$y))

    circle_packings[[i]]$class <- class
    cat("\n")
  }

  bbs <- lapply(circle_packings, bbox)
  x_sizes <- sapply(bbs, function(x) diff(x["x",]))
  x_total <- sum(x_sizes)
  dist_btw_class <- x_total * space_btw
  circle_packings[[1]]$x <- 1
  x <- x_sizes[1] + dist_btw_class

  for(i in seq_len(nClass)[-1]){

    circle_packings[[i]]$x <- x
    circle_packings[[i]] <- raster::shift(circle_packings[[i]], x = x)
    x <- x + x_sizes[i] + dist_btw_class
  }

  circle_packings <- do.call(rbind, circle_packings)
  return(circle_packings)
}


plot_circle_packings <- function(df,
                                 space_btw = NULL, 
                                 step_x = quantile(df$y, 0.001)){

  circle_packings <- multiples_linear_circle_packing(df = df, 
                                                     space_btw = space_btw, 
                                                     step_x = space_btw)


  plot(circle_packings, col = circle_packings$col)

  # to calculate the axis and ticks position
  bb <- bbox(circle_packings)
  ylims <- bb["y",]
  xlims <- bb["x",]

  # position axis
  x_size <- diff(xlims)
  y_pos <- xlims[1] - x_size/50

  y_size <- diff(ylims)
  x_pos <- ylims[1] -  y_size/5

  # Tick marks position
  y_at <- pretty(x =  ylims[1]:ylims[2], n = 5)

  axis(side = 2, at = y_at, las = 2)
  axis(side = 1, at = unique(circle_packings$x), 
       labels = unique(circle_packings$class), pos = x_pos, 
       lwd = 0)

  circle_packings
}

Вот пример использования этой функции:

n <- 100
palletes <- c(x = "Blues", y = "Reds")

ys <- list(x = round(rnorm(n = n, sd = 5, mean = 100)), 
           y = round(rnorm(n = n, sd = 7, mean = 50)),
           z = round(rnorm(n = n, sd = 3, mean = 75)), 
           a = round(rnorm(n = n, sd = 10, mean = 75)), 
           b = round(rnorm(n = n, sd = 15, mean = 55)), 
           c = round(rnorm(n = n, sd = 10, mean = 35)))

sizes <- list(x = rtruncnorm(n = n, sd = 10, mean = 50, a = 0), 
              y = rtruncnorm(n = n, sd = 5, mean = 100, a = 0), 
              z = rtruncnorm(n = n, sd = 25, mean = 75, a = 0), 
              a = rtruncnorm(n = n, sd = 1, mean = 10, a = 0),
              b = rtruncnorm(n = n, sd = 11, mean = 80, a = 0), 
              c = rtruncnorm(n = n, sd = 10, mean = 30, a = 0))

df <- data.frame(y = unlist(ys), size = unlist(sizes), 
                 class = rep(names(ys), each = n), 
                 col = rep(brewer.pal(length(sizes), "Dark2"), each = n),
                 stringsAsFactors = FALSE)

path <- "~/Desktop/teste.png"
png(path, width = 2000, height = 2000)
plot_circle_packings(df)
dev.off()

# This is because I could not remove the 
# margin ploting a SpatialPolygon
image_read(path) %>% image_trim %>% image_write(path)

Вотрезультат:

enter image description here

...