Пример кода настолько мал, что некоторые подходы могут быть более быстрыми, только если вы просматриваете большие данные. Мне кажется, что вы могли бы сэкономить немного времени, используя подход с разделением каналов, который, в принципе, можно выполнять параллельно, например, используя mclapply
вместо lapply
(в linux / macos). Вы, вероятно, также немного быстрее, если вы используете rbindlist
вместо rbind
и транспонируете. На моей машине такой подход почти вдвое сокращает время выполнения на этом крошечном наборе. Использование mclapply(co, gmb2, mc.cores=4L)
или аналогичного может быть полезным, если отдельные группы L2 больше, но только ухудшают производительность с этим небольшим набором данных. Если не считать изменения кода функции getMinBBox
, это, по крайней мере, несколько сократит время и может стоить попробовать.
В качестве примечания: использование openblas или MKL от Intel вместо стандартных библиотек R может В матричных вычислениях есть большая разница, поэтому вы обязательно должны их использовать.
# function code
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID): 4267
#> proj4string: +proj=longlat +datum=NAD27 +no_defs
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
# common code:
lapply(c("sf", "data.table", "microbenchmark"),
require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236 100 b
#> b2 3.956369 4.186426 9.919638 4.562312 19.13847 47.27808 100 a
Окончательное редактирование: вы можете повысить производительность функции getMinBBox, немного упростив ее.
# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- rbind(huDir, ouDir) %*% t(hull)
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
getMinBBox2 <- function(xy) {
# stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
# skip checking
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- tcrossprod(rbind(huDir, ouDir), hull)
HO <- list(seq_len(n), (seq_len(n)+n))
rr <- matrixStats::rowRanges(projMat)
rd <- as.numeric(matrixStats::rowDiffs(rr))
rangeH <- rr[HO[[1]],]
rangeO <- rr[HO[[2]],]
widths <- rd[HO[[1]]]
heights <- rd[HO[[2]]]
eMin <- which.min(widths*heights)
## seems like sweeping is not required here?
# hProj <- rbind( rangeH[eMin, ], 0)
# oProj <- rbind(0, rangeO[eMin, ])
# hPts <- sweep(hProj, 1, oProj[ , 1], "+")
# oPts <- sweep(hProj, 1, oProj[ , 2], "+")
hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
colnames(pts) <- c("X", "Y")
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb3))
}
all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924 100 c
#> b2 4.020513 4.071012 4.444000 4.166884 4.274048 7.728204 100 b
#> b3 2.871639 2.938294 3.111886 2.986174 3.046096 5.935133 100 a