Р: Как использовать применить в этом случае, чтобы ускорить функцию? - PullRequest
2 голосов
/ 27 июня 2019

Я пытаюсь вычислить расстояние между точками и ближайшим полигоном для каждой точки. В настоящее время я использую функцию st_distance (библиотека sf), которая кажется самым быстрым способом сделать это. Но это все еще занимает много времени. Вот почему я хочу изменить цикл, который я использую, на процедуру применения или таким образом, чтобы сделать это быстрее. Может ли кто-нибудь помочь мне сделать это, пожалуйста? Спасибо!

## Importation of shapefiles
# library(rgdal)
# pathToShp = "J:/shp_files/"
# points = readOGR(dsn = pathToShp, layer="points_2154", stringsAsFactors=FALSE)   #Points in EPSG 2154 Lambert
# polygons = readOGR(dsn = pathToShp, layer="polygons_2154", stringsAsFactors=FALSE)   #Polygons

library(sf)
# points_sf = st_as_sf(points)
# polygons_sf = st_as_sf(polygons)

## Search the closest polygon for each point
point_polygon <- c()
point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)     # ID of the closest polygon for each point

## Distance between each point and the closest polygon
dist_sf <- c()
for (i in 1:nrow(points_sf)) {
  dist_sf[i] = st_distance(points_sf[i,], 
                           polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                           by_element = TRUE)    
}

Вы должны получить:

dist_sf
# [1] 514830.0 260656.0 260647.7 260653.5 262053.6

Данные

points_sf <- structure(list(field_1 = c("1", "2", "3", "4", "5"), adresse = c("6 RUE DES VIGNES, 40140 SOUSTONS, France", 
"22 RUE DE PARIS, 03000 MOULINS, France", "5 RUE REGNAUDIN, 03000 MOULINS, France", 
"31 RUE DE PARIS, 03000 MOULINS, France", "15 RUE DES RAMIERS, 85360 LA TRANCHE SUR MER, France"
), latitude = c(43.75395, 46.56875, 46.56893, 46.56873, 46.35638
), longitude = c(-1.31277, 3.330296, 3.330394, 3.330224, -1.470842
), geometry = structure(list(structure(c(352768.516216819, 6304476.86420524
), class = c("XY", "POINT", "sfg")), structure(c(725298.307259582, 
6607688.02981763), class = c("XY", "POINT", "sfg")), structure(c(725305.729670888, 
6607708.05130113), class = c("XY", "POINT", "sfg")), structure(c(725292.801896427, 
6607685.78563239), class = c("XY", "POINT", "sfg")), structure(c(356412.817813797, 
6593779.89675049), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 352768.516216819, 
ymin = 6304476.86420524, xmax = 725305.729670888, ymax = 6607708.05130113
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(field_1 = NA_integer_, 
adresse = NA_integer_, latitude = NA_integer_, longitude = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA, 
5L), class = c("sf", "data.frame"))

polygons_sf <- structure(list(ID = c("M1204300", "E6490620", "E4240850"), geometry = structure(list(
    structure(list(structure(c(533957.599997006, 534047.299997008, 
    534171.89999701, 534191.69999701, 534226.099997011, 534270.599997012, 
    534325.099997013, 534369.799997014, 534449.399997015, 534549.199997017, 
    534674.099997019, 534924.099997023, 535084.299997026, 535174.199997028, 
    535239.099997029, 535293.89999703, 535323.599997031, 6786523.09989417, 
    6786492.39989417, 6786461.39989417, 6786436.19989417, 6786370.99989417, 
    6786305.69989417, 6786255.19989417, 6786219.89989417, 6786184.19989417, 
    6786163.39989417, 6786162.39989417, 6786185.29989417, 6786218.89989417, 
    6786218.19989417, 6786207.69989417, 6786182.19989417, 6786156.99989417
    ), .Dim = c(17L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg")), structure(list(structure(c(608743.099998312, 608792.899998313, 
    608827.799998314, 608847.799998314, 608867.999998314, 608918.699998315, 
    608974.499998316, 609015.299998317, 609071.299998318, 609086.499998318, 
    609106.399998319, 609156.19999832, 609181.59999832, 609197.09999832, 
    609202.299998321, 609217.599998321, 609257.999998322, 609273.299998322, 
    609324.099998323, 609354.699998323, 7003205.49989546, 7003185.09989545, 
    7003164.79989545, 7003169.69989546, 7003194.49989546, 7003278.99989546, 
    7003378.49989546, 7003478.09989546, 7003597.59989546, 7003623.39989546, 
    7003618.29989546, 7003592.89989546, 7003642.59989546, 7003702.49989546, 
    7003732.39989546, 7003767.29989546, 7003816.89989546, 7003856.79989546, 
    7003946.29989546, 7004020.99989546), .Dim = c(20L, 2L))), class = c("XY", 
    "MULTILINESTRING", "sfg")), structure(list(structure(c(669193.399999424, 
    669183.499999424, 669153.399999423, 669097.999999422, 669077.999999422, 
    669048.599999421, 7097101.79989609, 7097111.89989609, 7097102.09989609, 
    7097047.59989609, 7097052.79989609, 7097123.99989609), .Dim = c(6L, 
    2L)), structure(c(669048.599999421, 669022.899999421, 668953.19999942, 
    668888.899999418, 668854.499999418, 668809.899999417, 668790.299999417, 
    668740.899999416, 668721.199999415, 668656.799999414, 668637.199999414, 
    668618.099999413, 7097123.99989609, 7097149.19989609, 7097189.79989609, 
    7097265.29989609, 7097340.49989609, 7097385.89989609, 7097430.99989609, 
    7097496.39989609, 7097532.59989609, 7097598.09989609, 7097653.1998961, 
    7097758.39989609), .Dim = c(12L, 2L)), structure(c(668618.099999413, 
    668598.799999413, 668553.799999412, 668519.299999411, 668435.09999941, 
    668335.899999408, 668159.599999405, 7097758.39989609, 7097833.49989609, 
    7097949.7998961, 7098010.0998961, 7098095.7998961, 7098191.4998961, 
    7098459.3998961), .Dim = c(7L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg"))), class = c("sfc_MULTILINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = 533957.599997006, 
ymin = 6786156.99989417, xmax = 669193.399999424, ymax = 7098459.3998961
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 0:2, class = c("sf", 
"data.frame"), sf_column = "geometry", agr = structure(c(ID = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")))

1 Ответ

2 голосов
/ 27 июня 2019

Это:

apply(st_distance(points_sf, polygons_sf), 1, min)

кажется самым быстрым вариантом. Хотя нативная версия sf не намного медленнее. Ниже приведены фактические сроки

library(microbenchmark)

microbenchmark(
    loop = {
        point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)
        ## Distance between each point and the closest polygon
        dist_sf <- c()
        for (i in 1:nrow(points_sf)) {
            dist_sf[i] = st_distance(points_sf[i,], 
                                     polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                                     by_element = TRUE)    
        }
    },
    apply = { apply(st_distance(points_sf, polygons_sf), 1, min) },
    native = {
        polys = polygons_sf[st_nearest_feature(points_sf, polygons_sf), ]
        st_length(st_nearest_points(points_sf, polys, pairwise = TRUE))
    },
    dt = {
        dist = as.data.table(st_distance(points_sf, polygons_sf))
        dist[, pmin(V1, V2, V3)]
    },
    times = 10
)


Unit: milliseconds
   expr     min       lq      mean   median       uq     max neval  cld
   loop 29.2660 30.36030 32.092494 30.95950 32.97390 42.5732   100    d
  apply  2.7579  2.90365  3.124069  2.96670  3.20515  5.0635   100 a   
 native  3.9875  4.13340  4.566414  4.24310  4.55095 11.9232   100   c 
     dt  3.4089  3.57920  3.838198  3.66055  3.93795  8.6983   100  b 
...