Сначала создадим небольшой набор данных для примера:
# Example input: five Luxembourg street addresses, each keyed by an integer id.
xdf <- data.frame(
  address_ID = seq_len(5L),
  address = c(
    "4 Kiricheneck 9990",
    "10 Kiricheneck 9990",
    "26 Kiricheneck 9990",
    "27 Kiricheneck 9990",
    "6 Avenue D'oberkorn 4640"
  ),
  stringsAsFactors = FALSE # keep the addresses as plain character strings
)
Теперь давайте создадим подходящую оболочку API для этой конечной точки:
#' Geocode one free-text address with the Luxembourg Geoportail API
#'
#' Sends `query[1]` as the `queryString` parameter of a GET request to
#' `https://apiv3.geoportail.lu/geocode/search`, parses the JSON reply and
#' flattens it into a data frame with one row per entry of the reply's
#' `results`.
#'
#' @param query Character vector; only its first element is sent.
#' @return A data frame with the columns `ratio`, `name`, `easting`,
#'   `address`, `northing`, `matching street` and `accuracy`, followed by the
#'   columns of the reply's `AddressDetails` and by `type`, `lng`, `lat`,
#'   `geom`, `geom_x` and `geom_y`. When the reply is not flagged as
#'   successful, or carries no result rows, a warning is raised and an empty
#'   data frame is returned.
#' @details Stops with an error when `httr` or `jsonlite` is not installed,
#'   and (through `httr::stop_for_status()`) when the HTTP request fails.
geoportail_geocode <- function(query) {
  # Every call below is namespace-qualified, so the packages only have to be
  # installed; attaching them would alter the caller's search path.
  for (pkg in c("httr", "jsonlite")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(
        "package '", pkg, "' is required by geoportail_geocode()",
        call. = FALSE
      )
    }
  }

  # Length-based fallback: NULL and zero-length values both yield `y`.
  `%||%` <- function(x, y) {
    if (length(x) > 0) x else y
  }

  # i-th coordinate of every result row (NA where a row has fewer values).
  # Extracting per row keeps each match's own coordinates when the reply
  # holds several matches.
  # NOTE(review): still assumes Point geometries (a flat x/y pair per row).
  coord_at <- function(coords, i) {
    if (is.matrix(coords)) {
      # defensive: handle a coordinates column that arrives as a matrix
      coords <- lapply(seq_len(nrow(coords)), function(r) coords[r, ])
    }
    vapply(
      coords,
      function(xy) {
        xy <- unlist(xy, use.names = FALSE)
        if (length(xy) >= i) as.numeric(xy[i]) else NA_real_
      },
      numeric(1)
    )
  }

  res <- httr::GET(
    url = "https://apiv3.geoportail.lu/geocode/search",
    httr::user_agent("geoportail_geocode R function used by me@example.com"), # you should add your email to this string
    query = list(
      queryString = query[1]
    )
  )

  httr::stop_for_status(res) # halts on API/network errors; you may not want this but it's standard practice in API packages

  out <- httr::content(res, as = "text", encoding = "UTF-8")
  out <- jsonlite::fromJSON(out)

  succeeded <- length(out$success) > 0 && out$success
  hits <- out$results

  # A reply without result rows cannot be indexed by column below, so it is
  # reported the same way as an unsuccessful reply.
  if (!succeeded || !is.data.frame(hits) || nrow(hits) == 0) {
    warning("Error in geocoding")
    return(data.frame(stringsAsFactors = FALSE))
  }

  ret <- hits[, c("ratio", "name", "easting", "address", "northing", "matching street", "accuracy")]
  ret <- cbind.data.frame(ret, hits$AddressDetails)
  ret$type <- hits$geomlonlat$type %||% NA_character_
  ret$lng <- coord_at(hits$geomlonlat$coordinates, 1L) %||% NA_real_
  ret$lat <- coord_at(hits$geomlonlat$coordinates, 2L) %||% NA_real_
  ret$geom <- hits$geom$type %||% NA_character_
  ret$geom_x <- coord_at(hits$geom$coordinates, 1L) %||% NA_real_
  ret$geom_y <- coord_at(hits$geom$coordinates, 2L) %||% NA_real_
  ret
}
Геокодируем один адрес:
# Geocode only the first address and show the structure of the returned data frame.
str(geoportail_geocode(xdf[["address"]][1]))
## 'data.frame': 1 obs. of 19 variables:
## $ ratio : num 1
## $ name : chr "4,Kiricheneck 9990 Weiswampach"
## $ easting : num 73344
## $ address : chr "4 Kiricheneck,9990 Weiswampach"
## $ northing : num 133788
## $ matching street : chr "Kiricheneck"
## $ accuracy : int 8
## $ zip : chr "9990"
## $ locality : chr "Weiswampach"
## $ id_caclr_street : chr "8188"
## $ street : chr "Kiricheneck"
## $ postnumber : chr "4"
## $ id_caclr_building: chr "181679"
## $ type : chr "Point"
## $ lng : num 6.08
## $ lat : num 50.1
## $ geom : chr "Point"
## $ geom_x : num 73344
## $ geom_y : num 133788
А теперь с помощью tidyverse геокодируем все адреса сразу; избегайте циклов for как чумы — это не Java и не неуклюжий Python:
# Geocode every address (one call per element), row-bind the per-address
# results and attach them column-wise to the input data frame.
str(
  dplyr::bind_cols(
    xdf,
    purrr::map_df(xdf[["address"]], geoportail_geocode)
  )
)
## 'data.frame': 5 obs. of 21 variables:
## $ address_ID : int 1 2 3 4 5
## $ address : chr "4 Kiricheneck 9990" "10 Kiricheneck 9990" "26 Kiricheneck 9990" "27 Kiricheneck 9990" ...
## $ ratio : num 1 1 1 1 1
## $ name : chr "4,Kiricheneck 9990 Weiswampach" "10,Kiricheneck 9990 Weiswampach" "26,Kiricheneck 9990 Weiswampach" "27,Kiricheneck 9990 Weiswampach" ...
## $ easting : num 73344 73280 73203 73241 60462
## $ address1 : chr "4 Kiricheneck,9990 Weiswampach" "10 Kiricheneck,9990 Weiswampach" "26 Kiricheneck,9990 Weiswampach" "27 Kiricheneck,9990 Weiswampach" ...
## $ northing : num 133788 133732 133622 133591 65234
## $ matching street : chr "Kiricheneck" "Kiricheneck" "Kiricheneck" "Kiricheneck" ...
## $ accuracy : int 8 8 8 8 8
## $ zip : chr "9990" "9990" "9990" "9990" ...
## $ locality : chr "Weiswampach" "Weiswampach" "Weiswampach" "Weiswampach" ...
## $ id_caclr_street : chr "8188" "8188" "8188" "8188" ...
## $ street : chr "Kiricheneck" "Kiricheneck" "Kiricheneck" "Kiricheneck" ...
## $ postnumber : chr "4" "10" "26" "27" ...
## $ id_caclr_building: chr "181679" "181752" "181672" "181668" ...
## $ type : chr "Point" "Point" "Point" "Point" ...
## $ lng : num 6.08 6.07 6.07 6.07 5.9
## $ lat : num 50.1 50.1 50.1 50.1 49.5
## $ geom : chr "Point" "Point" "Point" "Point" ...
## $ geom_x : num 73344 73280 73203 73241 60462
## $ geom_y : num 133788 133732 133622 133591 65234
Как отмечено в комментарии в коде функции, вызов stop_for_status() прерывает выполнение функции при ошибке, поэтому вместо него можно вызвать warn_for_status(), проверить код состояния ответа и вернуть пустой data.frame(stringsAsFactors=FALSE).