Код ниже работает, если я удаляю Sys.sleep () из функции map (). Я пытался исследовать ошибку («Не знаю, как извлечь из замыкания»), но я не нашел много на эту тему.
Кто-нибудь знает, где я могу найти документацию по этой ошибке и какую-либо справку о том, почему она происходит и как ее предотвратить?
library(rvest)
library(tidyverse)
library(stringr)
# lets assume 3 pages only to do it quickly
page <- (0:18)
# no need to create a list. Just a vector
urls = paste0("https://www.mlssoccer.com/players?page=", page)
# define this function that collects the player's name from a url
get_the_names = function( url){
url %>%
read_html() %>%
html_nodes("a.name_link") %>%
html_text()
}
# map the urls to the function that gets the names
players = map(urls, get_the_names) %>%
# turn into a single character vector
unlist() %>%
# make lower case
tolower() %>%
# replace the `space` to underscore
str_replace_all(" ", "-")
# Now create a vector of player urls
player_urls = paste0("https://www.mlssoccer.com/players/", players )
# define a function that reads the 3rd table of the url
get_the_summary_stats <- function(url){
url %>%
read_html() %>%
html_nodes("table") %>%
html_table() %>% .[[3]]
}
# lets read 3 players only to speed things up [otherwise it takes a significant amount of time to run...]
a_few_players <- player_urls[1:5]
# get the stats
tables = a_few_players %>%
# important step so I can name the rows I get in the table
set_names() %>%
#map the player urls to the function that reads the 3rd table
# note the `safely` wrap around the get_the_summary_stats' function
# since there are players with no stats and causes an error (eg.brenden-aaronson )
# the output will be a list of lists [result and error]
map(., ~{ Sys.sleep(5)
safely(get_the_summary_stats) }) %>%
# collect only the `result` output (the table) INTO A DATA FRAME
# There is also an `error` output
# also, name each row with the players name
map_df("result", .id = "player") %>%
#keep only the player name (remove the www.mls.... part)
mutate(player = str_replace(player, "https://www.mlssoccer.com/players/", "")) %>%
as_tibble()
tables <- tables %>% separate(Match,c("awayTeam","homeTeam"), extra= "drop", fill = "right")