Не понимаю сообщение об ошибке RSelenium: «javascript error» - PullRequest
1 голос
/ 04 мая 2020

Я запустил скрипт для веб-скрапинга в RStudio и получил следующую ошибку:

Selenium message:javascript error: this.each is not a function
  (Session info: chrome=81.0.4044.129)
Build info: version: '4.0.0-alpha-2', revision: 'f148142cf8', time: '2019-07-01T21:30:10'
System info: host: 'xxxxxx', ip: 'xxx.xxx.x.xxx', os.name: 'Windows 10', os.arch: 'amd64', os.version: '10.0', java.version: '1.8.0_231'
Driver info: driver.version: unknown

Error:   Summary: JavaScriptError
     Detail: An error occurred while executing user supplied JavaScript.
     class: org.openqa.selenium.JavascriptException
     Further Details: run errorDetails method

Я не совсем понимаю, в чем проблема и как ее можно решить. Кто-нибудь знает, как решить эту проблему? Я все еще новичок в этом, поэтому конкретные шаги были бы для меня очень полезны. Заранее спасибо!

Правка: вот скрипт, который я использую. Кажется, ошибка возникает перед строкой «#end of the main loop».

library(data.table)   # Required for rbindlist
library(dplyr)        # Required to use the pipes %>% and some table manipulation commands
library(magrittr)     # Required to use the pipes %>%
library(rvest)        # Required for read_html
library(RSelenium)    # Required for webscraping with javascript
library(lubridate)    # Required to collect dates
library(stringr)
library(purrr)


# Keep strings as character vectors (not factors) so that data frames built
# later in the script can be merged without errors.
options(stringsAsFactors = FALSE)

# Paste the GoodReads URL of the book to scrape
url <- "https://www.goodreads.com/book/show/1885.Pride_and_Prejudice?ac=1&from_search=true&qid=VkA2NbcGBa&rank=1"

# If FALSE, "all languages" is chosen (per the original author's note)
languageOnly <- FALSE

# Set your browser settings: start the Selenium server together with a Chrome
# client, then open the book page in that browser session.
rD <- rsDriver(port = 4585L, browser = "chrome", chromever = "81.0.4044.69")
remDr <- rD[["client"]]
remDr$setTimeout(type = "implicit", milliseconds = 2000)
remDr$navigate(url)

# Page title (later used as the output file name) and the result accumulator
bookTitle <- unlist(remDr$getTitle())
finalData <- data.frame()

# Main loop going through the website pages.
# Each iteration scrapes the reviews shown on the current page, appends them to
# `finalData`, and clicks the "next page" link until no such link is found.
# `finalData` is extended once per page (rather than collected and bound at the
# end) so that the pages gathered so far are kept if a later page fails.
morePages <- TRUE
pageNumber <- 1
while (morePages) {

  # Select reviews in correct language.
  # It should also work if you only fill in the numeral language code, and
  # leave the first one empty.
  # NOTE(review): the empty-value option is clicked when languageOnly is TRUE
  # and the fifth option otherwise -- confirm this matches the intended meaning
  # of languageOnly.
  languageXpath <- if (languageOnly) {
    "//select[@id='language_code']/option[@value='']"
  } else {
    "//select[@id='language_code']/option[5]"
  }
  selectLanguage <- remDr$findElement("xpath", languageXpath)
  selectLanguage$clickElement()
  Sys.sleep(3)

  # Expand all reviews. The clicks matter only for their side effect, so a
  # plain loop is used instead of sapply().
  expandMore <- remDr$findElements("link text", "...more")
  for (moreLink in expandMore) {
    moreLink$clickElement()
  }

  # Extracting the reviews from the page
  reviews <- remDr$findElements("css selector", "#bookReviews .stacked")
  reviews.html <- lapply(reviews, function(x) {
    x$getElementAttribute("outerHTML")[[1]]
  })
  reviews.list <- lapply(reviews.html, function(x) {
    read_html(x) %>% html_text()
  })
  # as.character() turns the NULL that unlist() returns for an empty page into
  # character(0), so the vectorised calls below work on a page with no reviews.
  reviews.text <- as.character(unlist(reviews.list))

  # Some reviews have only rating and no text, so we process them separately
  onlyRating <- str_detect(reviews.text, "^\\\n\\\n")

  # Full reviews: elements alternate between a header (odd positions) and the
  # review text that belongs to it (even positions).
  if (sum(!onlyRating) > 0) {

    filterData <- reviews.text[!onlyRating]
    headerIdx <- seq(1, length(filterData), by = 2)

    fullReviews <- purrr::map_df(headerIdx, function(i) {
      review <- unlist(strsplit(filterData[i], "\n"))

      data.frame(
        date = mdy(review[2]),              # date
        username = str_trim(review[5]),     # user
        rating = str_trim(review[9]),       # overall
        comment = str_trim(review[12]),     # comment
        stringsAsFactors = FALSE
      )
    })

    # Add review text to full reviews. Indexing with headerIdx + 1 yields NA
    # for a trailing header that has no text element, instead of failing in
    # seq(2, ...) (single element) or on a length mismatch (odd element count).
    fullReviews$review <- str_trim(
      str_remove(filterData[headerIdx + 1], "\\s*\\n\\s*\\(less\\)")
    )

  } else {
    fullReviews <- data.frame()
  }


  # Partial reviews (only rating)
  if (sum(onlyRating) > 0) {

    filterData <- reviews.text[onlyRating]
    partialReviews <- purrr::map_df(seq_along(filterData), function(i) {
      review <- unlist(strsplit(filterData[i], "\n"))

      data.frame(
        date = mdy(review[9]),              # date
        username = str_trim(review[4]),     # user
        rating = str_trim(review[8]),       # overall
        comment = "",
        review = "",
        stringsAsFactors = FALSE
      )
    })

  } else {
    partialReviews <- data.frame()
  }

  finalData <- rbind(finalData, fullReviews, partialReviews)

  # Go to next page if possible; findElements() returns an empty list when the
  # link is absent, which ends the loop.
  nextPage <- remDr$findElements("xpath", "//a[@class='next_page']")
  if (length(nextPage) > 0) {
    message(paste("PAGE", pageNumber, "Processed - Going to next"))
    nextPage[[1]]$clickElement()
    pageNumber <- pageNumber + 1
    Sys.sleep(2)
  } else {
    message(paste("PAGE", pageNumber, "Processed - Last page"))
    morePages <- FALSE
  }

}
#end of the main loop

# Replace missing ratings by 'not rated'. Both the empty string and NA count as
# missing: `NA == ""` evaluates to NA, so NA ratings would otherwise be kept.
finalData$rating <- ifelse(
  is.na(finalData$rating) | finalData$rating == "",
  "not rated",
  finalData$rating
)

# Stop server
rD[["server"]]$stop()

# Set the directory where you wish the file to go (use forward slashes).
# The file is written via an explicit path instead of setwd(), so the working
# directory of the R session is left unchanged.
outputDir <- "C:/Users/ledgreve/Desktop/GoodReads_TextMining-master/Scripts/New Scripts/Test1"
if (!dir.exists(outputDir)) {
  dir.create(outputDir, recursive = TRUE, showWarnings = FALSE)
}

# The page title becomes the file name; characters that are not allowed in
# Windows file names are replaced by "_" so the write cannot fail on them.
safeTitle <- gsub("[<>:\"/\\\\|?*]", "_", bookTitle)

# Write results
write.csv(
  finalData,
  file.path(outputDir, paste0(safeTitle, ".csv")),
  row.names = FALSE
)
message("FINISHED!")
...