Question

Код ниже используется для удаления веб-сайта с помощью вызова API.Мне просто нужно изменить startDate и endDate, чтобы получить набор данных, который я хочу.Раньше все работало нормально, прекрасно справляясь с циклами, но после того, как я внес некоторые изменения в часть html_nodes () - попытайтесь извлечь другой раздел на веб-странице, он будет возвращать мне данные с той же датой.с предупреждением об ошибке «Ошибка в seq.int (0, to0 - from, by): неверный вход» аргументом «r».Что здесь нужно сделать?

library(tidyverse)
library(readr)
library(tidyr)
library(dplyr)
library(xlsx)
library(beepr)

get_sounding_data <- function(region = c("naconf", "samer", "pac", "nz", "ant",
                                     "np", "europe", "africa", "seasia", "mideast"),
                          date,
                          from_hr = c("00", "12", "all"),
                          to_hr = c("00", "12", "all"),
                          station_number = 48615) {

  # we use these pkgs (the readr and dplyr dependencies removed)
  suppressPackageStartupMessages({
    require("xml2", quietly = TRUE)
    require("httr", quietly = TRUE)
    require("rvest", quietly = TRUE)
  })

  # validate region
  region <- match.arg(
    arg = region,
    choices = c(
      "naconf", "samer", "pac", "nz", "ant",
      "np", "europe", "africa", "seasia", "mideast"
    )
  )

  # validates the date for us if it's a character string
  date <- as.Date(date)

  # get year and month
  year <- as.integer(format(date, "%Y"))
  stopifnot(year %in% 1973:as.integer(format(Sys.Date(), "%Y")))

  year <- as.character(year)
  month <- format(date, "%m")

# we need these to translate day & *_hr to the param the app needs
c(
"0100", "0112", "0200", "0212", "0300", "0312", "0400", "0412",
"0500", "0512", "0600", "0612", "0700", "0712", "0800", "0812",
"0900", "0912", "1000", "1012", "1100", "1112", "1200", "1212",
"1300", "1312", "1400", "1412", "1500", "1512", "1600", "1612",
"1700", "1712", "1800", "1812", "1900", "1912", "2000", "2012",
"2100", "2112", "2200", "2212", "2300", "2312", "2400", "2412",
"2500", "2512", "2600", "2612", "2700", "2712", "2800", "2812",
"2900", "2912", "3000", "3012", "3100", "3112"
  ) -> hr_vals

c(
"01/00Z", "01/12Z", "02/00Z", "02/12Z", "03/00Z", "03/12Z", "04/00Z",
"04/12Z", "05/00Z", "05/12Z", "06/00Z", "06/12Z", "07/00Z", "07/12Z",
"08/00Z", "08/12Z", "09/00Z", "09/12Z", "10/00Z", "10/12Z", "11/00Z",
"11/12Z", "12/00Z", "12/12Z", "13/00Z", "13/12Z", "14/00Z", "14/12Z",
"15/00Z", "15/12Z", "16/00Z", "16/12Z", "17/00Z", "17/12Z", "18/00Z",
"18/12Z", "19/00Z", "19/12Z", "20/00Z", "20/12Z", "21/00Z", "21/12Z",
"22/00Z", "22/12Z", "23/00Z", "23/12Z", "24/00Z", "24/12Z", "25/00Z",
"25/12Z", "26/00Z", "26/12Z", "27/00Z", "27/12Z", "28/00Z", "28/12Z",
"29/00Z", "29/12Z", "30/00Z", "30/12Z", "31/00Z", "31/12Z"
  ) -> hr_inputs

  hr_trans <- stats::setNames(hr_vals, hr_inputs)

  o_from_hr <- from_hr <- as.character(tolower(from_hr))
  o_to_hr <- to_hr <- as.character(tolower(to_hr))

  if ((from_hr == "all") || (to_hr == "all")) {
  from_hr <- to_hr <- "all"
  } else {

  from_hr <- hr_trans[sprintf("%s/%02dZ", format(date, "%d"), as.integer(from_hr))]
match.arg(from_hr, hr_vals)

 to_hr <- hr_trans[sprintf("%s/%02dZ", format(date, "%d"), as.integer(to_hr))]
match.arg(to_hr, hr_vals)
 }

  # clean up the station number if it was entered as a double
  station_number <- as.character(as.integer(station_number))

# execute the API call
httr::GET(
url = "http://weather.uwyo.edu/cgi-bin/sounding",
query = list(
  region = region,
  TYPE = "TEXT:LIST",
  YEAR = year,
  MONTH = sprintf("%02d", as.integer(month)),
  FROM = from_hr,
  TO = to_hr,
  STNM = station_number
   )
  ) -> res

  # check for super bad errors (that we can't handle nicely)
  httr::stop_for_status(res)

  # get the page content
  doc <- httr::content(res, as="text")

  # if the site reports no data, issue a warning and return an empty data frame
  if (grepl("Can't get", doc)) {
    doc <- xml2::read_html(doc)
    msg <- rvest::html_nodes(doc, "body")
    msg <- rvest::html_text(msg, trim=TRUE)
    msg <- gsub("\n\n+.*$", "", msg)
    warning(msg)
    return(data.frame(stringsAsFactors=FALSE))
   }

  # if the site reports no data, issue a warning and return an empty data frame
  if (grepl("Can't get", doc)) {
    doc <- xml2::read_html(doc)
    msg <- rvest::html_nodes(doc, "body")
    msg <- rvest::html_text(msg, trim=TRUE)
    msg <- gsub("\n\n+.*$", "", msg)
    warning(msg)
    return(data.frame(stringsAsFactors=FALSE))
   }

  # turn it into something we can parse
  doc <- xml2::read_html(doc)

  raw_dat <- doc %>%
    html_nodes("h3+ pre") %>% 
    html_text()

  indices <- raw_dat %>% 
    str_split(pattern = "\n", simplify = T) %>% 
    map_chr(str_squish) %>% 
    tibble(x = .) %>% 
    separate(x, into = c("Station", "Value"), sep = ": ") %>% 
    filter(!is.na(Value))

  data <- tidyr::spread(indices, Station, Value)
  data
    }

startDate <- as.Date("01-11-1979", format="%d-%m-%y")
endDate <- as.Date("31-01-1980",format="%d-%m-%y")

days <- seq(startDate, endDate, "1 day")

lapply(days[1:92], function(day) {
  get_sounding_data(
    region = "seasia",
    date = day,
    from_hr = "00",
    to_hr = "00",
    station_number = "48615"
  )
}) -> soundings_48615

warnings()

new_df <- map(soundings_48615, . %>% mutate_all(parse_guess))
dat <- bind_rows(new_df)
dat <- dat %>% separate(col =`Observation time`, into = c('Date', 'time'), sep = '/')

dat$Date <- as.Date(dat$Date, format = "%y%m%d")

#save in text file
write.csv(dat, 'c:/Users/Hp/Documents/yr/climatology/yr_SoundingIndexLowerPart/1979.csv')

get_sounding_data <- NULL
beep()

Oliver · Answer 1 · 10 февраля 2019

Похоже, что ошибка, которая у вас сейчас есть, основана на формате даты.Более конкретно, в следующие моменты времени

as.Date("01-11-1979", format="%d-%m-%y")
as.Date("31-01-1980",format="%d-%m-%y")

выводит

"2019-11-01"
"2019-01-31"

Формат даты / времени R соответствует международному стандарту yyyy-mm-dd.Таким образом, "2019-11-01" наступает после "2019-01-31" времени.Таким образом, это приведет к сбою последовательности, если он попытается повторить 1 положительный день за раз.Форматирование - проблема здесь, для того, чтобы это было решено, есть очень простое решение.Всегда работайте в международных стандартных форматах даты, так как они будут распознаваться (почти) всеми программами.

В связи с этим измените часть последовательности кода вашего кода на

....
startDate <- as.Date("1979-11-01")
endDate <- as.Date("1980-01-31")

days <- seq(startDate, endDate, "1 day")
....

Обратите внимание на изменение формата,Что касается того, почему он меняется с 1979 на 2019, я не совсем уверен, у кого-то еще может быть более сложный ответ на это странное поведение.

Что означает ошибка в seq.int (0, to0 - from, by): неверный знак в аргументе «by» в r

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Что означает ошибка в seq.int (0, to0 - from, by): неверный знак в аргументе «by» в r

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы