Чтение json файла: 'Ошибка в parse_con(txt, bigint_as_char): лексическая ошибка: недопустимый символ в json тексте' - PullRequest
0 голосов
/ 04 августа 2020

У меня возникла проблема с функцией jsonlite::fromJSON. Вот воспроизводимый пример:

# Minimal reproduction: download an xz-compressed JSON file to a temporary
# path and parse it with jsonlite.
tmp <- tempfile()
url <- "http://dadosabertos.c3sl.ufpr.br/curitibaurbs/2019_10_01_tabelaVeiculo.json.xz"
# NOTE(review): mode = "w" writes the download in text mode; on Windows this
# likely corrupts the binary .xz payload (cf. the "lzma decoder corrupt data"
# warning in the output below) -- mode = "wb" is presumably what is needed
# there. TODO confirm.
download.file(url, destfile =tmp,quiet = FALSE, mode = "w")
# Parse the downloaded file; this is the call that raises the lexical error.
wb <- jsonlite::fromJSON(tmp)

Я получаю следующую ошибку:

wb <- jsonlite::fromJSON(tmp)
# Error in parse_con(txt, bigint_as_char) : 
#   lexical error: invalid char in json text.
#           4444444444444444444444444},{"COD_LINHA":"519","NOME_LINHA":"
#                      (right here) ------^
# In addition: Warning message:
# In readBin(3L, raw(0), 32768L) : lzma decoder corrupt data

Вот информация о моём сеансе:

> sessionInfo()
# R version 4.0.1 (2020-06-06)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows Server 2012 R2 x64 (build 9600)
# 
# Matrix products: default
# 
# locale:
# [1] LC_COLLATE=Portuguese_Brazil.1252  LC_CTYPE=Portuguese_Brazil.1252    LC_MONETARY=Portuguese_Brazil.1252
# [4] LC_NUMERIC=C                       LC_TIME=Portuguese_Brazil.1252    
# 
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# loaded via a namespace (and not attached):
# [1] compiler_4.0.1 tools_4.0.1    jsonlite_1.7.0

Тот же код без ошибок работает у меня в Linux (информация о сеансе приведена ниже), но я всё ещё не понимаю, что вызывает ошибку в Windows.

> sessionInfo()
# R version 4.0.2 (2020-06-22)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 18.04.4 LTS
# 
# Matrix products: default
# BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
# LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
# 
# locale:
#  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
#  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
#  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
# [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       

# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# loaded via a namespace (and not attached):
#  [1] Rcpp_1.0.5         rstudioapi_0.11    magrittr_1.5       hms_0.5.3         
#  [5] units_0.6-7        tidyselect_1.1.0   lattice_0.20-41    R6_2.4.1          
#  [9] rlang_0.4.7        stringr_1.4.0      dplyr_1.0.0        tools_4.0.2       
# [13] rgdal_1.5-12       grid_4.0.2         data.table_1.13.0  KernSmooth_2.23-17
# [17] e1071_1.7-3        DBI_1.1.0          ellipsis_0.3.1     class_7.3-17      
# [21] tibble_3.0.3       lwgeom_0.2-5       lifecycle_0.2.0    sf_0.9-5          
# [25] crayon_1.3.4       zip_2.0.4          readr_1.3.1        purrr_0.3.4       
# [29] gtfs2gps_1.2-3     vctrs_0.3.2        glue_1.4.1.9000    openxlsx_4.1.5    
# [33] sp_1.4-2           stringi_1.4.6      compiler_4.0.2     pillar_1.4.6      
# [37] generics_0.0.2     classInt_0.4-3     jsonlite_1.7.0     pkgconfig_2.0.3

...