Как я могу загрузить список SharePoint в Tibble в R? - PullRequest
1 голос
/ 30 января 2020

Я хочу загрузить список SharePoint в тиббл в R.

Проблема моей попытки в том, что каждое значение оказывается завёрнутым в список. Как развернуть каждое значение или изменить преобразование данных так, чтобы столбцы содержали непосредственно строки, а не списки списков?

# A tibble: 10 x 6
   `__metadata` A          B             C                D             E
   <list>       <list>     <list>        <list>           <list>        <list>    
 1 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 2 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 3 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 4 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 5 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 6 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]> 
 7 <list [4]>   <list [1]> <chr [1]>     <list [2]>       <chr [1]>     <chr [1]>
 ...

Я безуспешно пробовал следующее:

# Attempt 1: apply unlist() to every cell of every column. map() always
# returns a list, so each column is still a list-column afterwards.
my_data %>% mutate_all(~ map(.x, unlist))
# Attempt 2: flatten only the top level of my_data (recursive = FALSE).
my_data %>% unlist(recursive = FALSE)

... и многие другие комбинации map(), mutate_all(), unnest() и unlist().

Я думаю, что проблема заключается в способе обработки данных. Исходный JSON имеет следующий формат:

{
    "d": {
        "results": [
        {
            "__metadata": {
            "id": "<GUID>",
            "uri": "<redacted>",
            "etag": "\"42\"",
            "type": "SP.Data.DownloadcenterItem"
            },
            "A": {
            "results": [
                {
                "__metadata": {
                    "id": "<GUID>",
                    "type": "SP.Data.UserInfoItem"
                },
                "Title": "<redacted>"
                }
            ]
            },
            "C": {
            "__metadata": {
                "id": "<GUID>",
                "type": "SP.Data.UserInfoItem"
            },
            "EMail": "<redacted>"
            },
            "B": "<redacted>",
            "D": "<redacted>",
            "E": "<redacted>"
        },
        ...
        ],
        "__next": "<redacted>"
    }
}

Следующий код используется для загрузки JSON и преобразования его в таблицу:

# Request one page of the SharePoint list and parse the response body.
current_page <- httr::content(httr::GET('<URL>'))

# Reshape every result item separately: enframe() yields one name/value row
# per field, and spread() turns those names into columns.
my_data <- map(
  current_page$d$results,
  function(item) spread(enframe(item), name, value)
)

Вывод dput(current_page$d$results):

list(list(`__metadata` = list(id = "<redacted>", 
    uri = "<redacted>", 
    etag = "<redacted>", type = "<redacted>"), dmsAuthor = list(
    results = list(list(`__metadata` = list(id = "<redacted>", 
        type = "<redacted>"), Title = "<redacted>"))), 
    dmsDocumentOwner = list(`__metadata` = list(id = "<redacted>", 
        type = "<redacted>"), EMail = "<redacted>"), 
    dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"), 
    list(`__metadata` = list(id = "<redacted>", 
        uri = "<redacted>", 
        etag = "<redacted>", type = "<redacted>"), 
        dmsAuthor = list(results = list(list(`__metadata` = list(
            id = "<redacted>", type = "<redacted>"), 
            Title = "<redacted>"))), dmsDocumentOwner = list(
            `__metadata` = list(id = "<redacted>", 
                type = "<redacted>"), EMail = "<redacted>"), 
        dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"))

1 Ответ

1 голос
/ 30 января 2020

Следующий код работает для ваших данных. Пакет purrr здесь не нужен.

library(dplyr)
library(tibble)
library(tidyr)
# Flatten the nested list into a long name/value tibble, then rebuild one row
# per SharePoint item with one column per (flattened) field name.
enframe(unlist(current_page$d$results)) %>%
  # Drop every "__metadata" entry, including the nested ones.
  filter(!grepl("metadata", name, ignore.case = TRUE)) %>%
  # Number the occurrences of each field; the k-th occurrence of every field is
  # taken to belong to the k-th item. This assumes each field appears exactly
  # once per item - verify this if a field can hold several values.
  group_by(name) %>%
  mutate(rid = row_number()) %>%
  ungroup() %>%
  # Naming `rid` as the id column keeps every cell unique, so the result has
  # plain character columns: no list-columns, no unnest() step and no
  # "not uniquely identified" warning.
  pivot_wider(id_cols = rid, names_from = name, values_from = value) %>%
  select(-rid)

#> # A tibble: 10 x 5
#>    dmsAuthor.resul~ dmsDocumentOwne~ dmsDocumentID dmsDocVersion dmsSPTitle
#>    <chr>            <chr>            <chr>         <chr>         <chr>     
#>  1 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  2 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  3 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  4 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  5 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  6 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  7 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  8 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#>  9 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>
#> 10 <redacted>       <redacted>       <redacted>    <redacted>    <redacted>

Предупреждения, которые выдаёт вариант с pivot_wider(-rid, ...) и unnest без аргумента cols:

#> Warning: Values in `value` are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list(value = list)` to suppress this warning.
#> * Use `values_fn = list(value = length)` to identify where the duplicates arise
#> * Use `values_fn = list(value = summary_fun)` to summarise duplicates
#> Warning: `cols` is now required.
#> Please use `cols = c(dmsAuthor.results.Title, dmsDocumentOwner.EMail, dmsDocumentID, 
#>     dmsDocVersion, dmsSPTitle)`
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...