Я получаю две строки для каждого запроса к API-интерфейсу NPPES, и я пытаюсь понять, почему.
Я мог бы выбрать разные, используя dplyr, но я бы лучше понял, почему я получаю дублирующиеся строки, а не чем использовать обходной путь. Ваша помощь будет принята с благодарностью.
Мой код выглядит следующим образом:
pacman::p_load(tidyverse,httr,jsonlite,purrr)
path <- "https://npiregistry.cms.hhs.gov/api/?"
# CREATE A FUNCTION TO QUERY THE NPPES NPI REGISTRY
getNPI <- function(object) {
request <- httr::GET(url = path,
query = list(version = "2.0",
number = object))
df <- content(request, as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(., flatten = TRUE) %>%
data.frame() %>%
tidyr::unnest(c(results.addresses,results.taxonomies), names_repair = "unique")
df_col_names <- names(df)
cols_to_add <- setdiff(c("result_count",
"results.enumeration_type",
"results.number",
"results.last_updated_epoch",
"results.created_epoch",
"results.other_names",
"country_code",
"country_name",
"address_purpose",
"address_type",
"address_1",
"address_2",
"city",
"state",
"postal_code",
"telephone_number",
"code",
"desc",
"primary",
"state1",
"license",
"results.identifiers",
"results.basic.first_name",
"results.basic.last_name",
"results.basic.middle_name",
"results.basic.credential",
"results.basic.sole_proprietor",
"results.basic.gender",
"results.basic.enumeration_date",
"results.basic.last_updated",
"results.basic.status",
"results.basic.name"), df_col_names)
if (length(cols_to_add) > 0) {
for(i in cols_to_add){
df[,i] <- "UNKNOWN"
}
}
# df_col_names <- names(df$results.taxonomies[[1]])
# cols_to_add <- setdiff(c("desc","state"), df_col_names)
# if (length(cols_to_add) > 0) {
# for(i in cols_to_add){
# df$results.taxonomies[[1]][,i] <- "UNKNOWN"
# }
# }
df %>%
select(results.number,
results.basic.name,
results.enumeration_type,
results.basic.gender,
results.basic.credential,
desc) %>%
rename(Provider_NPI = results.number,
Provider_Name = results.basic.name,
Provider_Gender = results.basic.gender,
Provider_Credentials = results.basic.credential,
Provider_Taxonomy = desc) %>%
mutate(
Provider_Type = case_when(
results.enumeration_type == "NPI-1" ~ 'Individual Provider',
results.enumeration_type == "NPI-2" ~ 'Organizational Provider'
)
) %>%
select(-results.enumeration_type)
}
providerIDs <- c(
#'1477765634',
'1376815795'
)
test <- lapply(providerIDs, getNPI)
final_df <- do.call("rbind",test)