Как выполнить линейную интерполяцию между несколькими столбцами в r? - PullRequest
2 голосов
/ 08 июля 2020

Я пробовал использовать несколько разных методов, но никак не могу понять.

Вот мои данные:

NEI_all_years <-structure(list(state = c("AK", "AK", "AK", "AK", "AK", "AK"), 
    pollutant = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Ammonia", 
    "Carbon Dioxide", "Carbon Monoxide", "Methane", "Nitrogen Oxides", 
    "Nitrous Oxide", "PM10 Filterable", "PM10 Primary (Filt + Cond)", 
    "PM2.5 Filterable", "PM2.5 Primary (Filt + Cond)", "Sulfur Dioxide", 
    "Volatile Organic Compounds"), class = "factor"), CEDS_Sector = c("1A1b_Pet-refining", 
    "1A1g_Other-energy-transf", "1A2_Industrial_fuel_combustion", 
    "1A2_Industrial_fuel_combustion", "1A2_Industrial_fuel_combustion", 
    "1A2_Industrial_fuel_combustion"), CEDS_Fuel = c("natural_gas", 
    "natural_gas", "biomass", "diesel_oil", "hard_coal", "heavy_oil"
    ), emissions2011 = c(0.00576, 0.00043, 0.000237024333990807, 
    0.00188652040085416, 0.000193489271882478, 0.00013881276046503
    ), emissions2014 = c(0.00583, 0, 5.32360487697781e-06, 0.00124625603613746, 
    7.0333234068145e-05, 0), emissions2017 = c(0.0089604, 2.34e-05, 
    0.000388967014684649, 0.000837643732771449, 8.95609282040459e-05, 
    2.86528055941436e-05), tier1_description = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L), .Label = c("", "CHEMICAL & ALLIED PRODUCT MFG", 
    "DOMESTIC SHIPPING", "FUEL COMB. ELEC. UTIL.", "FUEL COMB. INDUSTRIAL", 
    "FUEL COMB. OTHER", "HIGHWAY VEHICLES", "METALS PROCESSING", 
    "MISCELLANEOUS", "OFF-HIGHWAY", "OTHER INDUSTRIAL PROCESSES", 
    "OTHER NATURAL", "PETROLEUM & RELATED INDUSTRIES", "SOLVENT UTILIZATION", 
    "STORAGE & TRANSPORT", "WASTE DISPOSAL & RECYCLING"), class = "factor"), 
    unit = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "TON", class = "factor")), row.names = c(NA, 
6L), class = "data.frame")

Похоже, это (пример) :

state      pollutant              CEDS_Sector                 CEDS_Fuel         emissions2011       emissions2014      emissions2017      tier1_description           unit   
AK          Ammonia           1A1b_Pet-refining              natural gas             11                   14                   17       FUEL COMB. INDUSTRIAL         TON                     
AK          Ammonia        1A1g_Other-energy-transf          natural gas             10                   22                   30       FUEL COMB. INDUSTRIAL         TON
AK          Ammonia     1A2_Industrial_fuel_combustion        biomass                15                   48                   50       FUEL COMB. INDUSTRIAL         TON

Мне нужна интерполяция для выбросов, чтобы у меня были данные за каждый год с 2010 по 2020 на основе столбцов emissions2011, emissions2014 и emissions2017.

1 Ответ

2 голосов
/ 08 июля 2020

Мы могли бы преобразовать данные в «длинный» формат, затем расширить набор данных с помощью complete (от tidyr) и использовать na.approx в «выбросах»

library(dplyr)
library(tidyr)
library(zoo)
NEI_all_years %>%
  pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
        names_sep="(?<=[a-z])(?=[0-9])") %>%
  mutate(year = as.integer(year)) %>%
  group_by(across(state:unit)) %>%
  complete(year = 2010:2020) %>% 
  mutate(emissions = na.approx(emissions, na.rm = FALSE, rule = 2))

Или вместо na.approx мы можем использовать na.interp из forecast

library(forecast)
NEI_all_years %>% 
  pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"), 
    names_sep="(?<=[a-z])(?=[0-9])") %>% 
  mutate(year = as.integer(year)) %>% 
  group_by(across(state:unit)) %>%
  complete(year = 2010:2020) %>% 
  mutate(emissions = na.interp(emissions))
# A tibble: 66 x 8
# Groups:   state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
#   state pollutant CEDS_Sector       CEDS_Fuel   tier1_description     unit   year emissions  
#   <chr> <fct>     <chr>             <chr>       <fct>                 <fct> <int> <ts>       
# 1 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2010 0.005760000
# 2 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2011 0.005760000
# 3 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2012 0.005783333
# 4 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2013 0.005806667
# 5 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2014 0.005830000
# 6 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2015 0.006873467
# 7 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2016 0.007916933
# 8 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2017 0.008960400
# 9 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2018 0.008960400
#10 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2019 0.008960400
# … with 56 more rows

Если нам нужно выполнить линейную экстраполяцию, можно выбрать

# remotes::install_github("skgrange/threadr")
library(threadr)
NEI_all_years %>%
   pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
         names_sep="(?<=[a-z])(?=[0-9])") %>%
   mutate(year = as.integer(year)) %>%
   group_by(across(state:unit)) %>%
   complete(year = 2010:2020) %>% 
   mutate(emissions = na_extrapolate(na.approx(emissions, na.rm = FALSE)))
# A tibble: 66 x 8
# Groups:   state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
#   state pollutant CEDS_Sector       CEDS_Fuel   tier1_description     unit   year emissions
#   <chr> <fct>     <chr>             <chr>       <fct>                 <fct> <int>     <dbl>
# 1 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2010   0.00574
# 2 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2011   0.00576
# 3 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2012   0.00578
# 4 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2013   0.00581
# 5 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2014   0.00583
# 6 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2015   0.00687
# 7 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2016   0.00792
# 8 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2017   0.00896
# 9 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2018   0.0100 
#10 AK    Ammonia   1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON    2019   0.0110 
# … with 56 more rows
...