Проблема с na.spline при использовании во фрейме данных для временной интерполяции в R - PullRequest
2 голоса
/ 19 марта 2020

Я пытаюсь использовать функцию na.spline из пакета zoo, чтобы интерполировать пропущенные значения (NA) во фрейме данных. Я получаю следующую ошибку:

Error in splinefun(x[!na], y[!na], ...) : zero non-NA points
In addition: Warning message:
In xy.coords(x, y, setLab = FALSE) : NAs introduced by coercion

Код, который я использовал, представлен ниже.

# Sample data pasted inline: twelve character columns of seven rows each.
# Missing values are stored as the literal string "NA", not as a real NA.
test <- structure(
  list(
    `1243_4sel` = c(
      "2.3525000000000001E-2", "5.6603000000000001E-2",
      "9.1589000000000004E-2", "8.6460999999999996E-2",
      "1.7899999999999999E-3", "1.8776000000000001E-2", "NA"
    ),
    `1245_4sel` = c(
      "2.6909999999999998E-3", "1.7314E-2",
      "-4.8430000000000001E-3", "4.0668999999999997E-2",
      "-1.0984000000000001E-2", "-4.2880000000000001E-3", "NA"
    ),
    `1255_4sel` = c(
      "-2.6103000000000001E-2", "5.2512999999999997E-2",
      "-2.1322000000000001E-2", "-3.5166999999999997E-2",
      "4.4469000000000002E-2", "4.6221999999999999E-2", "NA"
    ),
    `1265_4sel` = c(
      "NA", "-2.8042999999999998E-2", "NA", "NA",
      "-5.4059000000000003E-2", "-6.4116000000000006E-2", "NA"
    ),
    `1266_4sel` = c(
      "NA", "3.6759E-2", "NA", "NA",
      "-7.7029999999999998E-3", "1.9910000000000001E-3", "NA"
    ),
    `1268_4sel` = c(
      "-8.8400000000000002E-4", "6.5909999999999996E-2",
      "1.9495999999999999E-2", "6.9381999999999999E-2",
      "-2.764E-3", "-3.6695999999999999E-2", "NA"
    ),
    `1269_4sel` = c(
      "-2.4181000000000001E-2", "2.1572000000000001E-2",
      "-1.6182999999999999E-2", "2.5044E-2",
      "-3.3234E-2", "-1.6448999999999998E-2", "NA"
    ),
    `1274_4sel` = c(
      "-2.0017E-2", "3.5234000000000001E-2",
      "-3.8670999999999997E-2", "5.6230000000000004E-3",
      "-1.7319000000000001E-2", "1.4264000000000001E-2", "NA"
    ),
    `1276_4sel` = c(
      "-4.1009999999999996E-3", "6.1688E-2",
      "1.3915E-2", "1.0439E-2",
      "5.1450000000000003E-3", "3.5539999999999999E-3", "NA"
    ),
    `1277_4sel` = c(
      "2.8437E-2", "2.0038E-2",
      "5.1650000000000001E-2", "9.8672999999999997E-2",
      "-3.5707999999999997E-2", "1.7539999999999999E-3", "NA"
    ),
    `1278_4sel` = c(
      "3.4150000000000001E-3", "4.5303999999999997E-2",
      "2.818E-2", "6.0167999999999999E-2",
      "-2.5447999999999998E-2", "-4.061E-2", "NA"
    ),
    `1387_4sel` = c(
      "NA", "-8.8667999999999997E-2", "-3.0911999999999999E-2", "NA",
      "-4.4260000000000001E-2", "-2.3598999999999998E-2", "NA"
    )
  ),
  row.names = c(NA, -7L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Convert the DATE column to class Date.
# NOTE(review): "%y" matches a two-digit year; if DATE holds strings with a
# four-digit year this format will not parse them. Confirm the column's type.
df$DATE <- as.Date(df$DATE, format = "%y-%m-%d")
str(df$DATE)

# Drop the RowID column (described by the author as the first Excel column).
df <- subset(df, select = -c(RowID))

# Convert every character column to numeric; %<>% pipes df through the call
# and assigns the result back to df.
df %<>% mutate_if(is.character,as.numeric)
class(df$DATE)

# Convert any factor column to numeric via its character labels; columns of
# other types are returned unchanged.
df[] <- lapply(df, function(x) {
  if(is.factor(x)) as.numeric(as.character(x)) else x
})
sapply(df, class)

# Intended to drop the character columns.
# NOTE(review): is.character() is applied to the data frame as a whole rather
# than to each column, so it yields a single FALSE and no column is selected.
df[is.character(df)]= NULL
View(df)

# Same intent as above; which(FALSE) is empty, so again nothing is selected.
df[which(is.character(df))] <- NULL

# Count the NA cells in the whole data frame.
sum(is.na(df))

# Build a view with the last column moved to the first position.
# NOTE(review): the result is not assigned, so df itself keeps its column order.
df %>% select(0:0, length(df), everything())

# Spline-interpolate the NAs, passing the whole data frame in one call;
# presumably this is the call that raises the reported error.
na.spline(df)

Я не знаю, нужно ли мне читать файл Excel как объект zoo, чтобы вызвать функцию na.spline, но даже если я это делаю, я получаю другую ошибку:

Error in read.zoo(df) : index has bad entries at data rows: 7 8 21

Я обнаружил в других сообщениях, что проблема возникает из-за того, что некоторые строки начинаются с NA. Есть мысли?

1 Ответ

1 голос
/ 19 марта 2020

Возможно, вам нужно использовать lapply для применения na.spline по столбцам.

# Fill the NAs column by column, skipping the first column (DATE): each
# remaining column is handed to na.spline as a plain numeric vector.
dat[-1] <- lapply(dat[-1], function(col) zoo::na.spline(col))
dat
#         DATE  X155_4sel X964_4sel X970_4sel
# 1 2016-12-02 -0.0162270 -0.022269  0.095243
# 2 2016-12-10  0.1005000  0.041044 -0.021599
# 3 2016-12-18 -0.0064720  0.039549  0.161545
# 4 2016-12-26 -0.0386020  0.059017  0.159219
# 5 2017-01-01  0.1657300 -0.046500 -0.054670
# 6 2017-01-09  0.1582580 -0.017922 -0.079368
# 7 2017-01-17 -0.5417341 -0.110800 -0.107872  ## dat[7, 2] is extrapolated

Правка

Я не совсем уверен, в чем ваша проблема, но это работает и с вашими новыми примерами данных:

# Parse every character column as numbers; the literal "NA" strings become NA.
test[] <- lapply(test, function(col) as.numeric(col))
# Replace the NAs of each column with a spline estimate, one column at a time.
test[] <- lapply(test, function(col) zoo::na.spline(col))
# summary(test)
# 1243_4sel         1245_4sel           1255_4sel          1265_4sel       
# Min.   :0.00179   Min.   :-0.010984   Min.   :-0.21019   Min.   :-0.07487  
# 1st Qu.:0.02115   1st Qu.:-0.004566   1st Qu.:-0.03063   1st Qu.:-0.05909  
# Median :0.05660   Median : 0.002691   Median :-0.02132   Median :-0.04469  
# Mean   :0.08711   Mean   : 0.039850   Mean   :-0.02137   Mean   :-0.04608  
# 3rd Qu.:0.08903   3rd Qu.: 0.028991   3rd Qu.: 0.04535   3rd Qu.:-0.03203  
# Max.   :0.33106   Max.   : 0.238393   Max.   : 0.05251   Max.   :-0.02076  
# 1266_4sel           1268_4sel           1269_4sel          1274_4sel        
# Min.   :-0.007703   Min.   :-0.036696   Min.   :-0.03323   Min.   :-0.038671  
# 1st Qu.:-0.001574   1st Qu.:-0.001824   1st Qu.:-0.02031   1st Qu.:-0.018668  
# Median : 0.009681   Median : 0.019496   Median :-0.01618   Median : 0.005623  
# Mean   : 0.019375   Mean   : 0.038579   Mean   : 0.03308   Mean   : 0.032151  
# 3rd Qu.: 0.030351   3rd Qu.: 0.067646   3rd Qu.: 0.02331   3rd Qu.: 0.024749  
# Max.   : 0.076094   Max.   : 0.155606   Max.   : 0.27501   Max.   : 0.245945  
# 1276_4sel           1277_4sel          1278_4sel          1387_4sel       
# Min.   :-0.004101   Min.   :-0.03571   Min.   :-0.04061   Min.   :-0.25055  
# 1st Qu.: 0.004350   1st Qu.: 0.01090   1st Qu.:-0.01102   1st Qu.:-0.06646  
# Median : 0.010439   Median : 0.02844   Median : 0.02818   Median :-0.03140  
# Mean   : 0.015397   Mean   : 0.10897   Mean   : 0.04256   Mean   :-0.05613  
# 3rd Qu.: 0.015529   3rd Qu.: 0.07516   3rd Qu.: 0.05274   3rd Qu.:-0.02726  
# Max.   : 0.061688   Max.   : 0.59797   Max.   : 0.22689   Max.   : 0.07646  


Данные

# Example data: one character column (its missing value is stored as the
# string "NA"), two numeric columns and a POSIXct DATE column in UTC.
dat <- structure(
  list(
    `155_4sel` = c(
      "-0.016226999999999998", "0.10050000000000001",
      "-0.0064720000000000003", "-0.038601999999999997",
      "0.16572999999999999", "0.15825800000000001", "NA"
    ),
    `964_4sel` = c(
      -0.022269, 0.041044, 0.039549, 0.059017, -0.0465, -0.017922, -0.1108
    ),
    `970_4sel` = c(
      0.095243, -0.021599, 0.161545, 0.159219, -0.05467, -0.079368, -0.107872
    ),
    DATE = structure(
      c(
        1480636800, 1481328000, 1482019200, 1482710400,
        1483228800, 1483920000, 1484611200
      ),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    )
  ),
  row.names = c(NA, -7L),
  class = "data.frame"
)

# dat <- as.data.frame(read_excel("2017_NDVI_Anomaly_Zonal_Stats.xlsx", sheet = "Sheet4"))[-1]

# DATE is already POSIXct, so no parsing format is needed (a "%y-%m-%d" format
# would be ignored here and would fail on four-digit-year strings); only the
# time zone used to cut the date is stated explicitly.
dat$DATE <- as.Date(dat$DATE, tz = "UTC")
# The literal "NA" string becomes a real NA here (R warns about the coercion).
dat$`155_4sel` <- as.numeric(dat$`155_4sel`)
# Move DATE to the front by name so the reorder does not depend on the
# number or position of the value columns.
dat <- dat[c("DATE", setdiff(names(dat), "DATE"))]
names(dat) <- make.names(names(dat))  ## You may want to use proper names (not beginning with number)
...