Остаточная функция Season_decomposition удаляет имя столбца в Python - PullRequest
0 голосов
/ 19 июня 2020

В приведенном ниже коде я пытаюсь реализовать тест Дики-Фуллера для переменной residual, которая является серией pandas. Я не знаю, почему он удаляет метку столбца total_case, которая нужна в функции test_stationarity(). Вот как indexedDataset_logScale выглядит

total_cases
date                   
2020-01-30     0.000000
2020-01-31     0.000000
2020-02-01     0.000000
2020-02-02     0.693147
2020-02-03     0.693147
...                 ...
2020-06-13    12.641074
2020-06-14    12.678953
2020-06-15    12.714167
2020-06-16    12.745751
2020-06-17    12.777236

[139 rows x 1 columns]

Вот как decomposedLogData выглядит

date
2020-02-02    0.188993
2020-02-03    0.049152
2020-02-04    0.284231
2020-02-05    0.126681
2020-02-06    0.022418
                ...   
2020-06-10    0.012442
2020-06-11    0.009722
2020-06-12   -0.036078
2020-06-13   -0.016174
2020-06-14    0.001704
Name: resid, Length: 133, dtype: float64
from statsmodels.tsa.seasonal import seasonal_decompose
indexedDataset_logScale.replace([np.inf, -np.inf], np.nan,inplace=True)
# 
indexedDataset_logScale.dropna(inplace=True)
# print(indexedDataset_logScale)
# indexedDataset_logScale.asfreq('D')
decomposition = seasonal_decompose(indexedDataset_logScale,freq=7)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(indexedDataset_logScale, label='Original')
plt.legend(loc="best")
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc="best")
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc="best")
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc="best")
plt.tight_layout

decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_stationarity(decomposedLogData)
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):

    #Determining rolling statistics
    movingAverage = timeseries.rolling(window=7).mean()
    movingStd = timeseries.rolling(window=7).std()

    #Plot rolling statistics
    orig = plt.plot(timeseries, color='blue', label='Origin')        
    mean = plt.plot(movingAverage, color='red', label='Rolling Mean')
    std = plt.plot(movingStd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block = False)


    #Perform Dickey-Fuller test:
    print('Result of Dickey-Fuller Test: ')
    dftest = adfuller(timeseries['total_cases'],autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)' %key] = value
    print(dfoutput)

Это дает эту ошибку.

Result of Dickey-Fuller Test: 
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4410             try:
-> 4411                 return libindex.get_value_at(s, key)
   4412             except IndexError:

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.validate_indexer()

TypeError: 'str' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
    650         try:
--> 651             value = Index.get_value(self, series, key)
    652         except KeyError:

~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4418                 else:
-> 4419                     raise e1
   4420             except Exception:

~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4404         try:
-> 4405             return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
   4406         except KeyError as e1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine._date_check_type()

KeyError: 'total_cases'

During handling of the above exception, another exception occurred:

ParserError                               Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()

pandas/_libs/tslibs/parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()

~/anaconda3/envs/covid/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(timestr, parserinfo, **kwargs)
   1373     else:
-> 1374         return DEFAULTPARSER.parse(timestr, **kwargs)
   1375 

~/anaconda3/envs/covid/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    648         if res is None:
--> 649             raise ParserError("Unknown string format: %s", timestr)
    650 

ParserError: Unknown string format: total_cases

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
    659             try:
--> 660                 return self.get_value_maybe_box(series, key)
    661             except (TypeError, ValueError, KeyError):

~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value_maybe_box(self, series, key)
    674         elif not isinstance(key, Timestamp):
--> 675             key = Timestamp(key)
    676         values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)

pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()

pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()

pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()

ValueError: could not convert string to Timestamp

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
 in 
     27 decomposedLogData = residual
     28 decomposedLogData.dropna(inplace=True)
---> 29 test_stationarity(decomposedLogData)
     30 # print(decomposition.resid)

 in test_stationarity(timeseries)
     17     #Perform Dickey-Fuller test:
     18     print('Result of Dickey-Fuller Test: ')
---> 19     dftest = adfuller(timeseries['total_cases'],autolag='AIC')
     20     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
     21     for key,value in dftest[4].items():

~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
    869         key = com.apply_if_callable(key, self)
    870         try:
--> 871             result = self.index.get_value(self, key)
    872 
    873             if not is_scalar(result):

~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
    660                 return self.get_value_maybe_box(series, key)
    661             except (TypeError, ValueError, KeyError):
--> 662                 raise KeyError(key)
    663         else:
    664             return com.maybe_box(self, value, series, key)

KeyError: 'total_cases'

Я знаю, что упускаю небольшую вещь.

...