В приведенном ниже коде я пытаюсь реализовать тест Дики-Фуллера для переменной residual
, которая является серией pandas. Я не знаю, почему он удаляет метку столбца total_case, которая нужна в функции test_stationarity()
. Вот как indexedDataset_logScale
выглядит
total_cases
date
2020-01-30 0.000000
2020-01-31 0.000000
2020-02-01 0.000000
2020-02-02 0.693147
2020-02-03 0.693147
... ...
2020-06-13 12.641074
2020-06-14 12.678953
2020-06-15 12.714167
2020-06-16 12.745751
2020-06-17 12.777236
[139 rows x 1 columns]
Вот как decomposedLogData
выглядит
date
2020-02-02 0.188993
2020-02-03 0.049152
2020-02-04 0.284231
2020-02-05 0.126681
2020-02-06 0.022418
...
2020-06-10 0.012442
2020-06-11 0.009722
2020-06-12 -0.036078
2020-06-13 -0.016174
2020-06-14 0.001704
Name: resid, Length: 133, dtype: float64
from statsmodels.tsa.seasonal import seasonal_decompose
indexedDataset_logScale.replace([np.inf, -np.inf], np.nan,inplace=True)
#
indexedDataset_logScale.dropna(inplace=True)
# print(indexedDataset_logScale)
# indexedDataset_logScale.asfreq('D')
decomposition = seasonal_decompose(indexedDataset_logScale,freq=7)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
plt.subplot(411)
plt.plot(indexedDataset_logScale, label='Original')
plt.legend(loc="best")
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc="best")
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc="best")
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc="best")
plt.tight_layout
decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_stationarity(decomposedLogData)
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
#Determining rolling statistics
movingAverage = timeseries.rolling(window=7).mean()
movingStd = timeseries.rolling(window=7).std()
#Plot rolling statistics
orig = plt.plot(timeseries, color='blue', label='Origin')
mean = plt.plot(movingAverage, color='red', label='Rolling Mean')
std = plt.plot(movingStd, color='black', label='Rolling Std')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
plt.show(block = False)
#Perform Dickey-Fuller test:
print('Result of Dickey-Fuller Test: ')
dftest = adfuller(timeseries['total_cases'],autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
for key,value in dftest[4].items():
dfoutput['Critical Value (%s)' %key] = value
print(dfoutput)
Это дает эту ошибку.
Result of Dickey-Fuller Test:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4410 try:
-> 4411 return libindex.get_value_at(s, key)
4412 except IndexError:
pandas/_libs/index.pyx in pandas._libs.index.get_value_at()
pandas/_libs/index.pyx in pandas._libs.index.get_value_at()
pandas/_libs/util.pxd in pandas._libs.util.get_value_at()
pandas/_libs/util.pxd in pandas._libs.util.validate_indexer()
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
650 try:
--> 651 value = Index.get_value(self, series, key)
652 except KeyError:
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4418 else:
-> 4419 raise e1
4420 except Exception:
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine._date_check_type()
KeyError: 'total_cases'
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
~/anaconda3/envs/covid/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(timestr, parserinfo, **kwargs)
1373 else:
-> 1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1375
~/anaconda3/envs/covid/lib/python3.8/site-packages/dateutil/parser/_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ParserError("Unknown string format: %s", timestr)
650
ParserError: Unknown string format: total_cases
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
659 try:
--> 660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value_maybe_box(self, series, key)
674 elif not isinstance(key, Timestamp):
--> 675 key = Timestamp(key)
676 values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
27 decomposedLogData = residual
28 decomposedLogData.dropna(inplace=True)
---> 29 test_stationarity(decomposedLogData)
30 # print(decomposition.resid)
in test_stationarity(timeseries)
17 #Perform Dickey-Fuller test:
18 print('Result of Dickey-Fuller Test: ')
---> 19 dftest = adfuller(timeseries['total_cases'],autolag='AIC')
20 dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
21 for key,value in dftest[4].items():
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/anaconda3/envs/covid/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
--> 662 raise KeyError(key)
663 else:
664 return com.maybe_box(self, value, series, key)
KeyError: 'total_cases'
Я знаю, что упускаю небольшую вещь.