Я, вероятно, заскочил в Python на глубоком конце, но я следовал учебнику от начала до конца, который отлично работает, и я в целом (я думаю) понимаю.
Я пытаюсь воссоздать то, что я узнал, используя мои собственные данные.
У меня это тоже работает нормально, однако в учебнике показана линия предсказания на графике графика, сгенерированного на основе фактических данных.
Что мне нужно изменить, чтобы он предсказывал, скажем, на 28 дней вперед, а не поверх данных, которые у меня уже есть?
вот мой код (я говорю, мой ... в основном изучебник!)
from connectionstring import conn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pyoff
import plotly.graph_objs as go
#import Keras
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers import LSTM
from sklearn.model_selection import KFold, cross_val_score, train_test_split
params = ("20180801","20191002")
sqlString = "SELECT ODCDAT AS DATE, SUM(ODORDQ) AS DROPIN FROM mytable.mydb WHERE ODCDAT BETWEEN %s AND %s GROUP BY ODCDAT ORDER BY ODCDAT"
command = (sqlString % params)
SQL_Query = pd.read_sql_query(command, conn)
df = pd.DataFrame(SQL_Query, columns=['DATE','DROPIN'])
df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d')
print(df.head(10))
#new dataframe
df_diff = df.copy()
df_diff['prev_day'] = df_diff['DROPIN'].shift(1)
df_diff = df_diff.dropna()
df_diff['diff'] = (df_diff['DROPIN'] - df_diff['prev_day'])
df_diff.head(10)
print(df_diff)
#plot monthly sales diff
plot_data = [
go.Scatter(
x=df_diff['DATE'],
y=df_diff['diff'],
)
]
plot_layout = go.Layout(
title='Daily Drop In Diff'
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)
#create dataframe for transformation from time series to supervised
df_supervised = df_diff.drop(['prev_day'],axis=1)
#adding lags
for inc in range(1,31):
field_name = 'lag_' + str(inc)
df_supervised[field_name] = df_supervised['diff'].shift(inc)
#drop null values
df_supervised = df_supervised.dropna().reset_index(drop=True)
print(df_supervised)
# Import statsmodels.formula.api
import statsmodels.formula.api as smf
# Define the regression formula
model = smf.ols(formula='diff ~ lag_1 + lag_2 + lag_3 + lag_4 + lag_5 + lag_6 + lag_7 + lag_8 + lag_9 + lag_10 + lag_11 + lag_12 + lag_13 + lag_14 + lag_15 + lag_16 + lag_17 + lag_18 + lag_19 + lag_20 + lag_21 + lag_22 + lag_23 + lag_24 + lag_24 + lag_25 + lag_26 + lag_27 + lag_28 + lag_29 + lag_30', data=df_supervised)
# Fit the regression
model_fit = model.fit()
# Extract the adjusted r-squared
regression_adj_rsq = model_fit.rsquared_adj
print(regression_adj_rsq)
#import MinMaxScaler and create a new dataframe for LSTM model
from sklearn.preprocessing import MinMaxScaler
df_model = df_supervised.drop(['DROPIN','DATE'],axis=1)
#split train and test set
train_set, test_set = df_model[0:-28].values, df_model[-28:].values
#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=100, batch_size=1, verbose=1, shuffle=False)
y_pred = model.predict(X_test,batch_size=1)
#for multistep prediction, you need to replace X_test values with the predictions coming from t-1
#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
#print np.concatenate([y_pred[index],X_test[index]],axis=1)
pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)
#create dataframe that shows the predicted sales
result_list = []
sales_dates = list(df[-29:].DATE)
act_sales = list(df[-29:].DROPIN)
for index in range(0,len(pred_test_set_inverted)):
result_dict = {}
result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index])
result_dict['DATE'] = sales_dates[index+1]
result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
#for multistep prediction, replace act_sales with the predicted sales
print(df_result)
#merge with actual sales dataframe
df_sales_pred = pd.merge(df,df_result,on='DATE',how='left')
#plot actual and predicted
plot_data = [
go.Scatter(
x=df_sales_pred['DATE'],
y=df_sales_pred['DROPIN'],
name='actual'
),
go.Scatter(
x=df_sales_pred['DATE'],
y=df_sales_pred['pred_value'],
name='predicted'
)
]
plot_layout = go.Layout(
title='Sales Prediction'
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)
и вот второй график и график, который я хочу предсказать заранее ...
