I have been trying to apply the code from @Daniel Möller to my own data. It is a time-series forecasting problem trained with an LSTM.
https://github.com/danmoller/TestRepo/blob/master/TestBookLSTM.ipynb
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional
from sklearn.metrics import mean_squared_error, accuracy_score
from scipy.stats import linregress
from keras.callbacks import EarlyStopping
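# load the pollution data and drop the date column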
fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print(raw.shape)
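# scale every feature to [-1, 1]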
scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)
n_rows = raw.shape[0]
n_feats = raw.shape[1]
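# targets are the same series shifted time_shift steps into the future; 80/20 train/test split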
time_shift = 7
train_size = int(n_rows * 0.8)
train_data = raw[:train_size, :]
test_data = raw[train_size:, :]
x_train = train_data[:-time_shift, :]
x_test = test_data[:-time_shift, :]
x_predict = raw[:-time_shift, :]
y_train = train_data[time_shift:, :]
y_test = test_data[time_shift:, :]
y_predict_true = raw[time_shift:, :]
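# reshape each array into a single batch of shape (1, timesteps, features): the whole series is treated as one long sequence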
x_train = x_train.reshape(1, x_train.shape[0], x_train.shape[1])
y_train = y_train.reshape(1, y_train.shape[0], y_train.shape[1])
x_test = x_test.reshape(1, x_test.shape[0], x_test.shape[1])
y_test = y_test.reshape(1, y_test.shape[0], y_test.shape[1])
x_predict = x_predict.reshape(1, x_predict.shape[0], x_predict.shape[1])
y_predict_true = y_predict_true.reshape(1, y_predict_true.shape[0], y_predict_true.shape[1])
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
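# stacked sequence-to-sequence LSTM: the last layer emits n_feats values at every timestep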
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(None, n_feats)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(n_feats, return_sequences=True))
stop = EarlyStopping(monitor='loss', min_delta=1e-12, patience=30)  # note: with only 10 epochs, patience=30 means this callback can never trigger
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=10, callbacks=[stop], verbose=2, validation_data=(x_test, y_test))
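# stateful copy for step-by-step prediction: batch size 1, and the last layer now returns a single step instead of the full sequence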
newModel = Sequential()
newModel.add(LSTM(64, return_sequences=True, stateful=True, batch_input_shape=(1, None, n_feats)))
newModel.add(LSTM(32, return_sequences=True, stateful=True))
newModel.add(LSTM(n_feats, return_sequences=False, stateful=True))
newModel.set_weights(model.get_weights())
newModel.reset_states()
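# "self forecast": warm the states up on the known inputs, then let the model consume its own predictions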
lastSteps = np.empty((1, n_rows, n_feats))  # buffer: seed steps plus generated steps
lastSteps[:, :time_shift] = x_predict[:, -time_shift:]  # seed with the last time_shift known inputs
newModel.predict(x_predict)  # output discarded: this pass only initializes the stateful layers' internal states
rangeLen = n_rows - time_shift
for i in range(rangeLen):
    # closed loop: the prediction for step i becomes the input at step i + 1
    lastSteps[:, i + time_shift] = newModel.predict(lastSteps[:, i:i+1, :])  # predict returns shape (1, n_feats)
forecastFromSelf = lastSteps[:, time_shift:, :]
print(forecastFromSelf.shape)
forecastFromSelf = scaler.inverse_transform(forecastFromSelf.reshape(forecastFromSelf.shape[1],forecastFromSelf.shape[2]))
y_predict_true = scaler.inverse_transform(y_predict_true.reshape(y_predict_true.shape[1],y_predict_true.shape[2]))
plt.plot(y_predict_true[:,0], color='b', label='True')
plt.plot(forecastFromSelf[:,0],color='r', label='Predict')
plt.legend()
plt.title("Self forcast (Feat 1)")
plt.show()
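# "forecast from input": reset the states and feed the true inputs one step at a time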
newModel.reset_states()
newModel.predict(x_predict)  # output discarded; warms the freshly reset states on the full input before the step-by-step loop below
newSteps = []
for i in range(x_predict.shape[1]):
    # open loop: each step is predicted from the true input at that step
    newSteps.append(newModel.predict(x_predict[:, i:i+1, :]))
forecastFromInput = np.asarray(newSteps).reshape(1, x_predict.shape[1], n_feats)
print(forecastFromInput.shape)
forecastFromInput = scaler.inverse_transform(forecastFromInput.reshape(forecastFromInput.shape[1],forecastFromInput.shape[2]))
plt.plot(y_predict_true[:,0], color='b', label='True')
plt.plot(forecastFromInput[:,0], color='r', label='Predict')
plt.legend()
plt.title("Forecast from input (Feat 1)")
plt.show()
The forecasts could be improved by adding more layers to the model and increasing the number of epochs.
But the question here is: why is the "self forecast" worse than the "forecast from input"?
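To make the two comparisons concrete, here is a minimal, self-contained sketch of the difference (predict_step and true_series are hypothetical stand-ins, not the trained model or data above): the "self forecast" feeds each prediction back as the next input, while the "forecast from input" starts every step from a true observation.

import numpy as np

def predict_step(x):
    return 0.9 * x + 0.05  # hypothetical one-step predictor standing in for newModel.predict

true_series = np.linspace(0.0, 1.0, 20)

# closed loop ("self forecast"): each output becomes the next input,
# so whatever error is made at step i is carried into step i + 1
closed = [true_series[0]]
for _ in range(len(true_series) - 1):
    closed.append(predict_step(closed[-1]))

# open loop ("forecast from input"): every step consumes a true observation,
# so each prediction carries only a single step's error
open_loop = [predict_step(x) for x in true_series[:-1]]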
The pollution data is here: https://github.com/sirjanrocky/some-neural-tests-for-study/blob/master/pollution.csv
This code runs without errors. You can try it as well.