Невещательная ошибка при тестировании LSTM - PullRequest
0 голосов
/ 31 января 2020

Я пытаюсь создать единичный вывод («Цена открытия») с 2 входными переменными («Открыть», «Высокий»). Это код ниже:

import pandas as pd
import numpy as np
import pandas_datareader as web
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

#Load the data for training
data_training_complete = web.get_data_yahoo('AAPL', start='2013-01-01', end='2017-12-31')
data_training_processed = data_training_complete.loc[:, ['Open','High']].values

#Normalise the input data
min_max_scaler = MinMaxScaler(feature_range=(0,1))
data_training_scaled = min_max_scaler.fit_transform(data_training_processed)

#Split the data into X_train data and Y_train data and reshape to 3D for feeding input into LSTM
X_train = []
y_train = []
for i in range(60, 1260): 
    X_train.append(data_training_scaled[i-60:i, :])
    y_train.append(data_training_scaled[i, 0])

X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 2))

#Train the model
regressor = Sequential()

regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 2)))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))

regressor.add(Dense(units = 1))

#Optimise the weightings of the model
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
regressor.fit(X_train, y_train, epochs = 3, batch_size = 32)

#load in testing data
data_test_complete = web.get_data_yahoo('AAPL', start='2018-01-01', end='2019-12-31')
data_test_processed = data_test_complete.loc[:, ['Open','High']].values

#Join testing data to the training data
data_total = pd.concat((data_training_complete[['Open','High']], data_test_complete[['Open','High']]), axis = 0)
test_inputs = data_total[len(data_total) - len(data_test_complete) - 60:].values
test_inputs = test_inputs.reshape(-1,2)

#Normalise the testing inputs
test_inputs = min_max_scaler.transform(test_inputs)

#predict using testing data
X_test = []
for i in range(60,500):
    X_test.append(test_inputs[i-60:i, :])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 2))
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = min_max_scaler.inverse_transform(predicted_stock_price)

#Visualise results
plt.plot(data_test_processed, color = 'black', label = 'AAPL Stock Price')
plt.plot(predicted_stock_price, color = 'green', label = 'Predicted AAPL Stock Price')
plt.title('AAPL Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('AAPL Stock Price')
plt.legend()
plt.show()

Я получаю сообщение об ошибке в этой строке кода здесь:

predicted_stock_price = min_max_scaler.inverse_transform(predicted_stock_price)

производит:

ValueError: non-broadcastable output operand with shape (1126,1) doesn't match the broadcast shape (1126,2)

Я думаю Я знаю, может быть, почему это происходит. X_test имеет форму (440, 60, 2), но предиктором Y будет только 1 вывод, поэтому (440,1). Как я могу обойти это?

...