Bitcoin прогноз цены с LSTM - PullRequest
       22

Bitcoin прогноз цены с LSTM

0 голосов
/ 01 марта 2020

Я пытаюсь использовать LSTM для прогнозирования изменения цены bitcoin в следующий момент времени.

Мой код указан ниже. CSV-файл находится здесь https://github.com/yiluzhou/quantitative-trading/blob/master/USDT_BTC.csv

import datetime
import pandas as pd
import numpy as np
import time
import warnings

#import USDT_BTC data (3 months data, frequency: 5min)
USDT_BTC_file = 'USDT_BTC.csv'
df_USDT_BTC = pd.read_csv(USDT_BTC_file, index_col = 'date', parse_dates=True)

#Backfilling Missing Data & Replace zeros with previous non zero value
df_USDT_BTC.fillna(method="backfill", inplace=True)
df_USDT_BTC['volume'].replace(to_replace=0, method='ffill', inplace=True)

#set index
df_USDT_BTC.index = pd.to_datetime(df_USDT_BTC.index, unit='s')

#Pre-processing columns
df_USDT_BTC['open_log'] = np.log(df_USDT_BTC['open'])
df_USDT_BTC['high_log'] = np.log(df_USDT_BTC['high'])
df_USDT_BTC['low_log'] = np.log(df_USDT_BTC['low'])
df_USDT_BTC['close_log'] = np.log(df_USDT_BTC['close'])
df_USDT_BTC['volume_log'] = np.log(df_USDT_BTC['volume'])

#To predict close price minus open price
df_USDT_BTC['close_minus_open_log'] = df_USDT_BTC['close_log'] - df_USDT_BTC['open_log']

#Keep useful columns only
df_USDT_BTC = df_USDT_BTC[['open_log', 'high_log', 'low_log', 'close_log','volume_log', 'close_minus_open_log']]
input_column_number = df_USDT_BTC.shape[1]

#Training set = 90%, test set = 10%
Train_test_split = 0.9
n = int(len(df_USDT_BTC) * Train_test_split)
X_train = df_USDT_BTC.iloc[:n]
Y_train = df_USDT_BTC['close_minus_open_log'].iloc[:n]
print('X_train.shape =', X_train.shape)
print('Y_train.shape =', Y_train.shape)

X_test = df_USDT_BTC.iloc[n:]
Y_test = df_USDT_BTC['close_minus_open_log'].iloc[n:]

#Reshape your data either using array.reshape(-1, 1) if your data has a single feature 
#or array.reshape(1, -1) if it contains a single sample.
Y_train = np.expand_dims(Y_train, -1)
Y_test = np.expand_dims(Y_test, -1)
print('Y_train.shape =', Y_train.shape)
print('Y_test.shape =', Y_test.shape)

#Scale Data
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler(feature_range=(0, 1))
Y_scaler = MinMaxScaler(feature_range=(0, 1))

#ONLY FIT TO TRAININ DATA
X_scaler.fit(X_train)
Y_scaler.fit(Y_train)
scaled_X_train = X_scaler.transform(X_train)
scaled_X_test = X_scaler.transform(X_test)
print('scaled_X_train.shape =', scaled_X_train.shape)
print('scaled_X_test.shape =', scaled_X_test.shape)
scaled_Y_train = Y_scaler.transform(Y_train)
scaled_Y_test = Y_scaler.transform(Y_test)
print('scaled_Y_train.shape =', scaled_Y_train.shape)
print('scaled_Y_test.shape =', scaled_Y_test.shape)


from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

# define generator. Input:previous 1000 steps
n_input = 1000
Train_generator = TimeseriesGenerator(scaled_X_train, scaled_X_train, length=n_input, batch_size=1)
Test_generator = TimeseriesGenerator(scaled_X_test, scaled_X_test, length=n_input, batch_size=1)
print('scaled_X_train =\n', scaled_X_train)
print('scaled_X_test =\n', scaled_X_test)

df_scaled_Y_train = pd.DataFrame(scaled_Y_train)
df_scaled_Y_train.to_csv('df_scaled_Y_train.csv', index = True, header=True)
df_scaled_Y_test = pd.DataFrame(scaled_Y_test)
df_scaled_Y_test.to_csv('df_scaled_Y_test.csv', index = True, header=True)

print('len(Train_generator)=', len(Train_generator))


from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, Activation
from tensorflow.keras import optimizers
from sklearn.metrics import mean_squared_error
import math
import os
import tensorflow as tf
os.environ["PATH"] += os.pathsep + 'C:\\Program Files (x86)\\Graphviz2.38\\bin\\'

# basic model architecture
lookback_points = 1000

lstm_input = Input(shape=(lookback_points, input_column_number), name='lstm_input')
x = LSTM(100, name='lstm_0')(lstm_input)
x = Dropout(0.2, name='lstm_dropout_0')(x)
x = Dense(128, name='dense_0')(x)
x = Activation('relu', name='relu_0')(x)
x = Dense(1, name='dense_1')(x)
output = Activation('relu', name='output')(x)    

model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam(lr=0.005)
model.compile(optimizer='adam', loss='mse')


from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True, dpi = 300)
from IPython.display import Image
Image(filename='model.png')


print('scaled_X_train.shape =', scaled_X_train.shape)
print('scaled_X_test.shape =', scaled_X_test.shape)
seqModel = model.fit(
    Train_generator, 
    epochs = 25, 
    verbose = 1, 
    validation_data = Test_generator)

#Save training model
model.save('basic_model.h')

Я получил следующие результаты. Потеря не уменьшилась во время обучения. Я не уверен, неправильно ли реализован LSTM или неправильно настроена сеть LSTM. Любые комментарии или предложения приветствуются !!!

X_train.shape = (24084, 6)
Y_train.shape = (24084,)
Y_train.shape = (24084, 1)
Y_test.shape = (2677, 1)
scaled_X_train.shape = (24084, 6)
scaled_X_test.shape = (2677, 6)
scaled_Y_train.shape = (24084, 1)
scaled_Y_test.shape = (2677, 1)
scaled_X_train =
 [[0.34446912 0.33958391 0.34683305 0.34547543 0.4847738  0.44761373]
 [0.34561656 0.34087025 0.34689703 0.34727077 0.58093924 0.45588245]
 [0.34746962 0.3420396  0.34983022 0.34797463 0.48180663 0.4412361 ]
 ...
 [0.82245147 0.82182239 0.82403983 0.82187215 0.61153938 0.42988105]
 [0.82220815 0.82181428 0.82065202 0.81919019 0.60364405 0.39877913]
 [0.81952485 0.81953472 0.82016479 0.81799437 0.73950252 0.4177353 ]]
scaled_X_test =
 [[0.81952485 0.81933986 0.81989491 0.82089771 0.56736163 0.4547625 ]
 [0.8190607  0.82157686 0.82089613 0.82260236 0.61230716 0.48241941]
 [0.82293841 0.82304913 0.8228225  0.82065396 0.63405394 0.40813753]
 ...
 [0.87031582 0.86977699 0.87147625 0.87105801 0.34266333 0.44698384]
 [0.87136746 0.86973014 0.87062708 0.86849229 0.4731583  0.4008562 ]
 [0.87048537 0.86973278 0.87226353 0.87012989 0.46587671 0.43298599]]
len(Train_generator)= 23084
scaled_X_train.shape = (24084, 6)
scaled_X_test.shape = (2677, 6)
Train for 23084 steps, validate for 1677 steps
Epoch 1/25
23084/23084 [==============================] - 900s 39ms/step - loss: 0.0233 - val_loss: 0.0364
Epoch 2/25
23084/23084 [==============================] - 973s 42ms/step - loss: 0.0223 - val_loss: 0.0367
Epoch 3/25
23084/23084 [==============================] - 984s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 4/25
23084/23084 [==============================] - 989s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 5/25
23084/23084 [==============================] - 957s 41ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 6/25
23084/23084 [==============================] - 989s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 7/25
23084/23084 [==============================] - 989s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 8/25
23084/23084 [==============================] - 984s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 9/25
23084/23084 [==============================] - 987s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 10/25
23084/23084 [==============================] - 990s 43ms/step - loss: 0.0221 - val_loss: 0.0365
Epoch 11/25
23084/23084 [==============================] - 994s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 12/25
23084/23084 [==============================] - 994s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 13/25
23084/23084 [==============================] - 994s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 14/25
23084/23084 [==============================] - 993s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 15/25
23084/23084 [==============================] - 990s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 16/25
23084/23084 [==============================] - 992s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 17/25
23084/23084 [==============================] - 990s 43ms/step - loss: 0.0221 - val_loss: 0.0366
Epoch 18/25
23084/23084 [==============================] - 992s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 19/25
23084/23084 [==============================] - 994s 43ms/step - loss: 0.0222 - val_loss: 0.0364
Epoch 20/25
23084/23084 [==============================] - 993s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 21/25
23084/23084 [==============================] - 992s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 22/25
23084/23084 [==============================] - 990s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 23/25
23084/23084 [==============================] - 993s 43ms/step - loss: 0.0221 - val_loss: 0.0365
Epoch 24/25
23084/23084 [==============================] - 993s 43ms/step - loss: 0.0221 - val_loss: 0.0364
Epoch 25/25
23084/23084 [==============================] - 993s 43ms/step - loss: 0.0221 - val_loss: 0.0364
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py:1781: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: basic_model.h\assets
...