В эти дни я только начал играть с Reinforcement Learning, и я нашел стратегию Natural Evolution, вроде как понимаю, как она работает, но я очень плохо знаком с Python и нашел этот код, который в основном реализует NES алгоритм
https://github.com/huseinzol05/Stock-Prediction-Models/blob/master/agent/updated-NES-google.ipynb
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
import random
sns.set()
# CSV containing the TSLA stock predictions in the form of
# [Date, Open, High, Low, Close, Adj Close, Volume] from
# Yahoo! Finance
df = pd.read_csv('TSLA.csv')
df.head()
def get_state(data, t, n):
d = t - n + 1
block = data[d : t + 1] if d >= 0 else -d * [data[0]] + data[0 : t + 1]
res = []
for i in range(n - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
close = df.Close.values.tolist()
window_size = 30
skip = 1
l = len(close) - 1
class Deep_Evolution_Strategy:
inputs = None
def __init__(
self, weights, reward_function, population_size, sigma, learning_rate
):
self.weights = weights
self.reward_function = reward_function
self.population_size = population_size
self.sigma = sigma
self.learning_rate = learning_rate
def _get_weight_from_population(self, weights, population):
weights_population = []
for index, i in enumerate(population):
jittered = self.sigma * i
weights_population.append(weights[index] + jittered)
return weights_population
def get_weights(self):
return self.weights
def train(self, epoch = 100, print_every = 1):
lasttime = time.time()
for i in range(epoch):
population = []
rewards = np.zeros(self.population_size)
for k in range(self.population_size):
x = []
for w in self.weights:
x.append(np.random.randn(*w.shape))
population.append(x)
for k in range(self.population_size):
weights_population = self._get_weight_from_population(self.weights, population[k])
rewards[k] = self.reward_function(weights_population)
rewards = (rewards - np.mean(rewards)) / np.std(rewards)
for index, w in enumerate(self.weights):
A = np.array([p[index] for p in population])
self.weights[index] = (
w
+ self.learning_rate
/ (self.population_size * self.sigma)
* np.dot(A.T, rewards).T
)
class Model:
def __init__(self, input_size, layer_size, output_size):
self.weights = [
np.random.randn(input_size, layer_size),
np.random.randn(layer_size, output_size),
np.random.randn(layer_size, 1),
np.random.randn(1, layer_size),
]
def predict(self, inputs):
feed = np.dot(inputs, self.weights[0]) + self.weights[-1]
decision = np.dot(feed, self.weights[1])
buy = np.dot(feed, self.weights[2])
return decision, buy
def get_weights(self):
return self.weights
def set_weights(self, weights):
self.weights = weights
class Agent:
POPULATION_SIZE = 15
SIGMA = 0.1
LEARNING_RATE = 0.03
def __init__(self, model, money, max_buy, max_sell):
self.model = model
self.initial_money = money
self.max_buy = max_buy
self.max_sell = max_sell
self.es = Deep_Evolution_Strategy(
self.model.get_weights(),
self.get_reward,
self.POPULATION_SIZE,
self.SIGMA,
self.LEARNING_RATE,
)
def act(self, sequence):
decision, buy = self.model.predict(np.array(sequence))
return np.argmax(decision[0]), int(buy[0])
def get_reward(self, weights):
initial_money = self.initial_money
starting_money = initial_money
self.model.weights = weights
state = get_state(close, 0, window_size + 1)
inventory = []
quantity = 0
for t in range(0, l, skip):
action, buy = self.act(state)
next_state = get_state(close, t + 1, window_size + 1)
if action == 1 and initial_money >= close[t]:
if buy < 0:
buy = 1
if buy > self.max_buy:
buy_units = self.max_buy
else:
buy_units = buy
total_buy = buy_units * close[t]
initial_money -= total_buy
inventory.append(total_buy)
quantity += buy_units
elif action == 2 and len(inventory) > 0:
if quantity > self.max_sell:
sell_units = self.max_sell
else:
sell_units = quantity
quantity -= sell_units
total_sell = sell_units * close[t]
initial_money += total_sell
state = next_state
return ((initial_money - starting_money) / starting_money) * 100
def fit(self, iterations, checkpoint):
self.es.train(iterations, print_every = checkpoint)
def buy(self):
initial_money = self.initial_money
state = get_state(close, 0, window_size + 1)
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
quantity = 0
for t in range(0, l, skip):
action, buy = self.act(state)
next_state = get_state(close, t + 1, window_size + 1)
if action == 1 and initial_money >= close[t]:
if buy < 0:
buy = 1
if buy > self.max_buy:
buy_units = self.max_buy
else:
buy_units = buy
total_buy = buy_units * close[t]
initial_money -= total_buy
inventory.append(total_buy)
quantity += buy_units
states_buy.append(t)
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
if quantity > self.max_sell:
sell_units = self.max_sell
else:
sell_units = quantity
if sell_units < 1:
continue
quantity -= sell_units
total_sell = sell_units * close[t]
initial_money += total_sell
states_sell.append(t)
try:
invest = ((total_sell - bought_price) / bought_price) * 100
except:
invest = 0
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
model = Model(window_size, 500, 3)
agent = Agent(model, 10000, 5, 5)
agent.fit(500, 10)
agent.buy()
Как вы видите, он используется для прогнозирования акций и использует только столбец Close, но я хотел бы попробовать его с большим количеством параметров, скажем, High и Low.
Я изо всех сил, когда мне нужно изменить его, чтобы использовать этот двухмерный список. Я попробовал простое изменение:
close = df.loc[:,['Close','Open']].values.tolist()
Который добавляет еще одно свойство в каждой строке списка. Но когда я запускаю код, я начинаю видеть ошибки, когда выполняю вызов agent.fit ():
agent.fit(iterations = 500, checkpoint = 10)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-225-d97697984016> in <module>()
----> 1 agent.fit(iterations = 500, checkpoint = 10)
<ipython-input-223-35d9fbba5756> in fit(self, iterations, checkpoint)
66
67 def fit(self, iterations, checkpoint):
---> 68 self.es.train(iterations, print_every = checkpoint)
69
70 def buy(self):
<ipython-input-220-84ca345091f4> in train(self, epoch, print_every)
33 self.weights, population[k]
34 )
---> 35 rewards[k] = self.reward_function(weights_population)
36 rewards = (rewards - np.mean(rewards)) / np.std(rewards)
37
<ipython-input-223-35d9fbba5756> in get_reward(self, weights)
36
37 self.model.weights = weights
---> 38 state = get_state(self.close, 0, self.window_size + 1)
39 inventory = []
40 quantity = 0
<ipython-input-219-0df8d8be24a9> in get_state(data, t, n)
4 res = []
5 for i in range(n - 1):
----> 6 res.append(block[i + 1] - block[i])
7 return np.array([res])
TypeError: unsupported operand type(s) for -: 'list' and 'list'
Я предполагаю, что первым шагом является то, что мне нужно обновить класс Model, чтобы использовать другой параметр input_size, верно?
Любая помощь будет принята с благодарностью! Спасибо