Сгенерированная случайным образом начальная популяция выполняется быстро, так же как и первое поколение детей, но она постепенно прогрессирует с каждым пересечением.
Обе tf.global_variables_initializer () в строке 47 и run_simulation ( строка 152 ) занимают все больше времени с каждым прогоном, добавляя около секунды к общему времени пробега одногоитерация - почему это так?
К сожалению, мне нужно все это вставить, поскольку я просто не знаю, откуда может возникнуть проблема.
Нейронная сеть находится в объекте 'policy'.
from matplotlib import pyplot as plt
import numpy as np
import random, json, math, time
import tensorflow as tf
class DecisionPolicy:
def select_action(self, current_state, step):
pass
def update_q(self, state, action, reward, next_state):
pass
class RandomDecisionPolicy(DecisionPolicy):
def __init__(self, actions):
self.actions = actions
def select_action(self, current_state, step):
action = self.actions[random.randint(0, len(self.actions) - 1)]
return action
policy = None
class QLearningDecisionPolicy(DecisionPolicy):
def __init__(self, actions, input_dim, variaDict):
self.epsilon = 0.9
self.gamma = 0.001
self.actions = actions
output_dim = len(actions)
h1_dim = 200
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
self.x = tf.placeholder(tf.float32, [None, input_dim])
self.y = tf.placeholder(tf.float32, [output_dim])
#self.W1 = tf.Variable(variaDict['W1'], name="q")
self.W1 = tf.get_variable("q", [input_dim, h1_dim])
#self.b1 = tf.Variable(variaDict['b1'], name="e")
self.b1 = tf.get_variable("e", [h1_dim])
W1_assign = self.W1.assign(variaDict['W1'])
b1_assign = self.b1.assign(variaDict['b1'])
# {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
# 'b1' : tf.Variable(tf.constant(0.1, shape=[h1_dim])),
# 'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
# 'b2' : tf.Variable(tf.constant(0.1, shape=[output_dim]))}
h1 = tf.nn.relu(tf.matmul(self.x, self.W1) + self.b1)
#self.W2 = tf.Variable(variaDict['W2'], name="c")
self.W2 = tf.get_variable("c", [h1_dim, output_dim])
#self.b2 = tf.Variable(variaDict['b2'], name="R")
self.b2 = tf.get_variable("R", [output_dim])
W2_assign = self.W2.assign(variaDict['W2'])
b2_assign = self.b2.assign(variaDict['b2'])
self.q = tf.nn.relu(tf.matmul(h1, self.W2) + self.b2)
loss = tf.square(self.y - self.q)
#print(self.y, self.q, loss) ???????????? co to y
self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
self.sess = tf.Session()
start_time = time.time()
#print(self.sess.run(tf.report_uninitialized_variables()))
self.sess.run(tf.global_variables_initializer())
#print(len(tf.global_variables()))
print("variable initialization took %s seconds" % (time.time() - start_time))
def export_variables(self):
return {'W1':self.sess.run(self.W1), 'b1':self.sess.run(self.b1), 'W2':self.sess.run(self.W2), 'b2':self.sess.run(self.b2)}
#return {'W1': self.evW1, 'b1':self.evb1, 'W2':self.evW2, 'b2':self.evb2}
def select_action(self, current_state, step):
threshold = min(self.epsilon, step / 1000.)
if random.random() < threshold:
# Exploit best option with probability epsilon
action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
action_idx = np.argmax(action_q_vals) # TODO: replace w/ tensorflow's argmax
action = self.actions[action_idx]
else:
# Explore random option with probability 1 - epsilon
action = self.actions[random.randint(0, len(self.actions) - 1)]
return action
def update_q(self, state, action, reward, next_state):
action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
next_action_idx = np.argmax(next_action_q_vals)
action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
action_q_vals = np.squeeze(np.asarray(action_q_vals))
self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})
def run_simulation(initial_budget, initial_num_stocks, prices, hist, debug=False):
budget = initial_budget
num_stocks = initial_num_stocks
share_value = 0
transitions = list()
for i in range(len(prices) - hist - 1):
#if i % 100 == 0:
#print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
current_portfolio = budget + num_stocks * share_value
action = policy.select_action(current_state, i)
share_value = float(prices[i + hist + 1])
if action == 'Buy' and budget >= share_value:
budget -= share_value
num_stocks += 1
#print('bought 1 @ ' +str(share_value))
elif action == 'Sell' and num_stocks > 0:
budget += share_value * 0.998001
num_stocks -= 1
#print('sold 1 @ ' +str(share_value))
else:
action = 'Hold'
new_portfolio = budget + num_stocks * share_value
reward = new_portfolio - current_portfolio
next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
transitions.append((current_state, action, reward, next_state))
policy.update_q(current_state, action, reward, next_state)
portfolio = budget + num_stocks * share_value
if debug:
print('${}\t{} shares'.format(budget, num_stocks))
return portfolio
def run_simulations(budget, num_stocks, prices, hist, num_tries, variaDict=None, selection=None):
global policy
final_portfolios = list()
for i in range(num_tries):
#print(i)
if variaDict != None:
policy = QLearningDecisionPolicy(actions, hist + 2, variaDict)
elif selection != None:
newPolicyVariasDict = crossover_with_mut(random.choice(selection), random.choice(selection))
#print(newPolicyVariasDict)
#tfDict = {'W1': tf.Variable(newPolicyVariasDict['W1']),
# 'b1' : tf.Variable(newPolicyVariasDict['b1']),
# 'W2': tf.Variable(newPolicyVariasDict['W2']),
# 'b2' : tf.Variable(newPolicyVariasDict['b2'])}
#print(tfDict['W1'].dtype)
#start_time = time.time()
policy = QLearningDecisionPolicy(actions, hist + 2, newPolicyVariasDict)
start_time = time.time()
final_portfolio = run_simulation(budget, num_stocks, prices, hist)
print("simulation took %s seconds" % (time.time() - start_time))
#print(policy.sess.run(policy.b2))
final_portfolios.append(final_portfolio)
scoresList.append(final_portfolio)
scoresDict[final_portfolio] = policy.export_variables()#tu moga byc dwa takie same yolo
policy.sess.close()
avg, std = np.mean(final_portfolios), np.std(final_portfolios)
plt.clf()
plt.title('Final Portfolio Value')
plt.xlabel('Simulation #')
plt.ylabel('Net worth')
plt.plot(final_portfolios)
#plt.show()
return avg, std, final_portfolios
def get_prices(cache_filename='stock_prices.txt'):
#stock_prices = np.load(cache_filename)
try:
with open(cache_filename, 'r') as f:
stock_prices = json.load(f)
except:
stock_prices = [4996.98, 4996.99, 4996.49, 4996.98, 4996.99, 4996.99, 4997.26, 4997.26, 4997.29, 4997.3, 4997.34, 4995.15, 4995.15, 4995.15, 4995.13, 4997.32, 4997.32, 4996.74, 4995.03, 4995.03, 4995.02, 4997.33, 4996.41, 4996.4, 4997.34, 4997.32, 4997.4, 4997.5, 4997.85, 4996.31, 4996.31, 4996.87, 4996.31, 4996.31, 4997.1, 4996.34, 4996.37, 4996.37, 4997.09, 4996.36, 4996.37, 4996.36, 4996.36, 4996.36, 4996.38, 4996.38, 4996.38, 4995.09, 4996.38, 4996.38, 4997.3, 4997.31, 4997.33, 4996.17, 4996.36, 4995.11, 4996.03, 4995.15, 4995.11, 4995.1, 4995.09, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.01, 4995.0, 4995.0, 4995.03, 4995.03, 4995.03, 4995.02, 4995.02, 4995.01, 4995.0, 4995.01, 4995.0, 4994.98, 4994.16, 4995.01, 4995.01, 4995.01, 4994.44, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.03, 4995.03, 4995.01, 4995.01, 4995.01, 4995.03, 4995.03, 4994.3, 4995.03, 4995.01, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.02, 4995.03, 4995.0, 4995.01, 4995.01, 4995.03, 4995.0, 4994.06, 4993.97, 4993.97, 4993.81, 4993.8, 4993.7, 4993.01, 4992.99, 4992.69, 4992.51, 4992.42, 4992.37, 4992.27, 4992.01, 4992.0, 4991.87, 4994.98, 4994.47, 4994.97, 4994.97, 4994.97, 4994.58, 4994.95, 4994.97, 4994.97, 4994.97, 4993.95, 4993.79, 4993.96, 4994.39, 4994.82, 4994.78, 4994.78, 4994.79, 4993.98, 4994.0, 4994.32, 4994.0, 4993.98, 4993.96, 4993.96, 4994.09, 4992.45, 4994.06, 4994.06, 4992.45, 4992.37, 4992.35, 4992.35, 4992.25, 4992.14, 4993.31, 4994.07, 4994.03, 4993.38, 4994.03, 4994.0, 4992.51, 4993.25, 4994.0, 4994.01, 4992.51, 4992.48, 4994.0, 4994.0, 4993.97, 4993.35, 4992.53, 4993.13, 4993.94, 4993.94, 4992.49, 4992.48, 4993.91, 4993.91, 4993.91, 4993.48, 4992.52, 4992.52, 4992.52, 4992.49, 4993.89, 4992.49, 4992.48, 4992.49, 4993.89, 4993.9, 4992.52, 4992.51, 4994.74, 4993.97, 4993.97, 4994.74, 4994.75, 4993.99, 4993.97, 4994.46, 4994.75, 4994.75, 4994.75, 4993.99, 4993.97, 4994.49, 4994.75, 4994.0, 4994.75, 4994.75, 4994.43, 4994.02, 4994.75, 4994.27, 4994.02, 4994.75, 4994.02, 4994.02, 4994.41, 4994.0, 4994.02, 4994.29, 4994.0, 4994.75, 4994.75, 4994.75, 4994.02, 4994.02, 4994.35, 4994.02, 4994.0, 4994.0, 4994.0, 4994.75, 4994.02, 4994.5, 4994.73, 4994.0, 4994.73, 4994.0, 4994.73, 4994.73, 4994.73, 4994.75, 4994.75, 4994.73, 4994.43, 4994.73, 4994.75, 4994.99, 4995.0, 4995.0, 4995.02, 4995.03, 4995.03, 4995.03, 4995.03, 4995.01, 4994.8, 4994.69, 4994.69, 4995.01, 4995.03, 4995.03, 4995.03, 4994.81, 4995.03, 4994.74, 4994.75, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4994.79, 4994.93, 4995.03, 4995.03, 4995.03, 4994.87, 4994.85, 4994.86, 4994.87, 4994.98]
return stock_prices
def plot_prices(prices):
plt.title('BTC/USDT')
plt.xlabel('tick #')
plt.ylabel('price (USDT)')
plt.plot(prices)
#plt.show()
plt.savefig('prices.png')
def crossover_with_mut(vars1, vars2):
MUT_PROBABILITY = 0.1
child = {'W1': [], 'b1': [], 'W2': [], 'b2': []}
#W1
for rowidx in range(len(vars1['W1'])):
inner = []
for colidx in range(len(vars1['W1'][0])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
inner.append(vars1['W1'][rowidx][colidx])
else:
inner.append(vars2['W1'][rowidx][colidx])
else:
inner.append(random.uniform(-2, 2))
child['W1'].append(inner)
#b1
for rowidx in range(len(vars1['b1'])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
child['b1'].append(vars1['b1'][rowidx])
else:
child['b1'].append(vars2['b1'][rowidx])
else:
child['b1'].append(random.uniform(0.08, 0.12))
#W2
for rowidx in range(len(vars1['W2'])):
inner = []
for colidx in range(len(vars1['W2'][0])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
inner.append(vars1['W2'][rowidx][colidx])
else:
inner.append(vars2['W2'][rowidx][colidx])
else:
inner.append(random.uniform(-2, 2))
child['W2'].append(inner)
#b2
for rowidx in range(len(vars1['b2'])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
child['b2'].append(vars1['b2'][rowidx])
else:
child['b2'].append(vars2['b2'][rowidx])
else:
child['b2'].append(random.uniform(0.08, 0.12))
return child
if __name__ == '__main__':
prices = get_prices()
plot_prices(prices)
actions = ['Buy', 'Sell', 'Hold']
hist = 200
pop_size = 10
# policy = RandomDecisionPolicy(actions)
budget = 100000.0
num_stocks = 0
while len(prices) < hist:
print('jeszcze chwileczkę')
time.sleep(2)
maxGenerations = 100
selection = []
for gen in range(maxGenerations):
#print('gen '+str(gen))
scoresList = []
scoresDict = {}
start_time = time.time()
if gen == 0:
input_dim = hist + 2
h1_dim = 200
output_dim = len(actions)
#randoDict = {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
# 'b1' : tf.Variable(tf.constant(0.1, shape=[h1_dim])),
# 'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
# 'b2' : tf.Variable(tf.constant(0.1, shape=[output_dim]))}
randoDict = {'W1': tf.random_normal([input_dim, h1_dim]),
'b1' : tf.constant(0.1, shape=[h1_dim]),
'W2': tf.random_normal([h1_dim, output_dim]),
'b2' : tf.constant(0.1, shape=[output_dim])}
avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, variaDict = randoDict)
#print(avg, std)
#print(scoresDict[scoresList[-1]]['b2'])
else:
avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, selection = selection)
#print(avg, std)
print('###FITTEST###: ' + str(max(final_portfolios)) + ' elapsed: ' +str(time.time()-start_time))
scoresList.sort()
best = []
bestScores = scoresList[-math.ceil(0.3*pop_size):] #todo stala 0.3
for score in bestScores:
best.append(scoresDict[score])
luckers = []
for i in range(math.ceil(0.2*pop_size)): #todo stala 0.2
luckers.append(scoresDict[random.choice(scoresList[:-math.ceil(0.3*pop_size)])])
selection = best + luckers
#print(selection)
#print(selection[0]['b2'])
#print(selection[1]['b2'])
#print(policy.sess.run(scoresDict[scoresList[0]]['W1']))
При каждом выполнении требуется больше времени для инициализации и оптимизации переменных тензорного потока.В чем корень этого поведения?