TensorFlow + genetic algorithm run slows down sharply as generations progress
0 votes
/ April 5, 2019

The randomly generated initial population runs fast, and so does the first generation of children, but execution gets progressively slower with every crossover.

Both tf.global_variables_initializer() (line 47) and run_simulation() (line 152) take longer with every run, adding roughly a second to the total running time of each iteration - why is that?

Unfortunately I have to paste all of it, because I simply don't know where the problem might be coming from.

The neural network lives in the 'policy' object.

from matplotlib import pyplot as plt
import numpy as np
import random, json, math, time
import tensorflow as tf


class DecisionPolicy:
    def select_action(self, current_state, step):
        pass

    def update_q(self, state, action, reward, next_state):
        pass


class RandomDecisionPolicy(DecisionPolicy):
    def __init__(self, actions):
        self.actions = actions

    def select_action(self, current_state, step):
        action = self.actions[random.randint(0, len(self.actions) - 1)]
        return action

policy = None

class QLearningDecisionPolicy(DecisionPolicy):
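    """Q-learning trading policy: a two-layer network with epsilon-greedy action
    selection; every instance creates its own graph nodes and its own tf.Session."""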
    def __init__(self, actions, input_dim, variaDict):
        self.epsilon = 0.9
        self.gamma = 0.001
        self.actions = actions
        output_dim = len(actions)
        h1_dim = 200

        with tf.variable_scope("model", reuse=tf.AUTO_REUSE):

            self.x = tf.placeholder(tf.float32, [None, input_dim])
            self.y = tf.placeholder(tf.float32, [output_dim])
            #self.W1 = tf.Variable(variaDict['W1'], name="q")
            self.W1 = tf.get_variable("q", [input_dim, h1_dim])
            #self.b1 = tf.Variable(variaDict['b1'], name="e")
            self.b1 = tf.get_variable("e", [h1_dim])
            W1_assign = self.W1.assign(variaDict['W1'])
            b1_assign = self.b1.assign(variaDict['b1'])

            # {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
            #  'b1': tf.Variable(tf.constant(0.1, shape=[h1_dim])),
            #  'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
            #  'b2': tf.Variable(tf.constant(0.1, shape=[output_dim]))}

            h1 = tf.nn.relu(tf.matmul(self.x, self.W1) + self.b1)
            #self.W2 = tf.Variable(variaDict['W2'], name="c")
            self.W2 = tf.get_variable("c", [h1_dim, output_dim])
            #self.b2 = tf.Variable(variaDict['b2'], name="R")
            self.b2 = tf.get_variable("R", [output_dim])

            W2_assign = self.W2.assign(variaDict['W2'])
            b2_assign = self.b2.assign(variaDict['b2'])

            self.q = tf.nn.relu(tf.matmul(h1, self.W2) + self.b2)

            loss = tf.square(self.y - self.q)
            #print(self.y, self.q, loss)  # what is y supposed to be here?
            self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
            self.sess = tf.Session()        

            start_time = time.time()
            #print(self.sess.run(tf.report_uninitialized_variables()))
            self.sess.run(tf.global_variables_initializer())
            #print(len(tf.global_variables()))

            print("variable initialization took %s seconds" % (time.time() - start_time))

    def export_variables(self):
        return {'W1':self.sess.run(self.W1), 'b1':self.sess.run(self.b1), 'W2':self.sess.run(self.W2), 'b2':self.sess.run(self.b2)}
        #return {'W1': self.evW1, 'b1':self.evb1, 'W2':self.evW2, 'b2':self.evb2} 

    def select_action(self, current_state, step):
        threshold = min(self.epsilon, step / 1000.)
        if random.random() < threshold:
            # Exploit best option with probability epsilon
            action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
            action_idx = np.argmax(action_q_vals)  # TODO: replace w/ tensorflow's argmax
            action = self.actions[action_idx]
        else:
            # Explore random option with probability 1 - epsilon
            action = self.actions[random.randint(0, len(self.actions) - 1)]
        return action

    def update_q(self, state, action, reward, next_state):
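        """One Q-learning step: the target for the greedy next action is
        reward + gamma * max_a Q(next_state, a), then a single Adagrad update."""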
        action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
        next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
        next_action_idx = np.argmax(next_action_q_vals)
        action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
        action_q_vals = np.squeeze(np.asarray(action_q_vals))
        self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})


def run_simulation(initial_budget, initial_num_stocks, prices, hist, debug=False):
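    """Run one trading episode over the price series: at every step the policy
    picks Buy/Sell/Hold, the portfolio is updated, and update_q is called online.
    Returns the final portfolio value."""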
    budget = initial_budget
    num_stocks = initial_num_stocks
    share_value = 0
    transitions = list()
    for i in range(len(prices) - hist - 1):
        #if i % 100 == 0:
            #print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
        current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
        current_portfolio = budget + num_stocks * share_value
        action = policy.select_action(current_state, i)
        share_value = float(prices[i + hist + 1])
        if action == 'Buy' and budget >= share_value:
            budget -= share_value
            num_stocks += 1
            #print('bought 1 @ ' +str(share_value))
        elif action == 'Sell' and num_stocks > 0:
            budget += share_value * 0.998001
            num_stocks -= 1
            #print('sold 1 @ ' +str(share_value))
        else:
            action = 'Hold'
        new_portfolio = budget + num_stocks * share_value
        reward = new_portfolio - current_portfolio
        next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
        transitions.append((current_state, action, reward, next_state))
        policy.update_q(current_state, action, reward, next_state)

    portfolio = budget + num_stocks * share_value
    if debug:
        print('${}\t{} shares'.format(budget, num_stocks))
    return portfolio


def run_simulations(budget, num_stocks, prices, hist, num_tries, variaDict=None, selection=None):
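    """Evaluate num_tries individuals: each iteration builds a fresh
    QLearningDecisionPolicy, either from variaDict (initial population) or from a
    crossover of two members of `selection`, and records its score and weights."""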
    global policy
    final_portfolios = list()
    for i in range(num_tries):
        #print(i)
        if variaDict is not None:
            policy = QLearningDecisionPolicy(actions, hist + 2, variaDict)
        elif selection is not None:
            newPolicyVariasDict = crossover_with_mut(random.choice(selection), random.choice(selection))
            #print(newPolicyVariasDict)
            #tfDict = {'W1': tf.Variable(newPolicyVariasDict['W1']),
            #            'b1' : tf.Variable(newPolicyVariasDict['b1']),        
             #           'W2': tf.Variable(newPolicyVariasDict['W2']),
            #            'b2' : tf.Variable(newPolicyVariasDict['b2'])}

            #print(tfDict['W1'].dtype)

            #start_time = time.time()  
            policy = QLearningDecisionPolicy(actions, hist + 2, newPolicyVariasDict)

        start_time = time.time()        
        final_portfolio = run_simulation(budget, num_stocks, prices, hist)
        print("simulation took %s seconds" % (time.time() - start_time))
        #print(policy.sess.run(policy.b2))
        final_portfolios.append(final_portfolio)
        scoresList.append(final_portfolio)
        scoresDict[final_portfolio] = policy.export_variables()  # two identical scores could overwrite each other here
        policy.sess.close()

    avg, std = np.mean(final_portfolios), np.std(final_portfolios)
    plt.clf()
    plt.title('Final Portfolio Value')
    plt.xlabel('Simulation #')
    plt.ylabel('Net worth')
    plt.plot(final_portfolios)
    #plt.show()
    return avg, std, final_portfolios


def get_prices(cache_filename='stock_prices.txt'):

    #stock_prices = np.load(cache_filename)
    try:
        with open(cache_filename, 'r') as f:
            stock_prices = json.load(f)
    except:
        stock_prices = [4996.98, 4996.99, 4996.49, 4996.98, 4996.99, 4996.99, 4997.26, 4997.26, 4997.29, 4997.3, 4997.34, 4995.15, 4995.15, 4995.15, 4995.13, 4997.32, 4997.32, 4996.74, 4995.03, 4995.03, 4995.02, 4997.33, 4996.41, 4996.4, 4997.34, 4997.32, 4997.4, 4997.5, 4997.85, 4996.31, 4996.31, 4996.87, 4996.31, 4996.31, 4997.1, 4996.34, 4996.37, 4996.37, 4997.09, 4996.36, 4996.37, 4996.36, 4996.36, 4996.36, 4996.38, 4996.38, 4996.38, 4995.09, 4996.38, 4996.38, 4997.3, 4997.31, 4997.33, 4996.17, 4996.36, 4995.11, 4996.03, 4995.15, 4995.11, 4995.1, 4995.09, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.01, 4995.0, 4995.0, 4995.03, 4995.03, 4995.03, 4995.02, 4995.02, 4995.01, 4995.0, 4995.01, 4995.0, 4994.98, 4994.16, 4995.01, 4995.01, 4995.01, 4994.44, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.03, 4995.03, 4995.01, 4995.01, 4995.01, 4995.03, 4995.03, 4994.3, 4995.03, 4995.01, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.02, 4995.03, 4995.0, 4995.01, 4995.01, 4995.03, 4995.0, 4994.06, 4993.97, 4993.97, 4993.81, 4993.8, 4993.7, 4993.01, 4992.99, 4992.69, 4992.51, 4992.42, 4992.37, 4992.27, 4992.01, 4992.0, 4991.87, 4994.98, 4994.47, 4994.97, 4994.97, 4994.97, 4994.58, 4994.95, 4994.97, 4994.97, 4994.97, 4993.95, 4993.79, 4993.96, 4994.39, 4994.82, 4994.78, 4994.78, 4994.79, 4993.98, 4994.0, 4994.32, 4994.0, 4993.98, 4993.96, 4993.96, 4994.09, 4992.45, 4994.06, 4994.06, 4992.45, 4992.37, 4992.35, 4992.35, 4992.25, 4992.14, 4993.31, 4994.07, 4994.03, 4993.38, 4994.03, 4994.0, 4992.51, 4993.25, 4994.0, 4994.01, 4992.51, 4992.48, 4994.0, 4994.0, 4993.97, 4993.35, 4992.53, 4993.13, 4993.94, 4993.94, 4992.49, 4992.48, 4993.91, 4993.91, 4993.91, 4993.48, 4992.52, 4992.52, 4992.52, 4992.49, 4993.89, 4992.49, 4992.48, 4992.49, 4993.89, 4993.9, 4992.52, 4992.51, 4994.74, 4993.97, 4993.97, 4994.74, 4994.75, 4993.99, 4993.97, 4994.46, 4994.75, 4994.75, 4994.75, 4993.99, 4993.97, 4994.49, 4994.75, 4994.0, 4994.75, 4994.75, 4994.43, 4994.02, 4994.75, 4994.27, 4994.02, 4994.75, 4994.02, 4994.02, 4994.41, 4994.0, 4994.02, 4994.29, 4994.0, 4994.75, 4994.75, 4994.75, 4994.02, 4994.02, 4994.35, 4994.02, 4994.0, 4994.0, 4994.0, 4994.75, 4994.02, 4994.5, 4994.73, 4994.0, 4994.73, 4994.0, 4994.73, 4994.73, 4994.73, 4994.75, 4994.75, 4994.73, 4994.43, 4994.73, 4994.75, 4994.99, 4995.0, 4995.0, 4995.02, 4995.03, 4995.03, 4995.03, 4995.03, 4995.01, 4994.8, 4994.69, 4994.69, 4995.01, 4995.03, 4995.03, 4995.03, 4994.81, 4995.03, 4994.74, 4994.75, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4994.79, 4994.93, 4995.03, 4995.03, 4995.03, 4994.87, 4994.85, 4994.86, 4994.87, 4994.98]

    return stock_prices


def plot_prices(prices):
    plt.title('BTC/USDT')
    plt.xlabel('tick #')
    plt.ylabel('price (USDT)')
    plt.plot(prices)
    #plt.show()
    plt.savefig('prices.png')


def crossover_with_mut(vars1, vars2):
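    """Build a child weight dict: each entry is copied from one of the two parents
    (chosen 50/50) with probability MUT_PROBABILITY, otherwise it is replaced by a
    fresh random value (uniform in [-2, 2] for weights, [0.08, 0.12] for biases)."""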

    MUT_PROBABILITY = 0.1

    child = {'W1': [], 'b1': [], 'W2': [], 'b2': []}
    #W1
    for rowidx in range(len(vars1['W1'])):
        inner = []
        for colidx in range(len(vars1['W1'][0])):
            if random.random() < MUT_PROBABILITY:
                if random.random() < 0.5:
                    inner.append(vars1['W1'][rowidx][colidx])
                else:
                    inner.append(vars2['W1'][rowidx][colidx])
            else:
                inner.append(random.uniform(-2, 2))
        child['W1'].append(inner)

    #b1
    for rowidx in range(len(vars1['b1'])):
        if random.random() < MUT_PROBABILITY:
            if random.random() < 0.5:
                child['b1'].append(vars1['b1'][rowidx])
            else:
                child['b1'].append(vars2['b1'][rowidx])
        else:
            child['b1'].append(random.uniform(0.08, 0.12))

    #W2
    for rowidx in range(len(vars1['W2'])):
        inner = []
        for colidx in range(len(vars1['W2'][0])):
            if random.random() < MUT_PROBABILITY:
                if random.random() < 0.5:
                    inner.append(vars1['W2'][rowidx][colidx])
                else:
                    inner.append(vars2['W2'][rowidx][colidx])
            else:
                inner.append(random.uniform(-2, 2))
        child['W2'].append(inner)

    #b2
    for rowidx in range(len(vars1['b2'])):
        if random.random() < MUT_PROBABILITY:
            if random.random() < 0.5:
                child['b2'].append(vars1['b2'][rowidx])
            else:
                child['b2'].append(vars2['b2'][rowidx])
        else:
            child['b2'].append(random.uniform(0.08, 0.12))

    return child


if __name__ == '__main__':
    prices = get_prices()
    plot_prices(prices)
    actions = ['Buy', 'Sell', 'Hold']
    hist = 200
    pop_size = 10
    # policy = RandomDecisionPolicy(actions)

    budget = 100000.0
    num_stocks = 0
    while len(prices) < hist:
        print('just a moment...')
        time.sleep(2)

    maxGenerations = 100
    selection = []

    for gen in range(maxGenerations):
        #print('gen '+str(gen))
        scoresList = []
        scoresDict = {}
        start_time = time.time()

        if gen == 0:
            input_dim = hist + 2
            h1_dim = 200
            output_dim = len(actions)
            #randoDict = {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
            #            'b1' : tf.Variable(tf.constant(0.1, shape=[h1_dim])),        
            #            'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
            #            'b2' : tf.Variable(tf.constant(0.1, shape=[output_dim]))}
            randoDict = {'W1': tf.random_normal([input_dim, h1_dim]),
                        'b1' : tf.constant(0.1, shape=[h1_dim]),        
                        'W2': tf.random_normal([h1_dim, output_dim]),
                        'b2' : tf.constant(0.1, shape=[output_dim])}

            avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, variaDict = randoDict)
            #print(avg, std)


            #print(scoresDict[scoresList[-1]]['b2'])
        else:
            avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, selection = selection)
            #print(avg, std)



        print('###FITTEST###: ' + str(max(final_portfolios)) + '  elapsed: ' +str(time.time()-start_time))

        scoresList.sort()
        best = []
        bestScores = scoresList[-math.ceil(0.3*pop_size):]  # TODO: make 0.3 a named constant
        for score in bestScores:
            best.append(scoresDict[score])
        luckers = []
        for i in range(math.ceil(0.2*pop_size)):  # TODO: make 0.2 a named constant
            luckers.append(scoresDict[random.choice(scoresList[:-math.ceil(0.3*pop_size)])])

        selection = best + luckers
        #print(selection)
        #print(selection[0]['b2'])
        #print(selection[1]['b2'])

        #print(policy.sess.run(scoresDict[scoresList[0]]['W1']))

Every run takes longer to initialize and optimize the TensorFlow variables. What is the root cause of this behavior?
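To illustrate the growth, here is a minimal diagnostic sketch (not part of my program, just an assumption about how one could measure this) that could be dropped in at the end of each generation; tf.get_default_graph().get_operations() and tf.global_variables() are the standard TF 1.x calls for inspecting the default graph:

    # count how many ops and variables the default graph holds after this generation
    op_count = len(tf.get_default_graph().get_operations())
    var_count = len(tf.global_variables())
    print('generation %d: %d graph ops, %d global variables' % (gen, op_count, var_count))

If these counts keep rising from one generation to the next, that would line up with the growing initialization and simulation times reported above.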
