Я пытаюсь написать собственную функцию потерь с дополнительным параметром, чтобы реализовать алгоритм «актёр-критик» (actor-critic):
def custom_loss(delta):
    """Build a Keras-style loss whose log-likelihood term is scaled by ``delta``.

    Args:
        delta: Value multiplied into the negative log-likelihood; it is
            captured by the returned closure.

    Returns:
        A ``loss(y_true, y_pred)`` callable.
    """
    def loss(y_true, y_pred):
        # Clip the predictions away from 0 and 1 before taking the log.
        y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
        log_likelihood = y_true * K.log(y_pred_clipped)
        return K.sum(-log_likelihood * delta)
    return loss
, но я получаю ошибку:
NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.
полный код:
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os
class Agent(object):
    """Actor-critic agent made of three Keras models over one shared layer.

    ``actor`` is trained with a delta-weighted log-likelihood loss,
    ``critic`` with mean squared error, and ``policy`` maps a state to the
    actor's softmax output for action sampling.
    """

    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        """Store the hyper-parameters and build the networks.

        Args:
            alpha: Learning rate passed to the actor's Adam optimizer.
            beta: Learning rate passed to the critic's Adam optimizer.
            gamma: Factor applied to the next-state value in ``learn``.
            n_action: Number of discrete actions (size of the softmax head).
            load: Stored on the instance; not used by the code in this class.
            input_dims: Length of the state vector fed to ``main_input``.
            layer_shared: Units in the shared dense layer.
            layer_actor: Units in the actor's hidden dense layer.
            layer_critic: Units in the critic's hidden dense layer.
        """
        # Optimizer and return-related hyper-parameters.
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.load = load

        # Network geometry.
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic

        self.action_space = list(range(n_action))
        networks = self.build_actor_critic_network()
        self.actor, self.critic, self.policy = networks

    def build_actor_critic_network(self, load=False):
        """Create and compile the actor, critic and policy models.

        Args:
            load: Accepted but not used by this method.

        Returns:
            Tuple ``(model_actor, model_critic, model_policy)``.
        """
        main_input = Input(shape=(self.input_dims,), name='main_input')
        # NOTE: ``delta`` is a symbolic Keras Input captured by the loss
        # closure below. The surrounding post reports that fit() raises
        # NotImplementedError with this setup; its working version uses a
        # tf.Variable instead.
        delta = Input(shape=(1), name='delta')

        shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)
        actor_hidden = Dense(self.layer_actor, activation='relu', name='dense_actor')(shared)
        actor_out = Dense(self.n_action, activation='softmax', name='output_actor')(actor_hidden)
        critic_hidden = Dense(self.layer_critic, activation='relu', name='dense_critic')(shared)
        critic_out = Dense(1, activation='linear', name='output_critic')(critic_hidden)

        def custom_loss(delta):
            def loss(y_true, y_pred):
                # Keep the probabilities strictly inside (0, 1) before the log.
                safe_pred = K.clip(y_pred, 1e-8, 1 - 1e-8)
                neg_log_likelihood = -(y_true * K.log(safe_pred))
                return K.sum(neg_log_likelihood * delta)
            return loss

        actor = Model(inputs=[main_input, delta], outputs=actor_out, name='model_actor')
        actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss(delta))

        critic = Model(inputs=[main_input], outputs=critic_out, name='model_critic')
        critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')

        policy = Model(inputs=[main_input], outputs=actor_out)

        # Mark the layer at index 1 of the critic as non-trainable and
        # report which layer that was.
        frozen_layer = critic.layers[1]
        frozen_layer.trainable = False
        print(f'layer "{frozen_layer.name}" of the "model_critic" frozen')

        return actor, critic, policy

    def choose_action(self, state):
        """Sample an action from the policy's output for ``state``."""
        batch = state[np.newaxis, :]
        action_probs = self.policy.predict(batch)[0]
        return np.random.choice(self.action_space, p=action_probs)

    def learn(self, state, action, reward, state_, done):
        """Run one fit step of the actor and the critic on a single transition.

        Args:
            state: Current state array (a batch axis is prepended here).
            action: Index of the action taken.
            reward: Reward received for the transition.
            state_: Next state array.
            done: Truthy when the transition ended the episode.
        """
        batch_now = state[np.newaxis, :]
        batch_next = state_[np.newaxis, :]

        value_next = self.critic.predict(batch_next)
        value_now = self.critic.predict(batch_now)

        # The next-state value is zeroed out when the episode has ended.
        not_terminal = 1 - int(done)
        target = reward + self.gamma * value_next * not_terminal
        delta = target - value_now

        # One-hot encoding of the chosen action.
        one_hot = np.zeros([1, self.n_action])
        one_hot[np.arange(1), action] = 1.0

        self.actor.fit([batch_now, delta], one_hot, verbose=1)
        self.critic.fit([batch_now], target, verbose=1)
Код запуска (runner):
import gym
# Training driver: runs the agent on LunarLander-v2 and learns after every step.
env = gym.make('LunarLander-v2')
agent = Agent(alpha=0.00002, beta=0.0001, input_dims=8, n_action=4, load=False)
num_episodes = 2000
length_episode = 100  # maximum number of steps per episode
score_history = []
log = 1  # print every `log` episodes, averaging over the last `log` scores
for i in range(num_episodes):
    done = False
    score = 0
    observation = env.reset()
    for _ in range(length_episode):
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
        if done:
            # The episode is over; do not step the environment again
            # before the next reset().
            break
    score_history.append(score)
    avg_score = np.mean(score_history[-log:])
    if i % log == 0:
        print(f'episode n°{i}, score {avg_score}')
Ошибка:
layer "dense_shared" of the "model_critic" frozen
Train on 1 samples
1/1 [==============================]
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-14-1d63e256ee52> in <module>
16 action = agent.choose_action(observation)
17 observation_, reward, done, info = env.step(action)
---> 18 agent.learn(observation, action, reward, observation_, done)
19 observation = observation_
20 if done:
<ipython-input-13-92083476d368> in learn(self, state, action, reward, state_, done)
82 actions = np.zeros([1, self.n_action])
83 actions[np.arange(1), action] = 1.0
---> 84 self.actor.fit([state, delta_tensor], actions, verbose=1)
85 self.critic.fit([state], target, verbose=1)
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
395 total_epochs=1)
396 cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,
--> 397 prefix='val_')
398
399 return model.history
C:\ProgramData\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
117 if type is None:
118 try:
--> 119 next(self.gen)
120 except StopIteration:
121 return False
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in on_epoch(self, epoch, mode)
770 # Epochs only apply to `fit`.
771 self.callbacks.on_epoch_end(epoch, epoch_logs)
--> 772 self.progbar.on_epoch_end(epoch, epoch_logs)
773
774 @tf_contextlib.contextmanager
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\callbacks.py in on_epoch_end(self, epoch, logs)
787 self.log_values.append((k, logs[k]))
788 if self.verbose:
--> 789 self.progbar.update(self.seen, self.log_values)
790
791
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\utils\generic_utils.py in update(self, current, values)
557 info += ' - %s:' % k
558 if isinstance(self._values[k], list):
--> 559 avg = np.mean(self._values[k][0] / max(1, self._values[k][1]))
560 if abs(avg) > 1e-3:
561 info += ' %.4f' % avg
<__array_function__ internals> in mean(*args, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims)
3333
3334 return _methods._mean(a, axis=axis, dtype=dtype,
-> 3335 out=out, **kwargs)
3336
3337
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims)
133
134 def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
--> 135 arr = asanyarray(a)
136
137 is_float16_result = False
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_asarray.py in asanyarray(a, dtype, order)
136
137 """
--> 138 return array(a, dtype, copy=False, order=order, subok=True)
139
140
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in __array__(self)
726 def __array__(self):
727 raise NotImplementedError("Cannot convert a symbolic Tensor ({}) to a numpy"
--> 728 " array.".format(self.name))
729
730 def __len__(self):
NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.
Эта ошибка меня немного запутала: я просмотрел несколько разных постов, но ни один не решил мою проблему. Я понимаю, что она связана с тем, что delta должна быть тензором, но мне казалось, что раз я объявляю её как Input, всё должно быть в порядке. Я также пробовал преобразовывать её в других местах кода, но это не помогло. Если вы знаете, как решить эту проблему, буду очень признателен за любую помощь :)
Спасибо!
Тестовая модель для проверки Input((1)):
from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Model
import numpy as np
# Random single-feature inputs and random binary labels.
features = np.random.random((1000, 1))
labels = np.random.randint(2, size=(1000, 1))

# Small two-layer model, declared with Input((1)) on purpose.
inputs = Input((1))
hidden = Dense(10)(inputs)
outputs = Dense(1, activation='sigmoid')(hidden)

model = Model(inputs, outputs)
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.fit(features, labels, epochs=1, batch_size=100)
Версии:
import tensorflow as tf
# Report the TensorFlow version and the version of its bundled Keras.
for label, version in (('tf:', tf.__version__), ('keras:', tf.keras.__version__)):
    print(label, version)
Вывод: tf: 2.1.0 keras: 2.2.4-tf
ИЗМЕНЕНО: Спасибо, Мдауст! Это работает :)
Однако, возможно, способ, которым я инициализирую переменную delta, не слишком хорош: я не уверен, как именно вы предлагали добавить переменную в модель «actor», чтобы вызывать self.actor.delta.assign(delta).
Пожалуйста, дайте мне знать, если есть более красивый способ сделать это!
Рабочий код для тех, у кого возникла такая же проблема:
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
# tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os
class Agent(object):
    """Actor-critic agent whose actor loss reads its weight from a tf.Variable.

    Three Keras models share one dense layer: ``actor`` (delta-weighted
    log-likelihood loss), ``critic`` (mean squared error) and ``policy``
    (actor head only, used for sampling). ``delta`` is a non-trainable
    ``tf.Variable`` that ``learn`` overwrites before each actor fit.
    """

    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        """Store the hyper-parameters and build the networks.

        Args:
            alpha: Learning rate of the actor's Adam optimizer.
            beta: Learning rate of the critic's Adam optimizer.
            gamma: Factor applied to the next-state value in ``learn``.
            n_action: Number of discrete actions.
            load: Stored on the instance; not used by the code in this class.
            input_dims: Length of the state vector.
            layer_shared: Units in the shared dense layer.
            layer_actor: Units in the actor's hidden dense layer.
            layer_critic: Units in the critic's hidden dense layer.
        """
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.load = load
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic
        self.action_space = list(range(n_action))
        (self.actor, self.critic,
         self.policy, self.delta) = self.build_actor_critic_network()

    def build_actor_critic_network(self, load=False):
        """Create and compile the models and the shared ``delta`` variable.

        Args:
            load: Accepted but not used by this method.

        Returns:
            Tuple ``(model_actor, model_critic, model_policy, delta)``.
        """
        main_input = Input(shape=(self.input_dims,), name='main_input')
        # The loss weight lives in a variable rather than a model input, so
        # the loss closure reads a concrete value that learn() can update.
        delta = tf.Variable([[0.]], trainable=False)

        dense_shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)
        dense_actor = Dense(self.layer_actor, activation='relu', name='dense_actor')(dense_shared)
        output_actor = Dense(self.n_action, activation='softmax', name='output_actor')(dense_actor)
        dense_critic = Dense(self.layer_critic, activation='relu', name='dense_critic')(dense_shared)
        output_critic = Dense(1, activation='linear', name='output_critic')(dense_critic)

        def custom_loss(delta):
            def loss(y_true, y_pred):
                # Clip the probabilities away from 0 and 1 before the log.
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

        model_actor = Model(inputs=[main_input], outputs=output_actor, name='model_actor')
        # `learning_rate` is the current argument name; `lr` is only kept by
        # Keras as a backward-compatibility alias.
        model_actor.compile(optimizer=Adam(learning_rate=self.alpha), loss=custom_loss(delta))

        model_critic = Model(inputs=[main_input], outputs=output_critic, name='model_critic')
        model_critic.compile(optimizer=Adam(learning_rate=self.beta), loss='mean_squared_error')

        model_policy = Model(inputs=[main_input], outputs=output_actor)

        # NOTE(review): `trainable` is toggled after both models were
        # compiled, and the layer is shared with the actor. Whether this
        # takes effect without recompiling may depend on the Keras
        # version - confirm.
        model_critic.layers[1].trainable = False
        print(f'layer "{model_critic.layers[1].name}" of the "model_critic" frozen')

        return model_actor, model_critic, model_policy, delta

    def choose_action(self, state):
        """Sample an action index from the policy's output for ``state``."""
        state = state[np.newaxis, :]
        probabilities = self.policy.predict(state)[0]
        return np.random.choice(self.action_space, p=probabilities)

    def learn(self, state, action, reward, state_, done):
        """Update the actor and the critic from one transition.

        Args:
            state: Current state array (a batch axis is prepended here).
            action: Index of the action taken.
            reward: Reward received for the transition.
            state_: Next state array.
            done: Truthy when the transition ended the episode.
        """
        state = state[np.newaxis, :]
        state_ = state_[np.newaxis, :]

        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)

        # The next-state value is dropped when the episode has ended.
        target = reward + self.gamma * critic_value_ * (1 - int(done))
        delta_numpy = target - critic_value

        # One-hot encoding of the chosen action (batch of one).
        actions = np.zeros([1, self.n_action])
        actions[0, action] = 1.0

        # Publish the new weight to the loss before fitting the actor.
        self.delta.assign(delta_numpy)
        self.actor.fit(state, actions, verbose=1)
        self.critic.fit(state, target, verbose=1)