The keras-rl Processor class changes the shape
1 vote / 26 March 2019

OK, I am trying to feed a list of 10 integers as input to a keras-rl model, but since I am using a new OpenAI Gym environment, I need to set up the processor class accordingly. My processor class looks like this:

import numpy as np
from rl.core import Processor

class RecoProcessor(Processor):
    def process_observation(self, observation):
        # Convert the raw observation into a fixed-length float vector.
        print("Observation:")
#         print(observation.shape)
        look_back = 10
        if observation is None:
            X = np.zeros(10)
        else:
            X = np.array(observation, dtype='float32')
#         X.append(np.zeros(look_back - len(X)))
        print(X.shape)
        return X

    def process_state_batch(self, batch):
        # Called with the stacked recent states just before the model sees them.
        print("Batch:")
        print(batch.shape)
        return batch

    def process_reward(self, reward):
        return reward

    def process_demo_data(self, demo_data):
        for step in demo_data:
            step[0] = self.process_observation(step[0])
            step[2] = self.process_reward(step[2])
        return demo_data
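
For context, keras-rl calls these hooks at different points: process_observation receives a single raw observation from the environment, while process_state_batch receives the recent states already pulled from memory, with the batch and window_length axes prepended. A minimal sketch of that shape flow, assuming window_length = 1 and a 10-element observation:

import numpy as np

obs = np.arange(10, dtype='float32')      # what process_observation returns: shape (10,)
window_length = 1

# keras-rl's memory stacks the last `window_length` processed observations
# into a state, and the agent adds a batch axis before calling
# process_state_batch:
state = np.stack([obs] * window_length)   # shape (1, 10)
batch = np.expand_dims(state, axis=0)     # shape (1, 1, 10) -- what the model will see
print(batch.shape)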

And my agent and model are as follows:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
# PrioritizedMemory is provided by the keras-rl fork in use;
# it is not part of mainline keras-rl, so its import path may differ.

window_length = 1
emb_size = 100
look_back = 10

# "Expert" (regular dqn) model architecture
expert_model = Sequential()
# expert_model.add(Input(shape=(look_back, window_length)))
expert_model.add(Embedding(env.action_space.n + 1, emb_size, input_length=look_back, mask_zero=True))
expert_model.add(LSTM(64, input_shape=(look_back, window_length)))
expert_model.add(Dense(env.action_space.n, activation='softmax'))

# try using different optimizers and different optimizer configs
expert_model.compile(loss='mse',
                     optimizer='adam',
                     metrics=['acc'])

# memory
memory = PrioritizedMemory(limit=5000, window_length=window_length)

# policy
policy = BoltzmannQPolicy()

# agent
dqn = DQNAgent(model=expert_model, nb_actions=env.action_space.n, policy=policy, memory=memory,
               enable_double_dqn=False, enable_dueling_network=False, gamma=.6,
               target_model_update=1e-2, nb_steps_warmup=100, processor=RecoProcessor())
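
Note that an Embedding layer with input_length=look_back expects 2-dimensional input of shape (batch_size, look_back), not the 3-dimensional (batch_size, window_length, look_back) array that keras-rl builds. A quick standalone check (outside keras-rl; n_actions = 20 is just a placeholder for env.action_space.n) that reproduces the mismatch:

import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

look_back = 10
n_actions = 20   # placeholder for env.action_space.n

model = Sequential()
model.add(Embedding(n_actions + 1, 100, input_length=look_back, mask_zero=True))
model.add(LSTM(64))
model.add(Dense(n_actions, activation='softmax'))
model.compile(optimizer='adam', loss='mse')

print(model.input_shape)                        # (None, 10) -- the model wants 2 dimensions
model.predict_on_batch(np.ones((1, 10)))        # works
# model.predict_on_batch(np.ones((1, 1, 10)))   # raises the same ValueError as in the traceback below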

But when I try to run this, the output looks like this:

Training for 50000 steps ...
CCCCCCCCCCCC
(10,)
Interval 1 (0 steps performed)
AAAAAAAAAAAAAAA
(1, 1, 10)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-29-4d8fdf0e849e> in <module>
     32 dqn.compile(Adam(lr), metrics=['mae'])
     33 
---> 34 train = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps = None)
     35 np.savetxt(fichero_train_history, 
     36            np.array(train.history["episode_reward"]), delimiter=",")

c:\users\eloy.anguiano\src\keras-rl\rl\core.py in fit(self, env, nb_steps, action_repetition, callbacks, verbose, visualize, nb_max_start_steps, start_step_policy, log_interval, nb_max_episode_steps)
    167                 # This is were all of the work happens. We first perceive and compute the action
    168                 # (forward step) and then use the reward to improve (backward step).
--> 169                 action = self.forward(observation)
    170                 if self.processor is not None:
    171                     action = self.processor.process_action(action)

c:\users\eloy.anguiano\src\keras-rl\rl\agents\dqn.py in forward(self, observation)
     87         # Select an action.
     88         state = self.memory.get_recent_state(observation)
---> 89         q_values = self.compute_q_values(state)
     90         if self.training:
     91             action = self.policy.select_action(q_values=q_values)

c:\users\eloy.anguiano\src\keras-rl\rl\agents\dqn.py in compute_q_values(self, state)
     67 
     68     def compute_q_values(self, state):
---> 69         q_values = self.compute_batch_q_values([state]).flatten()
     70         assert q_values.shape == (self.nb_actions,)
     71         return q_values

c:\users\eloy.anguiano\src\keras-rl\rl\agents\dqn.py in compute_batch_q_values(self, state_batch)
     62     def compute_batch_q_values(self, state_batch):
     63         batch = self.process_state_batch(state_batch)
---> 64         q_values = self.model.predict_on_batch(batch)
     65         assert q_values.shape == (len(state_batch), self.nb_actions)
     66         return q_values

~\AppData\Local\Continuum\anaconda3\lib\site-packages\keras-2.2.4-py3.7.egg\keras\engine\training.py in predict_on_batch(self, x)
   1266             Numpy array(s) of predictions.
   1267         
-> 1268         x, _, _ = self._standardize_user_data(x)
   1269         if self._uses_dynamic_learning_phase():
   1270             ins = x + [0.]

~\AppData\Local\Continuum\anaconda3\lib\site-packages\keras-2.2.4-py3.7.egg\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
    749             feed_input_shapes,
    750             check_batch_axis=False,  # Don't enforce the batch size.
--> 751             exception_prefix='input')
    752 
    753         if y is not None:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\keras-2.2.4-py3.7.egg\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    126                         ': expected ' + names[i] + ' to have ' +
    127                         str(len(shape)) + ' dimensions, but got array '
--> 128                         'with shape ' + str(data_shape))
    129                 if not check_batch_axis:
    130                     data_shape = data_shape[1:]

ValueError: Error when checking input: expected embedding_12_input to have 2 dimensions, but got array with shape (1, 1, 10)

As you can see, the shape I am getting is the batch shape, and I do not know how to fix it. In case you want to run some tests yourself, I am using the RecoGym environment (Version 1).
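
One direction that might be worth trying (an untested sketch, not a confirmed fix): since window_length is 1, the extra axis could be removed in process_state_batch so that the Embedding layer receives the 2-dimensional (batch_size, look_back) array it expects:

import numpy as np
from rl.core import Processor

class RecoProcessor(Processor):
    def process_observation(self, observation):
        # Always return a fixed-length float vector.
        if observation is None:
            return np.zeros(10, dtype='float32')
        return np.array(observation, dtype='float32')

    def process_state_batch(self, batch):
        # batch arrives as (batch_size, window_length, look_back);
        # with window_length == 1, flatten the last two axes so the
        # Embedding layer sees (batch_size, look_back).
        batch = np.array(batch)
        return batch.reshape(batch.shape[0], -1)

    def process_reward(self, reward):
        return reward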

...