Я пытаюсь построить глубокую обучающую нейронную сеть, но у меня возникают проблемы при вводе состояния игры в нейронную сеть. Я всегда получаю эту ошибку : ValueError: setting an array element with a sequence.
Вот как выглядит состояниекак:
[[array([[3775.74129204, 1946.76168772],
[3121.9310053 , 699.72903429],
[4366.58744074, 735.83476952]])
array([[-2318.83417005, 1468.79312881],
[-2212.77983243, 366.27498107]])
array([[-3124., 800.],
[ -645., 800.]]) -24.0]]
Полный код (это не полный класс, это просто важная вещь, которая создает проблему):
class DQNAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.95 # discount rate
self.epsilon = 1.0 # exploration rate
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.learning_rate = 0.001
self.model = self._build_model()
def _build_model(self):
# Neural Net for Deep-Q learning Model
model = Sequential()
model.add(Dense(24, input_shape=(self.state_size,) ,activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(self.action_size, activation='linear'))
model.compile(loss='mse',
optimizer=Adam(lr=self.learning_rate))
return model
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def replay(self, batch_size):
minibatch = random.sample(self.memory, batch_size)
for state, action, reward, next_state, done in minibatch:
target = reward
if not done:
print(next_state)
target = (reward + self.gamma *
np.amax(self.model.predict(next_state)[0]))
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state, target_f, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
if __name__ == "__main__":
Init()
done=False
state_size=4
action_size=4
agent = DQNAgent(state_size, action_size)
batch_size = 32
EPISODES=50
for e in range(EPISODES):
for stpf in range(5):
r_locs, i_locs, c_locs, ang, score = Game_step(1)
state=np.array([r_locs, i_locs, c_locs,ang])
state = np.reshape(state, [1, state_size])
for stp in range(100):
action = agent.act(state)
r_locs, i_locs, c_locs, ang, score = Game_step(action)
next_state=np.array([r_locs, i_locs, c_locs,ang])
reward=score
next_state = np.reshape(next_state, [1, state_size])
agent.remember(state, action, reward, next_state, done)
state = next_state
if len(agent.memory) > batch_size:
agent.replay(batch_size)
print("episode: {}/{}, score: {}".format(e, EPISODES))
done=True
Как это исправить?Спасибо!