How to apply autoencoders and decoders for text classification using LSTM - PullRequest
0 votes
11 May 2019

I am new to CNN and RNN models. I am trying to classify Twitter data with autoencoders, but I am getting some errors. Please help me figure out how to resolve them.

Here are my input shapes:

from keras.preprocessing import sequence

max_words = 30
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)
print(X_train.shape, X_test.shape)

(6336, 30) (1584, 30)
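As a sanity check (assuming X_train and X_test hold integer word indices from a tokenizer), I also looked at the largest index in the padded data, since it has to fit inside the Embedding vocabulary used below:

# pad_sequences returns integer arrays; the largest index present determines
# how large the Embedding vocabulary (input_dim) has to be.
print(X_train.max(), X_test.max())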

Here is my model:

from keras.layers import Input, Dense, Embedding
from keras.models import Model, Sequential

# autoencoder built from stacked Dense layers
input_i = Input(shape=(6336, 30))
encoded_h1 = Dense(64, activation='tanh')(input_i)
encoded_h2 = Dense(32, activation='tanh')(encoded_h1)
encoded_h3 = Dense(16, activation='tanh')(encoded_h2)
encoded_h4 = Dense(8, activation='tanh')(encoded_h3)
encoded_h5 = Dense(4, activation='tanh')(encoded_h4)
latent = Dense(2, activation='tanh')(encoded_h5)
decoder_h1 = Dense(4, activation='tanh')(latent)
decoder_h2 = Dense(8, activation='tanh')(decoder_h1)
decoder_h3 = Dense(16, activation='tanh')(decoder_h2)
decoder_h4 = Dense(32, activation='tanh')(decoder_h3)
decoder_h5 = Dense(64, activation='tanh')(decoder_h4)

output = Dense(100, activation='tanh')(decoder_h5)

autoencoder = Model(input_i, output)

autoencoder.compile('adadelta', 'mse')

# separate model that embeds word indices into 64-dimensional vectors
model = Sequential()
model.add(Embedding(1000, 64))
model.compile('rmsprop', 'mse')
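From the Keras docs I understand that Input(shape=...) describes a single sample (so the number of samples, 6336, should not be part of it), and that model.predict(X_train) on the Embedding model returns an array of shape (6336, 30, 64). Would something along these lines be the right way to make the shapes consistent? This is only a sketch; the layer sizes are placeholders:

from keras.layers import Input, Dense
from keras.models import Model

# Each embedded tweet is a (30, 64) matrix, so the sample count (6336)
# does not belong in the Input shape.
input_i = Input(shape=(30, 64))
encoded = Dense(16, activation='tanh')(input_i)
latent = Dense(2, activation='tanh')(encoded)
decoded = Dense(16, activation='tanh')(latent)
output = Dense(64, activation='tanh')(decoded)  # reconstruct the 64-dim embeddings

autoencoder = Model(input_i, output)
autoencoder.compile('adadelta', 'mse')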

Here is how I train it:

X_embedded = model.predict(X_train)
autoencoder.fit(X_embedded,X_embedded,epochs=10,
            batch_size=256, validation_split=.1)
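My end goal is the classification from the title. My understanding is that, once the autoencoder is trained, the encoder half (input_i -> latent) can be reused as a feature extractor with a small classifier on top. A rough sketch of that idea (num_classes and y_train are assumed to exist, y_train one-hot encoded, and the shapes above would need to be made consistent first):

from keras.layers import Flatten, Dense
from keras.models import Model

# Reuse the trained encoder layers (shared with the autoencoder) and
# attach a softmax classifier to the flattened latent codes.
features = Flatten()(latent)
predictions = Dense(num_classes, activation='softmax')(features)

classifier = Model(input_i, predictions)
classifier.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
# assumes y_train is one-hot encoded and X_embedded matches the Input shape
classifier.fit(X_embedded, y_train, epochs=10, batch_size=256, validation_split=.1)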

Here is my error. Please help me understand what this error means and how to fix it; I don't understand it.

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-41-efb6de652413> in <module>()
----> 1 X_embedded = model.predict(X_train)
      2 autoencoder.fit(X_embedded,X_embedded,epochs=10,
      3             batch_size=256, validation_split=.1)

~\Anaconda3\lib\site-packages\keras\engine\training.py in predict(self, x, batch_size, verbose, steps)
   1167                                             batch_size=batch_size,
   1168                                             verbose=verbose,
-> 1169                                             steps=steps)
   1170 
   1171     def train_on_batch(self, x, y,

~\Anaconda3\lib\site-packages\keras\engine\training_arrays.py in predict_loop(model, f, ins, batch_size, verbose, steps)
    292                 ins_batch[i] = ins_batch[i].toarray()
    293 
--> 294             batch_outs = f(ins_batch)
    295             batch_outs = to_list(batch_outs)
    296             if batch_index == 0:

~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
   2713                 return self._legacy_call(inputs)
   2714 
-> 2715             return self._call(inputs)
   2716         else:
   2717             if py_any(is_tensor(x) for x in inputs):

~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in _call(self, inputs)
   2673             fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
   2674         else:
-> 2675             fetched = self._callable_fn(*array_vals)
   2676         return fetched[:len(self.outputs)]
   2677 

~\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
   1437           ret = tf_session.TF_SessionRunCallable(
   1438               self._session._session, self._handle, args, status,
-> 1439               run_metadata_ptr)
   1440         if run_metadata:
   1441           proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    526             None, None,
    527             compat.as_text(c_api.TF_Message(self.status.status)),
--> 528             c_api.TF_GetCode(self.status.status))
    529     # Delete the underlying status object from memory otherwise it stays alive
    530     # as there is a reference to status from this from the traceback due to

InvalidArgumentError: indices[26,26] = 2298 is not in [0, 1000)
     [[{{node embedding_2/embedding_lookup}} = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_2/embeddings/read, embedding_2/Cast, embedding_2/embedding_lookup/axis)]]
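From the message it looks like the Embedding layer was built with input_dim=1000, so it only accepts indices in the range [0, 1000), while X_train contains the index 2298. Would setting input_dim from the actual vocabulary be the right fix? A sketch (the vocab_size computation assumes the data are plain token indices):

from keras.layers import Embedding
from keras.models import Sequential

# input_dim must be strictly greater than the largest token index,
# otherwise the embedding lookup raises exactly this InvalidArgumentError.
vocab_size = int(max(X_train.max(), X_test.max())) + 1

model = Sequential()
model.add(Embedding(vocab_size, 64, input_length=max_words))
model.compile('rmsprop', 'mse')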
