MemoryError с Keras fit_generator - PullRequest
       9

MemoryError с Keras fit_generator

0 голосов
/ 22 апреля 2020

Я написал генератор для Keras, поскольку мои данные слишком велики, чтобы загрузить их в оперативную память целиком. Генератор должен был решить мои проблемы с памятью, но я снова сталкиваюсь с ошибкой памяти (см. ниже).

Когда я пытаюсь запустить модель снова (без каких-либо изменений), она останавливается уже после первого батча. Я предполагаю, что обучающие примеры каким-то образом всё равно остаются в ОЗУ, иначе обучение снова дошло бы до 2-го фолда (как в выводе ниже).

Я также предположил, что это связано с num_workers, но значение задано просто как num_workers = cpu_count(), и, поскольку при повторном запуске обучение останавливается уже после первого батча, я не думаю, что ошибка в этом.

Что я упускаю? Почему я сталкиваюсь с ошибками памяти? (Я также получаю ошибку Windows от python.exe, в которой говорится, что «приложение не может быть правильно запущено (0xc000012d)».)


class final_generator(Sequence):
    """Keras ``Sequence`` that builds padded batches from ``.npy`` files on disk.

    Each sample is a 2-D array stored in its own ``.npy`` file. A batch is
    assembled by loading the files one at a time and writing them into a
    zero-initialised ``(n_samples, sentence_length, features)`` array, so
    shorter samples are zero-padded.

    Parameters
    ----------
    list_IDs : list of str
        File names of the ``.npy`` files to load.
    labels : dict
        Maps file name to label, e.g. ``{'file1.npy': 1, 'file2.npy': 0}``.
    filepath : str
        Directory containing the files, e.g. ``'C:/Users/Desktop/np_files/'``.
    batch_size : int
        Number of samples per batch.
    sentence_length : int
        Number of rows every sample is padded (or truncated) to.
    features : int
        Number of columns of every sample in the batch.
    shuffle : bool
        Whether the sample order is reshuffled in ``on_epoch_end``.
    to_fit : bool
        If true ``__getitem__`` returns ``(X, y)``, otherwise only ``X``.
    dtype : numpy dtype, optional
        dtype of the feature batch. Defaults to ``float`` (float64), which is
        what this class always produced; pass ``'float32'`` to halve the
        size of every batch.
    """

    def __init__(self, list_IDs, labels, filepath, batch_size=32, sentence_length=1000, features=768,
                 shuffle=True, to_fit=True, dtype=float):
        """Store the configuration and build the initial sample order."""
        super().__init__()
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.sentence_length = sentence_length
        self.features = features
        self.filepath = filepath
        self.shuffle = shuffle
        self.to_fit = to_fit
        self.dtype = dtype
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        A trailing partial batch is not counted, so up to ``batch_size - 1``
        samples are left out of each epoch.
        """
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data.

        :param index: index of the batch within the epoch
        :return: ``(X, y)`` when ``to_fit`` is true, otherwise only ``X``
        """
        # Positions (into list_IDs) of the samples that make up this batch.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self._generate_data(list_IDs_temp)

        if self.to_fit:
            return X, y
        return X

    def on_epoch_end(self):
        """Rebuild the sample order; shuffle it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)  # shuffles in place

    def _generate_data(self, list_IDs_temp):
        """Load the given files and stack them into one padded batch.

        :param list_IDs_temp: file names of the samples in this batch
        :return: ``(X, y)`` where ``X`` has shape
            ``(len(list_IDs_temp), sentence_length, features)`` and ``y``
            holds the integer label of every sample

        Samples with fewer rows/columns than the batch shape are zero-padded.
        Samples with more than ``sentence_length`` rows are truncated to the
        first ``sentence_length`` rows. A sample with more columns than
        ``features`` still raises ``ValueError``, since that points to a
        mis-configured generator rather than a long document.
        """
        n_samples = len(list_IDs_temp)

        # Zero-initialise the batch once and write every sample straight into
        # its slot; no per-sample padding buffer is allocated.
        X = np.zeros((n_samples, self.sentence_length, self.features), dtype=self.dtype)
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            sample = np.load(os.path.join(self.filepath, ID))

            n_rows = min(sample.shape[0], self.sentence_length)
            X[i, :n_rows, :sample.shape[1]] = sample[:n_rows]

            y[i] = self.labels[ID]

        return X, y

10-кратная перекрёстная проверка, которую я использую для обучения моей модели:


import gc

from keras import backend as K
from sklearn.model_selection import StratifiedKFold

# 10-fold stratified cross-validation. No random_state is passed: it has no
# effect without shuffling, and scikit-learn rejects the combination
# random_state + shuffle=False.
skf = StratifiedKFold(n_splits=10, shuffle=False)

for train_index, val_index in skf.split(x_train, y_train):
    # A fresh model is built for every fold. Drop the graph/session state left
    # over from the previous fold first, otherwise it accumulates from fold to
    # fold and memory use keeps growing.
    K.clear_session()
    gc.collect()

    x_trains, x_vals = x_train[train_index], x_train[val_index]
    y_trains, y_vals = y_train[train_index], y_train[val_index]

    # The generator takes plain lists of file names.
    x_trains = x_trains.flatten().tolist()
    x_vals = x_vals.flatten().tolist()

    # The generators look labels up in the `labels` dictionary (file name -> label).
    training_generator = final_generator(x_trains, labels, path, batch_size=50, sentence_length=no_sentences_per_doc,
                                         features=sentence_embedding, shuffle=True, to_fit=True)
    validation_generator = final_generator(x_vals, labels, path, batch_size=50, sentence_length=no_sentences_per_doc,
                                           features=sentence_embedding, shuffle=False, to_fit=True)

    ##### MODEL #####
    sent_input = Input(shape=(no_sentences_per_doc, sentence_embedding))
    l_lstm_sent = Bidirectional(LSTM(100, return_sequences=False))(sent_input)

    sent_dense = Dense(200, activation='relu', name='sent_dense')(l_lstm_sent)
    sent_drop = Dropout(0.3, name='sent_dropout')(sent_dense)
    preds = Dense(1, activation='sigmoid', name='output')(sent_drop)

    # Model compile
    model = Model(sent_input, preds)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])

    # Train model on dataset.
    # max_queue_size limits how many prefetched batches wait in the generator
    # queue (Keras default: 10). Every batch is a dense
    # (50, no_sentences_per_doc, sentence_embedding) array, so a long queue
    # keeps a lot of data in RAM at once.
    history = model.fit_generator(generator=training_generator,
                                  epochs=epoch,
                                  validation_data=validation_generator,
                                  max_queue_size=2,
                                  workers=num_workers
                                  )

Ошибка, которую я получаю:

choose n_splits kfold
Testing sus-label..
Epoch 1/10
80/80 [==============================] - 327s 4s/step - loss: 0.7019 - acc: 0.5092 - val_loss: 0.6989 - val_acc: 0.4950
Epoch 2/10
80/80 [==============================] - 333s 4s/step - loss: 0.6949 - acc: 0.5095 - val_loss: 0.6954 - val_acc: 0.5175
Epoch 3/10
80/80 [==============================] - 347s 4s/step - loss: 0.6932 - acc: 0.5185 - val_loss: 0.6941 - val_acc: 0.5150
Epoch 4/10
80/80 [==============================] - 367s 5s/step - loss: 0.6916 - acc: 0.5190 - val_loss: 0.6908 - val_acc: 0.5125
Epoch 5/10
80/80 [==============================] - 375s 5s/step - loss: 0.6930 - acc: 0.5110 - val_loss: 0.6921 - val_acc: 0.5100
Epoch 6/10
80/80 [==============================] - 389s 5s/step - loss: 0.6911 - acc: 0.5225 - val_loss: 0.6938 - val_acc: 0.5150
Epoch 7/10
80/80 [==============================] - 415s 5s/step - loss: 0.6870 - acc: 0.5335 - val_loss: 0.6979 - val_acc: 0.5075
Epoch 8/10
80/80 [==============================] - 429s 5s/step - loss: 0.6870 - acc: 0.5293 - val_loss: 0.6964 - val_acc: 0.5125
Epoch 9/10
80/80 [==============================] - 449s 6s/step - loss: 0.6846 - acc: 0.5385 - val_loss: 0.7006 - val_acc: 0.4950
Epoch 10/10
80/80 [==============================] - 469s 6s/step - loss: 0.6820 - acc: 0.5458 - val_loss: 0.6996 - val_acc: 0.5125
8/8 [==============================] - 16s 2s/step
Epoch 1/10
16/80 [=====>........................] - ETA: 7:01 - loss: 0.7282 - acc: 0.5100 Traceback (most recent call last):

  File "<ipython-input-37-c0662c65bd58>", line 64, in <module>
    workers=num_workers)  # check if workers should be set to 1 or other on colab

  File "C:\Users\anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\anaconda3\lib\site-packages\keras\engine\training.py", line 1418, in fit_generator
    initial_epoch=initial_epoch)

  File "C:\Users\anaconda3\lib\site-packages\keras\engine\training_generator.py", line 181, in fit_generator
    generator_output = next(output_generator)

  File "C:\Users\anaconda3\lib\site-packages\keras\utils\data_utils.py", line 601, in get
    six.reraise(*sys.exc_info())

  File "C:\Users\anaconda3\lib\site-packages\six.py", line 693, in reraise
    raise value

  File "C:\Users\anaconda3\lib\site-packages\keras\utils\data_utils.py", line 595, in get
    inputs = self.queue.get(block=True).get()

  File "C:\Users\anaconda3\lib\multiprocessing\pool.py", line 657, in get
    raise self._value

  File "C:\Users\anaconda3\lib\multiprocessing\pool.py", line 121, in worker
    result = (True, func(*args, **kwds))

  File "C:\Users\anaconda3\lib\site-packages\keras\utils\data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]

  File "<ipython-input-27-5fce059fe2c4>", line 151, in __getitem__
    X, y = self._generate_data(list_IDs_temp)

  File "<ipython-input-27-5fce059fe2c4>", line 184, in _generate_data
    batch_features = np.zeros((self.sentence_length, self.features))

MemoryError: Unable to allocate array with shape (1000, 768) and data type float64

Повторный запуск вызывает такую ошибку:

Epoch 1/20
 5/80 [>.............................] - ETA: 10:22 - loss: 0.7472 - acc: 0.4720Traceback (most recent call last):

  File "<ipython-input-38-5dc410bd47a0>", line 62, in <module>
    validation_data=validation_generator,

  File "C:\Users\anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\anaconda3\lib\site-packages\keras\engine\training.py", line 1418, in fit_generator
    initial_epoch=initial_epoch)

  File "C:\Users\anaconda3\lib\site-packages\keras\engine\training_generator.py", line 217, in fit_generator
    class_weight=class_weight)

  File "C:\Users\anaconda3\lib\site-packages\keras\engine\training.py", line 1217, in train_on_batch
    outputs = self.train_function(ins)

  File "C:\Users\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)

  File "C:\Users\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py", line 2655, in _call
    dtype=tf.as_dtype(tensor.dtype).as_numpy_dtype))

  File "C:\Users\anaconda3\lib\site-packages\numpy\core\_asarray.py", line 85, in asarray
    return array(a, dtype, copy=False, order=order)

MemoryError: Unable to allocate array with shape (50, 1000, 768) and data type float32

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...