I modified the code I found here: https://github.com/geifmany/cifar-vgg/blob/master/cifar100vgg.py so that it works with tf.keras, but when I use fit_generator I get a "Device or resource busy" error. When I train with tf.keras.models.Sequential.fit instead, I do not get this error.
from __future__ import print_function
import numpy as np
import os
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
import tensorflow as tf
import tensorflow.keras as keras
def build_model():
    # VGG architecture from the linked cifar100vgg.py; body omitted in this snippet
    ...

def normalize(x, y):
    # normalization of the train/test images; body omitted in this snippet
    ...

def main():
    batch_size = 128
    maxepochs = 250
    learning_rate = 0.1
    lr_decay = 1e-6
    lr_drop = 20
    num_classes = 100  # CIFAR-100
    tf.logging.set_verbosity(tf.logging.INFO)
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train, x_test = normalize(x_train, x_test)

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = build_model()

    def lr_scheduler(epoch):
        return learning_rate * (0.5 ** (epoch // lr_drop))
    reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=15,                    # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=False)                  # randomly flip images
    datagen.fit(x_train)
    sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=None,
                        epochs=maxepochs,
                        use_multiprocessing=False,
                        validation_data=(x_test, y_test),
                        callbacks=[reduce_lr],
                        verbose=1)

if __name__ == '__main__':
    main()
This is the error I get when training finishes (after 250 epochs):
Traceback (most recent call last):
  File "/BS/work/anaconda3/envs/gpu/lib/python3.6/multiprocessing/util.py", line 262, in _run_finalizers
    finalizer()
  File "/BS/work/anaconda3/envs/gpu/lib/python3.6/multiprocessing/util.py", line 186, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/BS/work/anaconda3/envs/gpu/lib/python3.6/shutil.py", line 486, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/BS/work/anaconda3/envs/gpu/lib/python3.6/shutil.py", line 444, in _rmtree_safe_fd
    onerror(os.unlink, fullname, sys.exc_info())
  File "/BS/work/anaconda3/envs/gpu/lib/python3.6/shutil.py", line 442, in _rmtree_safe_fd
    os.unlink(name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfs00000000000890d30000a675'
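
For comparison, the training call with Sequential.fit that does not produce this error looks roughly like this (a sketch of what I tried; it trains on the in-memory numpy arrays directly, so the ImageDataGenerator augmentation is not applied):

    # Sketch of the fit-based run that finishes without the "Device or resource busy" error
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=maxepochs,
              validation_data=(x_test, y_test),
              callbacks=[reduce_lr],
              verbose=1)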