BalancedBatchGenerator выбрасывает AttributeError в model.fit_generator - PullRequest
0 голосов
/ 16 мая 2019

Я новичок в tf и keras; я использую блокнот Colab с Python 3 и tensorflow.keras версии 2.2.4-tf.

Вызов pip list показывает

imbalanced-learn         0.4.3                
imblearn                 0.0 

Я пытаюсь использовать BalancedBatchGenerator из imblearn.keras вместе с model.fit_generator и получаю ошибку AttributeError:

AttributeError: 'BalancedBatchGenerator' object has no attribute 'shape'

Входные обучающие данные и метки представляют собой массивы numpy, у которых есть атрибут shape, поэтому я не понимаю, что идёт не так.

nest.flatten(Xfeatures_train)[0]

>>array([[9.97292995e-03, 4.43795400e-09, 1.99458601e-08, ...,
        1.99458599e-02, 5.98375825e-03, 9.97293055e-01],
       [9.97279119e-03, 2.79238166e-09, 5.98367507e-08, ...,
       [5.35506010e-02, 2.52044821e-08, 7.14007982e-08, ...,
        7.14007989e-02, 2.14202404e-02, 9.63910818e-01]], 
        dtype=float32)

>> (1106, 7)

nest.flatten(ylabels_train)[0]

>>array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

>> (1106, 3)
feature_dimensions=Xfeatures_train.shape[1]
tensor_dimensions=7

n_hidden_1 = 256 # 1st hidden layer
n_hidden_2 = 128
n_classes = 3 # classes for prediction

model = tf.keras.Sequential([
    tf.keras.layers.Dense(tensor_dimensions, input_dim=feature_dimensions, activation='relu'),
    tf.keras.layers.Dense(n_hidden_1, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(n_hidden_2, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(n_classes, activation='softmax')
])

model.compile(optimizer='sgd', loss='categorical_crossentropy',
              metrics=['accuracy'])
from imblearn.keras import BalancedBatchGenerator
from imblearn.under_sampling import RandomUnderSampler

training_generator=BalancedBatchGenerator(Xfeatures_train, ylabels_train, sampler=RandomUnderSampler(), batch_size=10)

training_epochs = 2
callback_history = model.fit_generator(training_generator, epochs=training_epochs, steps_per_epoch=10, verbose=2)
ERROR ----
----> 3 callback_history = model.fit_generator(training_generator, epochs=training_epochs, steps_per_epoch=10, verbose=2)
      4 #history = model.fit_generator(BalancedBatchGenerator(Xfeatures_train, ylabels_train, sampler=RandomUnderSampler(), batch_size=batch_size, random_state=42),
      5 #                              steps_per_epoch=len(Xfeatures_train)//batch_size,

2 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   1424         use_multiprocessing=use_multiprocessing,
   1425         shuffle=shuffle,
-> 1426         initial_epoch=initial_epoch)
   1427 
   1428   def evaluate_generator(self,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, **kwargs)
    113       batch_size=batch_size,
    114       epochs=epochs - initial_epoch,
--> 115       shuffle=shuffle)
    116 
    117   do_validation = validation_data is not None

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_generator.py in convert_to_generator_like(data, batch_size, steps_per_epoch, epochs, shuffle)
    375 
    376   # Create generator from NumPy or EagerTensor Input.
--> 377   num_samples = int(nest.flatten(data)[0].shape[0])
    378   if batch_size is None:
    379     raise ValueError('You must specify `batch_size`')

AttributeError: 'BalancedBatchGenerator' object has no attribute 'shape'

Я могу воспроизвести эту ошибку со следующим кодом

from sklearn.datasets import load_iris
iris = load_iris()
from imblearn.datasets import make_imbalance
class_dict = dict()
class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40
X, y = make_imbalance(iris.data, iris.target, class_dict)
X=X.astype('float32')

#import keras
y = tf.keras.utils.to_categorical(y, 3)


model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
                             activation='softmax'))

model1.compile(optimizer='sgd', loss='categorical_crossentropy',
              metrics=['accuracy'])

from imblearn.keras import BalancedBatchGenerator
from imblearn.under_sampling import NearMiss
from imblearn.under_sampling import RandomUnderSampler
training_generator1 = BalancedBatchGenerator(X, y, sampler=RandomUnderSampler(), batch_size=10, random_state=42)

#training_generator1
callback_history = model1.fit_generator(generator=training_generator1,
                                      epochs=10, verbose=2)

В приведённом выше коде, если я раскомментирую #import keras и заменю tf.keras на keras, ОШИБКА похожа; похоже, дело в некоторой разнице между API-интерфейсами keras и tf.keras.

...