Как использовать BalancedBatchGenerator (from imblearn.keras import BalancedBatchGenerator) с массивом X_train, имеющим более двух измерений? - PullRequest
0 голосов
/ 16 апреля 2019

Я строю модель CNN в Keras и обучаю её на несбалансированном наборе данных. Для повторной выборки данных я использую imblearn.keras.balanced_batch_generator из библиотеки imblearn.

Мой массив x_train имеет форму (n_samples, 32, 32, 1), тогда как balanced_batch_generator, используемый вместе с fit_generator, принимает x_train только формы (n_samples, n_features).

Как объединить размеры изображения (32, 32, 1) так, чтобы они составляли одно измерение n_features?

train = np.array(train,dtype="float32") #as mnist
train_labels = np.array(train_labels,dtype="float32") #as mnist
train = np.reshape(train,(-1,64,64,1))

вывод:

(9098, 64, 64, 1)
(9098, 1)

Загрузка данных для CNN:

x_train = np.load(open(r'C:\...\train.npy', 'rb'))
y_train = np.load(open(r'C:\...\train_labels.npy', 'rb'))
y_train = keras.utils.to_categorical(y_train, num_classes = 5)

from imblearn.keras import BalancedBatchGenerator
from imblearn.keras import balanced_batch_generator

from imblearn.under_sampling import NearMiss


training_set_generator = balanced_batch_generator( # Create Training set
        x_train, y_train,
        sampler=NearMiss())



validation_set_generator = balanced_batch_generator( # Create Testing set
        x_valid, y_valid,
        sampler=NearMiss())


#STEP_SIZE_TRAIN=training_set_generator.n//training_set_generator.batch_size
#STEP_SIZE_VALID=validation_set_generator.n//validation_set_generator.batch_size

history = classifier.fit_generator(generator=training_set_generator, # Fit it to the training set and tested it on the testing set
        validation_data=validation_set_generator,
        epochs=10)

Сообщение об ошибке:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-63-32bb382e1b13> in <module>
     11 training_set_generator = balanced_batch_generator( # Create Training set
     12         x_train, y_train,
---> 13         sampler=NearMiss())
     14 
     15 

~\Anaconda3\lib\site-packages\imblearn\keras\_generator.py in balanced_batch_generator(X, y, sample_weight, sampler, batch_size, keep_sparse, random_state)
    233     return tf_bbg(X=X, y=y, sample_weight=sample_weight,
    234                   sampler=sampler, batch_size=batch_size,
--> 235                   keep_sparse=keep_sparse, random_state=random_state)

~\Anaconda3\lib\site-packages\imblearn\tensorflow\_generator.py in balanced_batch_generator(X, y, sample_weight, sampler, batch_size, keep_sparse, random_state)
    129         if sampler_.__class__.__name__ not in DONT_HAVE_RANDOM_STATE:
    130             set_random_state(sampler_, random_state)
--> 131     sampler_.fit_resample(X, y)
    132     if not hasattr(sampler_, 'sample_indices_'):
    133         raise ValueError("'sampler' needs to have an attribute "

~\Anaconda3\lib\site-packages\imblearn\base.py in fit_resample(self, X, y)
     78         self._deprecate_ratio()
     79 
---> 80         X, y, binarize_y = self._check_X_y(X, y)
     81 
     82         self.sampling_strategy_ = check_sampling_strategy(

~\Anaconda3\lib\site-packages\imblearn\base.py in _check_X_y(X, y)
    136     def _check_X_y(X, y):
    137         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
--> 138         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
    139         return X, y, binarize_y
    140 

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
    754                     ensure_min_features=ensure_min_features,
    755                     warn_on_dtype=warn_on_dtype,
--> 756                     estimator=estimator)
    757     if multi_output:
    758         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    568         if not allow_nd and array.ndim >= 3:
    569             raise ValueError("Found array with dim %d. %s expected <= 2."
--> 570                              % (array.ndim, estimator_name))
    571         if force_all_finite:
    572             _assert_all_finite(array,

ValueError: Found array with dim 4. Estimator expected <= 2.

...