Error when checking target: expected dense_6 to have 4 dimensions, but got array with shape (20116, 10)

I am trying to use a DNN model in Keras, but I get this error:

Error when checking target: expected dense_6 to have 4 dimensions, but got array with shape (20116, 10)

What does this error mean?

I have training and validation image data with the following shapes:

 print(X_train.shape, X_val.shape) # (20116, 150, 160, 1) (3550, 150, 160, 1)

This is my actual code (I am training a DNN model):

from keras.layers import ELU  # note: ELU is missing from the imports in the full script below

def dnn_asr():
    model = Sequential()

    model.add(Dense(2048, input_shape=(X_train.shape[1], X_train.shape[2], 1)))
    model.add(ELU())
    # model.add(Dropout(0.05))

    model.add(Dense(2048))
    model.add(ELU())
    # model.add(Dropout(0.05))

    model.add(Dense(2048))
    model.add(ELU())
    # model.add(Dropout(0.05))

    model.add(Dense(2048))
    model.add(ELU())
    # model.add(Dropout(0.05))

    model.add(Dense(2048))
    model.add(ELU())
    # model.add(Dropout(0.05))

    model.add(Dense(257))
    # model.add(Activation('tanh'))

    model.compile(loss='mse',
                  optimizer='adam',
                  metrics=['accuracy'])

    h = model.fit(x=X_train, y=Y_train, batch_size=32,
                  epochs=15,
                  verbose=1,
                  validation_data=(X_val, Y_val),
                  shuffle=True,
                  )

    return model

Please tell me what mistake I am making and how to fix it.

The output of model.summary():

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 150, 160, 2048)    4096      
_________________________________________________________________
elu_1 (ELU)                  (None, 150, 160, 2048)    0         
_________________________________________________________________
dense_2 (Dense)              (None, 150, 160, 2048)    4196352   
_________________________________________________________________
elu_2 (ELU)                  (None, 150, 160, 2048)    0         
_________________________________________________________________
dense_3 (Dense)              (None, 150, 160, 2048)    4196352   
_________________________________________________________________
elu_3 (ELU)                  (None, 150, 160, 2048)    0         
_________________________________________________________________
dense_4 (Dense)              (None, 150, 160, 2048)    4196352   
_________________________________________________________________
elu_4 (ELU)                  (None, 150, 160, 2048)    0         
_________________________________________________________________
dense_5 (Dense)              (None, 150, 160, 2048)    4196352   
_________________________________________________________________
elu_5 (ELU)                  (None, 150, 160, 2048)    0         
_________________________________________________________________
dense_6 (Dense)              (None, 150, 160, 257)     526593    
=================================================================
Total params: 17,316,097
Trainable params: 17,316,097
Non-trainable params: 0
_________________________________________________________________
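
Reading the summary, the mismatch seems to be that dense_6 outputs a 4D tensor (None, 150, 160, 257), while Y_train is 2D with shape (20116, 10): a Dense layer applied to a 4D input transforms only the last axis, so the (150, 160) axes survive all the way to the output. Below is a minimal sketch of one possible fix, assuming the targets are the 10 one-hot classes built by get_data further down (my assumption, not the repo's original design): flatten first so the model ends in a 2D output.

from keras.models import Sequential
from keras.layers import Dense, Flatten, ELU

def dnn_asr_fixed():
    # Hypothetical variant: collapse (150, 160, 1) into one 24000-dim
    # vector, so the final output is (None, 10), matching Y_train's
    # shape (20116, 10).
    model = Sequential()
    model.add(Flatten(input_shape=(150, 160, 1)))  # == X_train.shape[1:]
    model.add(Dense(2048))
    model.add(ELU())
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model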

In fact, I downloaded a repository from GitHub and am trying to modify its model.

This is my code:

from __future__ import print_function
import copy
import glob
import os
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import IPython
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from scipy.io import wavfile
from sklearn.model_selection import train_test_split

from keras import backend as K
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau
from keras.initializers import glorot_uniform
from keras.layers import Dropout, Dense, BatchNormalization, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.utils import to_categorical

def overlap(X, window_size, window_step):
    """
    Create an overlapped version of X
    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap
    window_size : int
        Size of windows to take
    window_step : int
        Step size between windows
    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X
    """
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    # Make sure there are an even number of windows before stridetricks
    append = np.zeros((window_size - len(X) % window_size))
    X = np.hstack((X, append))

    ws = window_size
    ss = window_step
    a = X

    valid = len(a) - ws
    nw = (valid) // ss
    out = np.ndarray((nw, ws), dtype=a.dtype)

    for i in range(nw):
        # "slide" the window along the samples
        start = i * ss
        stop = start + ws
        out[i] = a[start: stop]

    return out
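
A tiny toy check of overlap() (my own numbers, not from the gist):

sig = np.arange(10, dtype='float64')
# overlap() first zero-pads to a multiple of window_size (10 -> 12),
# then strides along it: (12 - 4) // 2 = 4 windows of length 4.
frames = overlap(sig, window_size=4, window_step=2)
print(frames.shape)  # (4, 4)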

def stft(X, fftsize=128, step=65, mean_normalize=True, real=False,
         compute_onesided=True):
    """
    Compute STFT for 1D real valued input X
    """
    if real:
        local_fft = np.fft.rfft
        cut = -1
    else:
        local_fft = np.fft.fft
        cut = None
    if compute_onesided:
        cut = fftsize // 2
    if mean_normalize:
        X -= X.mean()

    X = overlap(X, fftsize, step)

    size = fftsize
    win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))
    X = X * win[None]
    X = local_fft(X)[:, :cut]
    return X
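
And a quick sanity check of stft() on a synthetic tone (again a toy example of mine):

fs = 8000
t = np.arange(fs) / float(fs)
tone = np.sin(2 * np.pi * 1000 * t)  # 1 kHz test tone, 1 s long
# One-sided output keeps fftsize // 2 = 64 complex bins per frame.
spec = stft(tone, fftsize=128, step=65)
print(spec.shape)  # (122, 64)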

def pretty_spectrogram(d, log=True, thresh=5, fft_size=512, step_size=64):
    """
    creates a spectrogram
    log: take the log of the spectrgram
    thresh: threshold minimum power for log spectrogram
    """
    specgram = np.abs(stft(d, fftsize=fft_size, step=step_size, real=False,
                           compute_onesided=True))

    if log == True:
        specgram /= specgram.max()  # volume normalize to max 1
        specgram = np.log10(specgram)  # take log
        specgram[specgram < -thresh] = -thresh  # set anything less than the threshold as the threshold
    else:
        specgram[specgram < thresh] = thresh  # set anything less than the threshold as the threshold

    return specgram

# Also mostly modified or taken from https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
def invert_pretty_spectrogram(X_s, log=True, fft_size=512, step_size=512 / 4, n_iter=10):
    if log == True:
        X_s = np.power(10, X_s)

    X_s = np.concatenate([X_s, X_s[:, ::-1]], axis=1)
    X_t = iterate_invert_spectrogram(X_s, fft_size, step_size, n_iter=n_iter)
    return X_t

def iterate_invert_spectrogram(X_s, fftsize, step, n_iter=10, verbose=False):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    reg = np.max(X_s) / 1E8
    X_best = copy.deepcopy(X_s)
    for i in range(n_iter):
        if verbose:
            print("Runnning iter %i" % i)
        if i == 0:
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=True)
        else:
            # Calculate offset was False in the MATLAB version
            # but in mine it massively improves the result
            # Possible bug in my impl?
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=False)
        est = stft(X_t, fftsize=fftsize, step=step, compute_onesided=False)
        phase = est / np.maximum(reg, np.abs(est))
        X_best = X_s * phase[:len(X_s)]
    X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                             set_zero_phase=False)
    return np.real(X_t)

def invert_spectrogram(X_s, step, calculate_offset=True, set_zero_phase=True):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    size = int(X_s.shape[1] // 2)
    wave = np.zeros((X_s.shape[0] * step + size))
    # Getting overflow warnings with 32 bit...
    wave = wave.astype('float64')
    total_windowing_sum = np.zeros((X_s.shape[0] * step + size))
    win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))

    est_start = int(size // 2) - 1
    est_end = est_start + size
    for i in range(X_s.shape[0]):
        wave_start = int(step * i)
        wave_end = wave_start + size
        if set_zero_phase:
            spectral_slice = X_s[i].real + 0j
        else:
            # already complex
            spectral_slice = X_s[i]

        # Don't need fftshift due to different impl.
        wave_est = np.real(np.fft.ifft(spectral_slice))[::-1]
        if calculate_offset and i > 0:
            offset_size = size - step
            if offset_size <= 0:
                print("WARNING: Large step size >50\% detected! "
                      "This code works best with high overlap - try "
                      "with 75% or greater")
                offset_size = step
            offset = xcorr_offset(wave[wave_start:wave_start + offset_size],
                                  wave_est[est_start:est_start + offset_size])
        else:
            offset = 0
        wave[wave_start:wave_end] += win * wave_est[
                                           est_start - offset:est_end - offset]
        total_windowing_sum[wave_start:wave_end] += win
    wave = np.real(wave) / (total_windowing_sum + 1E-6)
    return wave

def xcorr_offset(x1, x2):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    x1 = x1 - x1.mean()
    x2 = x2 - x2.mean()
    frame_size = len(x2)
    half = frame_size // 2
    corrs = np.convolve(x1.astype('float32'), x2[::-1].astype('float32'))
    corrs[:half] = -1E30
    corrs[-half:] = -1E30
    offset = corrs.argmax() - len(x1)
    return offset

def pad_audio(samples, t=1, L=8000):
    """Left-pad a short clip with zeros so it reaches L samples (1 s at 8 kHz)."""
    if len(samples) >= t * L:
        return samples
    else:
        return np.pad(samples, pad_width=(L - len(samples), 0), mode='constant', constant_values=(0, 0))

def chop_audio(samples, t=1, L=8000):
    """Truncate a long clip to its first L samples."""
    if len(samples) <= t * L:
        return samples
    else:
        return samples[0:L]
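
Taken together, pad_audio and chop_audio force every clip to exactly L = 8000 samples (one second at 8 kHz); a quick illustration:

short_clip = np.ones(5000)
long_clip = np.ones(12000)
# Both end up exactly 8000 samples long.
print(len(chop_audio(pad_audio(short_clip))))  # 8000
print(len(chop_audio(pad_audio(long_clip))))   # 8000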

def get_data(paths, label):
    """Read each wav, resample to 8 kHz, normalize its length to 8000
    samples, and return spectrograms X plus one-hot labels Y."""
    new_sample_rate = 8000
    max_len = 20
    X = []
    Y = []

    for i, fname in enumerate(paths):
        sample_rate, samples = wavfile.read(fname)
        resampled = signal.resample(samples, int(8000 / sample_rate * samples.shape[0]))
        samples = pad_audio(resampled)
        samples = chop_audio(samples)
        specgram = pretty_spectrogram(samples.astype('float64'), fft_size=fft_size,
                                      step_size=int(step_size), log=True, thresh=spec_thresh)
        Y.append(label)
        X.append(specgram)

        if i == len(paths) - 1:
            end = '\n'
        else:
            end = '\r'
        print('processed {}/{}'.format(i + 1, len(paths)), end=end)

    Y = to_categorical(Y, 10)

    return X, Y

### Parameters ###
L = 8000  # target clip length in samples (1 second at the 8 kHz sample rate)
fft_size = 320  # window size for the FFT
step_size = fft_size / 6  # distance to slide along the window (in time)
spec_thresh = 4  # threshold for spectrograms (lower filters out more noise)
lowcut = 500  # Hz # Low cut for our butter bandpass filter
highcut = 8000  # Hz # High cut for our butter bandpass filter
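
For reference, these parameters are exactly what produce the (150, 160) spectrogram shape printed earlier; a quick check of the arithmetic (my own sanity check, assuming 1-second clips at 8 kHz):

# overlap() pads 8000 samples up to 8320 (the next multiple of fft_size),
# then takes (8320 - 320) // 53 = 150 frames; the one-sided FFT keeps
# 320 // 2 = 160 frequency bins, hence spectrograms of shape (150, 160).
n_frames = (8000 + (320 - 8000 % 320) - 320) // int(step_size)
n_bins = fft_size // 2
print(n_frames, n_bins)  # 150 160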

# Grab your wav and filter it
mywav = 'zero/0132a06d_nohash_1.wav'
rate, data = wavfile.read(mywav)
data = signal.resample(data, int(8000 / rate * data.shape[0]))
print('Original sample rate: ', rate)
rate = 8000
data = pad_audio(data)
data = chop_audio(data)
#data = butter_bandpass_filter(data, lowcut, highcut, rate, order=1)
# Only use a short clip for our demo
if np.shape(data)[0]/float(rate) > 10:
    data = data[0:rate*10]
print('Length in time (s): ', np.shape(data)[0]/float(rate))

wav_spectrogram = pretty_spectrogram(data.astype('float64'), fft_size = fft_size,
                                   step_size = int(step_size), log = True, thresh = spec_thresh)

fig, ax = plt.subplots(nrows=1,ncols=1, figsize=(6,6))
cax = ax.matshow(np.transpose(wav_spectrogram), interpolation='nearest', aspect='auto', cmap=plt.cm.afmhot, origin='lower')
fig.colorbar(cax)
plt.title('Original Spectrogram')
plt.show()
recovered_audio_orig = invert_pretty_spectrogram(wav_spectrogram, fft_size = fft_size,
                                            step_size = int(step_size), log = True, n_iter = 10)
print(recovered_audio_orig.shape)


data_dir=os.path.join('')
paths_a=glob.glob(os.path.join(data_dir,'one','*.wav'))
paths_b=glob.glob(os.path.join(data_dir,'two','*.wav'))
paths_c=glob.glob(os.path.join(data_dir,'three','*.wav'))
paths_d=glob.glob(os.path.join(data_dir,'four','*.wav'))
paths_e=glob.glob(os.path.join(data_dir,'five','*.wav'))
paths_f=glob.glob(os.path.join(data_dir,'six','*.wav'))
paths_g=glob.glob(os.path.join(data_dir,'seven','*.wav'))
paths_h=glob.glob(os.path.join(data_dir,'eight','*.wav'))
paths_i=glob.glob(os.path.join(data_dir,'nine','*.wav'))
paths_j=glob.glob(os.path.join(data_dir,'zero','*.wav'))
paths_all=paths_a+paths_b+paths_c+paths_d+paths_e+paths_f+paths_g+paths_h+paths_i+paths_j

X_a, Y_a = get_data(paths_a, 1)
X_b, Y_b = get_data(paths_b, 2)
X = np.concatenate((X_a, X_b), axis=0)
Y = np.concatenate((Y_a, Y_b), axis=0)
X_a, Y_a = get_data(paths_c, 3)
X_b, Y_b = get_data(paths_d, 4)
X = np.concatenate((X, X_a, X_b), axis=0)
Y = np.concatenate((Y, Y_a, Y_b), axis=0)
X_a, Y_a = get_data(paths_e, 5)
X_b, Y_b = get_data(paths_f, 6)
X = np.concatenate((X, X_a, X_b), axis=0)
Y = np.concatenate((Y, Y_a, Y_b), axis=0)
X_a, Y_a = get_data(paths_g, 7)
X_b, Y_b = get_data(paths_h, 8)
X = np.concatenate((X, X_a, X_b), axis=0)
Y = np.concatenate((Y, Y_a, Y_b), axis=0)
del X_b
del Y_b
X_a, Y_a = get_data(paths_i, 9)
X = np.concatenate((X, X_a), axis=0)
Y = np.concatenate((Y, Y_a), axis=0)
X_a, Y_a = get_data(paths_j, 0)
X = np.concatenate((X, X_a), axis=0)
Y = np.concatenate((Y, Y_a), axis=0)
print(X.shape, Y.shape)

X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.15, random_state=5, shuffle=True)
X_train = X_train.reshape((X_train.shape[0],X_train.shape[1], X_train.shape[2],1))
X_val = X_val.reshape(X_val.shape[0],X_val.shape[1], X_val.shape[2],1)
print(X_train.shape, X_val.shape)


def custom():
    model = Sequential()
    model.add(Conv2D(16, kernel_size=(7, 7), name='one', activation='relu', padding='same',
                     input_shape=(X_train.shape[1], X_train.shape[2], 1), kernel_initializer=glorot_uniform(seed=0)))
    model.add(BatchNormalization(axis=3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(16, kernel_size=(5, 5), name='two', activation='relu', padding='same',
                     kernel_initializer=glorot_uniform(seed=0)))
    model.add(BatchNormalization(axis=3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(32, kernel_size=(3, 3), name='three', activation='relu', padding='same',
                     kernel_initializer=glorot_uniform(seed=0)))
    model.add(BatchNormalization(axis=3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

# K.clear_session()

model = custom()
model.summary()

K.set_value(model.optimizer.lr,0.001)
# model_check_point = ModelCheckpoint(filepath='model.hdf5', save_best_only=True)  # from keras.callbacks

learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

h = model.fit(x=X_train, y=Y_train, batch_size=32,
              epochs=15,
              verbose=1,
              validation_data=(X_val, Y_val),
              shuffle=True,
              # callbacks=[learning_rate_reduction, model_check_point]
              )

Any help would be appreciated!

...