ValueError: The shape of the input to "Flatten" is not fully defined (got (0, 1, 4, 512)). Make sure to pass a complete "input_shape"

I am trying to implement this project: https://github.com/imatge-upc/activitynet-2016-cvprw

This is the code I am trying to run:

import argparse

import numpy as np
from keras.layers import (LSTM, BatchNormalization, Convolution3D, Dense, Dropout, Flatten, Input,
                          MaxPooling3D, TimeDistributed, ZeroPadding3D)
from keras.models import Model, Sequential

from src.data import import_labels
from src.io_data import get_duration, get_num_frames, video_to_array
from src.processing import activity_localization, get_classification, smoothing


def run_all_pipeline(input_video, smoothing_k, activity_threshold):
    input_size = (112, 112)
    length = 16

    # Load labels
    with open('dataset/labels.txt', 'r') as f:
        labels = import_labels(f)

    print('Reading Video...')
    video_array = video_to_array(input_video, resize=input_size)
    if video_array is None:
        raise Exception('The video could not be read')
    nb_frames = get_num_frames(input_video)
    duration = get_duration(input_video)
    fps = nb_frames / duration
    print('Duration: {:.1f}s'.format(duration))
    print('FPS: {:.1f}'.format(fps))
    print('Number of frames: {}'.format(nb_frames))

    nb_clips = nb_frames // length
    video_array = video_array.transpose(1, 0, 2, 3)
    video_array = video_array[:nb_clips * length, :, :, :]
    video_array = video_array.reshape((nb_clips, length, 3, 112, 112))
    video_array = video_array.transpose(0, 2, 1, 3, 4)

    # Load C3D model and mean
    print('Loading C3D network...')
    model = C3D_conv_features(True)
    model.compile(optimizer='sgd', loss='mse')
    mean_total = np.load('data/models/c3d-sports1M_mean.npy')
    mean = np.mean(mean_total, axis=(0, 2, 3, 4), keepdims=True)

    # Extract features
    print('Extracting features...')
    X = video_array - mean
    Y = model.predict(X, batch_size=1, verbose=1)

    # Load the temporal localization network
    print('Loading temporal localization network...')
    model_localization = temporal_localization_network(True)
    model_localization.compile(
        optimizer='rmsprop', loss='categorical_crossentropy')

    # Predict with the temporal localization network
    print('Predicting...')
    Y = Y.reshape(nb_clips, 1, 4096)
    prediction = model_localization.predict(Y, batch_size=1, verbose=1)
    prediction = prediction.reshape(nb_clips, 201)

    # Post-process the predicted output
    print('Post-processing output...')
    labels_idx, scores = get_classification(prediction, k=5)
    print('Video: {}\n'.format(input_video))
    print('Classification:')
    for idx, score in zip(labels_idx, scores):
        label = labels[idx]
        print('{:.4f}\t{}'.format(score, label))

    prediction_smoothed = smoothing(prediction, k=smoothing_k)
    activities_idx, startings, endings, scores = activity_localization(
        prediction_smoothed, activity_threshold)

    print('\nDetection:')
    print('Score\tInterval\t\tActivity')
    for idx, s, e, score in zip(activities_idx, startings, endings, scores):
        start = s * float(length) / fps
        end = e * float(length) / fps
        label = labels[idx]
        print(
            '{:.4f}\t{:.1f}s - {:.1f}s\t\t{}'.format(score, start, end, label))


def C3D_conv_features(summary=False):
    """ Return the Keras model of the network until the fc6 layer where the
    convolutional features can be extracted.
    """
    from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D
    from keras.layers.core import Dense, Dropout, Flatten
    from keras.models import Sequential

    model = Sequential()
    # 1st layer group
    model.add(
        Convolution3D(
            64,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv1',
            subsample=(1, 1, 1),
            input_shape=(3, 16, 112, 112),
            trainable=False))
    model.add(
        MaxPooling3D(
            pool_size=(1, 2, 2),
            strides=(1, 2, 2),
            border_mode='valid',
            name='pool1'))
    # 2nd layer group
    model.add(
        Convolution3D(
            128,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv2',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        MaxPooling3D(
            pool_size=(2, 2, 2),
            strides=(2, 2, 2),
            border_mode='valid',
            name='pool2'))
    # 3rd layer group
    model.add(
        Convolution3D(
            256,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv3a',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        Convolution3D(
            256,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv3b',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        MaxPooling3D(
            pool_size=(2, 2, 2),
            strides=(2, 2, 2),
            border_mode='valid',
            name='pool3'))
    # 4th layer group
    model.add(
        Convolution3D(
            512,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv4a',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        Convolution3D(
            512,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv4b',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        MaxPooling3D(
            pool_size=(2, 2, 2),
            strides=(2, 2, 2),
            border_mode='valid',
            name='pool4'))
    # 5th layer group
    model.add(
        Convolution3D(
            512,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv5a',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(
        Convolution3D(
            512,
            3,
            3,
            3,
            activation='relu',
            border_mode='same',
            name='conv5b',
            subsample=(1, 1, 1),
            trainable=False))
    model.add(ZeroPadding3D(padding=(0, 1, 1), name='zeropadding'))
    model.add(
        MaxPooling3D(
            pool_size=(2, 2, 2),
            strides=(2, 2, 2),
            border_mode='valid',
            name='pool5'))
    model.add(Flatten(name='flatten'))
    # FC layers group
    model.add(Dense(4096, activation='relu', name='fc6', trainable=False))
    model.add(Dropout(.5, name='do1'))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(.5, name='do2'))
    model.add(Dense(487, activation='softmax', name='fc8'))

    # Load weights
    model.load_weights('data/models/c3d-sports1M_weights.h5')

    for _ in range(4):
        model.pop_layer()

    if summary:
        print(model.summary())
    return model


def temporal_localization_network(summary=False):
    input_features = Input(batch_shape=(1, 1, 4096, ), name='features')
    input_normalized = BatchNormalization(name='normalization')(input_features)
    input_dropout = Dropout(p=.5)(input_normalized)
    lstm = LSTM(
        512, return_sequences=True, stateful=True, name='lsmt1')(input_dropout)
    output_dropout = Dropout(p=.5)(lstm)
    output = TimeDistributed(
        Dense(201, activation='softmax'), name='fc')(output_dropout)

    model = Model(input=input_features, output=output)
    model.load_weights('data/models/temporal-location_weights.hdf5')

    if summary:
        model.summary()
    return model


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=
        'Run all pipeline. Given a video, classify it and temporal localize the activity on it'
    )

    parser.add_argument(
        '-i',
        '--input-video',
        type=str,
        dest='input_video',
        help='Path to the input video')
    parser.add_argument(
        '-k',
        type=int,
        dest='smoothing_k',
        default=5,
        help='Smoothing factor at post-processing (default: %(default)s)')
    parser.add_argument(
        '-t',
        type=float,
        dest='activity_threshold',
        default=.2,
        help='Activity threshold at post-processing (default: %(default)s)')

    args = parser.parse_args()

    run_all_pipeline(args.input_video, args.smoothing_k,
                     args.activity_threshold)
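
For completeness, I invoke the script roughly like this (the video path is just a placeholder for my own file):

python scripts/run_all_pipeline.py -i /path/to/my_video.mp4 -k 5 -t 0.2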

But I get the following output:

Using Theano backend.
Reading Video...
Duration: 14.0s
FPS: 30.0
Number of frames: 419
Loading C3D network...
scripts/run_all_pipeline.py:108: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(64, (3, 3, 3), name="conv1", activation="relu", trainable=False, input_shape=(3, 16, 11..., padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:114: UserWarning: Update your `MaxPooling3D` call to the Keras 2 API: `MaxPooling3D(padding="valid", strides=(1, 2, 2), name="pool1", pool_size=(1, 2, 2))`
  name='pool1'))
scripts/run_all_pipeline.py:126: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(128, (3, 3, 3), name="conv2", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:132: UserWarning: Update your `MaxPooling3D` call to the Keras 2 API: `MaxPooling3D(padding="valid", strides=(2, 2, 2), name="pool2", pool_size=(2, 2, 2))`
  name='pool2'))
scripts/run_all_pipeline.py:144: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(256, (3, 3, 3), name="conv3a", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:155: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(256, (3, 3, 3), name="conv3b", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:161: UserWarning: Update your `MaxPooling3D` call to the Keras 2 API: `MaxPooling3D(padding="valid", strides=(2, 2, 2), name="pool3", pool_size=(2, 2, 2))`
  name='pool3'))
scripts/run_all_pipeline.py:173: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(512, (3, 3, 3), name="conv4a", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:184: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(512, (3, 3, 3), name="conv4b", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:190: UserWarning: Update your `MaxPooling3D` call to the Keras 2 API: `MaxPooling3D(padding="valid", strides=(2, 2, 2), name="pool4", pool_size=(2, 2, 2))`
  name='pool4'))
scripts/run_all_pipeline.py:202: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(512, (3, 3, 3), name="conv5a", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:213: UserWarning: Update your `Conv3D` call to the Keras 2 API: `Conv3D(512, (3, 3, 3), name="conv5b", activation="relu", trainable=False, padding="same", strides=(1, 1, 1))`
  trainable=False))
scripts/run_all_pipeline.py:220: UserWarning: Update your `MaxPooling3D` call to the Keras 2 API: `MaxPooling3D(padding="valid", strides=(2, 2, 2), name="pool5", pool_size=(2, 2, 2))`
  name='pool5'))
Traceback (most recent call last):
  File "scripts/run_all_pipeline.py", line 287, in <module>
    args.activity_threshold)
  File "scripts/run_all_pipeline.py", line 42, in run_all_pipeline
    model = C3D_conv_features(True)
  File "scripts/run_all_pipeline.py", line 221, in C3D_conv_features
    model.add(Flatten(name='flatten'))
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/sequential.py", line 182, in add
    output_tensor = layer(self.outputs[0])
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/base_layer.py", line 506, in __call__
    output_shape = self.compute_output_shape(input_shape)
  File "/usr/local/lib/python2.7/dist-packages/keras/layers/core.py", line 501, in compute_output_shape
    '(got ' + str(input_shape[1:]) + '). '
ValueError: The shape of the input to "Flatten" is not fully defined (got (0, 1, 4, 512)). Make sure to pass a complete "input_shape" or "batch_input_shape" argument to the first layer in your model.

I have tried the solutions I could find, such as changing the order of the dimensions in input_shape and switching the Keras backend between Theano and TensorFlow, but none of them solved the problem. Can anyone help?
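
For illustration, this is the kind of change I experimented with when reordering the input: forcing channels-first ordering before building the model, so that input_shape=(3, 16, 112, 112) is read as (channels, frames, height, width). This is only a minimal sketch of that attempt (assuming the Keras 2 backend API); I am not certain it is the right fix:

from keras import backend as K

# Interpret input shapes as (channels, frames, height, width)
# instead of the default channels-last ordering.
K.set_image_data_format('channels_first')

As far as I understand, the same setting can also be made permanent via "image_data_format": "channels_first" in ~/.keras/keras.json.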
