декодирование tfrecord, содержащего видеокадры - PullRequest
0 голосов
/ 10 апреля 2019
I'm trying to decode a tfrecord file containing video frames, labels and 
   other important features to train a CRNN.

   *Eager execution is enabled*

   Two things are stopping me from progressing

$ 1.Когда я распечатываю Тензор, содержащий, например, мои декодированные метки, это вывод, который я получаю:

   Tensor("ParseSingleExample/ParseSingleExample:4", shape=(), 
   dtype=int64)

   Since I have eager execution enabled this output makes me believe that 
   this tensor is empty.

$ 2.Когда я пытаюсь перебрать возвращенные значения моей функции декодирования (фреймы, метки), я получаю эту ошибку:

   slice index 246 of dimension 0 out of bounds.
     [[Node: map/while/strided_slice = StridedSlice[Index=DT_INT64, 
         T=DT_STRING, begin_mask=0, ellipsis_mask=0, end_mask=0, 
         new_axis_mask=0, shrink_axis_mask=1] 
(map/while/strided_slice/Enter, 
         map/while/strided_slice/stack, map/while/strided_slice/stack_1, 
         map/while/strided_slice/Cast)]] [Op:IteratorGetNextSync]

$ Ниже я приведу сначала код, который использовал для генерации tfrecord изатем код, который я использую для его декодирования.

    def _int64_feature(value):
        """Wrapper for inserting int64 features into Example proto."""
        if not isinstance(value, list):
            value = [value]
        return 
tf.train.Feature(int64_list=tf.train.Int64List(value=value))

    def _bytes_feature(value):
        """Wrapper for inserting bytes features into Example proto."""
        return tf.train.Feature(bytes_list=tf.train.BytesList(value= 
[value]))

    def _bytes_list_feature(values):
        """Wrapper for inserting bytes features into Example proto."""
        return 
tf.train.Feature(bytes_list=tf.train.BytesList(value=values))


    def load_image(addr):
        # read an image and resize to (224, 224)
        # cv2 load images as BGR, convert it to RGB
        img = cv2.imread(addr)
        if img is None:
            return None
        img = cv2.resize(img, (150, 150), interpolation=cv2.INTER_CUBIC)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img


    path_frames = 'C:/path_to_frames'
    output_file = 'test.tfrecords'
    print(output_file)

    def count_frames(path_frames):
      list = os.listdir(path_frames) 
      number_files = len(list)
      return number_files

    count = 0
    with tf.python_io.TFRecordWriter(output_file) as writer:

      cnt_frames = count_frames(path_frames)
      for file in glob.glob(path_frames+"*.jpg"):
        realpath, name = os.path.split(file)
        filename, file_extension = os.path.splitext(name)
        filename_split = filename.split('-')
        name_real =filename_split[0]

        if name_real == 'walk':
          class_id = 1
          class_label = 'walk'
        elif name_real == 'running':
          class_id = 2
          class_label = 'running'

        # Read and resize all video frames, np.uint8 of size [N,H,W,3]
        frames = load_image(file)


        features = {}
        features['num_frames']  = _int64_feature(cnt_frames)               
        features['height']      = _int64_feature(frames.shape[0])          
        features['width']       = _int64_feature(frames.shape[1])          
        features['channels']    = _int64_feature(frames.shape[2])          
        features['class_label'] = _int64_feature(class_id)                 
        features['class_text']  = 
    _bytes_feature(tf.compat.as_bytes(class_label))
        features['filename']    = 
 _bytes_feature(tf.compat.as_bytes(file))

        # Compress the frames using JPG and store in as a list of strings 
 in 
    'frames'
        encoded_frames = 
    [tf.compat.as_bytes(cv2.imencode(".jpg",frame[1].tobytes())
                          for frame in frames]
        features['frames'] = _bytes_list_feature(encoded_frames)
        print(file)

        tfrecord_example = 
    tf.train.Example(features=tf.train.Features(feature=features))
        writer.write(tfrecord_example.SerializeToString())

        count +=1




_______________________________________________________________________





    def decode(serialized_example):
      # Prepare feature list; read encoded JPG images as bytes
      features = dict()
      features["class_label"] = tf.FixedLenFeature((), tf.int64)
      features["frames"] = tf.VarLenFeature(tf.string)
      features["num_frames"] = tf.FixedLenFeature((), tf.int64)
      features['height'] = tf.FixedLenFeature((), tf.int64)
      features['width'] = tf.FixedLenFeature((), tf.int64)
      features['channels'] = tf.FixedLenFeature((), tf.int64)
      # Parse into tensors
      parsed_features = tf.parse_single_example(serialized_example, 
    features)

      # Randomly sample offset from the valid range.
      SEQ_NUM_FRAMES = len(features["num_frames"]) 

      random_offset = tf.random_uniform(
          shape=(), minval=0,
          maxval=parsed_features["num_frames"] - SEQ_NUM_FRAMES, 
    dtype=tf.int64)

      offsets = tf.range(random_offset, random_offset + SEQ_NUM_FRAMES)

      # Decode the encoded JPG images
      images = tf.map_fn(lambda i: 
    tf.image.decode_jpeg(parsed_features["frames"].values[i]),
                         offsets)

      images = tf.cast(images, tf.uint8)
      label = tf.cast(parsed_features["class_label"], tf.int64)
      return images, label

    filenames = 'test.tfrecords'

    dataset = tf.data.TFRecordDataset(filenames) 
    dataset = dataset.map(decode)
    dataset = dataset.batch(150)
    iterator = dataset.make_one_shot_iterator()
    one_full_video, label_frames = iterator.get_next()

$ Кто-нибудь сталкивался с такой же или похожей проблемой?

...