Tensorflow XLM Роберта Мульти-Класс - PullRequest
0 голосов
/ 30 мая 2020

Я пытаюсь точно настроить модель классификации последовательности XLMRoberta. У меня есть массив текстовых фрагментов от врачей с номерами 1-8 с различными диагностическими показаниями c. Я создал объект набора данных тензорного потока с помощью следующей функции:

import tensorflow as tf
from transformers import XLMRobertaTokenizer

from tensorflow.keras.utils import to_categorical

tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

def map_example_to_dict(input_ids, attention_masks, label):
    return {
      "input_ids": input_ids,
      "attention_mask": attention_masks,
    }, label

def convert_raw_to_xlmroberta_tfdataset(df_merge):
    Y = list(df_merge['ANS'])
    Y = [x-1 for x in Y]


    input_ids = []
    attention_mask = []
    labels = to_categorical(Y,num_classes=8) #one-hot encodes
    input_labels = []
    for i, text in enumerate(df_merge['Diagnostic Test Indication'].values):
        nlp_input = tokenizer.encode_plus(
                                text,                      
                                add_special_tokens = True, 
                                max_length = 300, #
                                pad_to_max_length = True, #
                                return_attention_mask = True, 
                  )

        input_ids.append(nlp_input['input_ids'])
        attention_mask.append(nlp_input['attention_mask'])
        input_labels.append([labels[i]])

    return tf.data.Dataset.from_tensor_slices((input_ids,
                                               attention_mask,input_labels)).map(map_example_to_dict)
# train dataset
batch_size = 100
ds_train_encoded = convert_raw_to_xlmroberta_tfdataset(df_merge).shuffle(10000).batch(batch_size)

Затем я создаю модель с помощью:

from transformers import TFXLMRobertaForSequenceClassification
import tensorflow as tf


# recommended learning rate for Adam 5e-5, 3e-5, 2e-5
learning_rate = 2e-5
# we will do just 1 epoch for illustration, though multiple epochs might be better as long as we will not overfit the model
number_of_epochs = 2
# model initialization
model = TFXLMRobertaForSequenceClassification.from_pretrained("jplu/tf-xlm-roberta-base")
# classifier Adam recommended
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
# we do not have one-hot vectors, we can use sparce categorical cross entropy and accuracy
loss = tf.keras.losses.SparseCategoricalCrossentropy()
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

После подбора модели я постоянно получаю ошибку:

Epoch 1/2

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-71-09e67b5e4b06> in <module>
----> 1 history = model.fit(ds_train_encoded, epochs=number_of_epochs)

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
     63   def _method_wrapper(self, *args, **kwargs):
     64     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
---> 65       return method(self, *args, **kwargs)
     66 
     67     # Running inside `run_distribute_coordinator` already.

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    781                 batch_size=batch_size):
    782               callbacks.on_train_batch_begin(step)
--> 783               tmp_logs = train_function(iterator)
    784               # Catch OutOfRangeError for Datasets of unknown size.
    785               # This blocks until the batch has finished executing.

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    577         xla_context.Exit()
    578     else:
--> 579       result = self._call(*args, **kwds)
    580 
    581     if tracing_count == self._get_tracing_count():

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    624       # This is the first call of __call__, so we have to initialize.
    625       initializers = []
--> 626       self._initialize(args, kwds, add_initializers_to=initializers)
    627     finally:
    628       # At this point we know that the initialization is complete (or less

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
    504     self._concrete_stateful_fn = (
    505         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 506             *args, **kwds))
    507 
    508     def invalid_creator_scope(*unused_args, **unused_kwds):

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2444       args, kwargs = None, None
   2445     with self._lock:
-> 2446       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2447     return graph_function
   2448 

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2775 
   2776       self._function_cache.missed.add(call_context_key)
-> 2777       graph_function = self._create_graph_function(args, kwargs)
   2778       self._function_cache.primary[cache_key] = graph_function
   2779       return graph_function, args, kwargs

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2665             arg_names=arg_names,
   2666             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2667             capture_by_value=self._capture_by_value),
   2668         self._function_attributes,
   2669         # Tell the ConcreteFunction to clean up its graph once it goes out of

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    979         _, original_func = tf_decorator.unwrap(python_func)
    980 
--> 981       func_outputs = python_func(*func_args, **func_kwargs)
    982 
    983       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
    439         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    440         # the function a weak reference to itself to avoid a reference cycle.
--> 441         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    442     weak_wrapped_fn = weakref.ref(wrapped_fn)
    443 

~\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\engine\training.py:503 train_function  *
        outputs = self.distribute_strategy.run(
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\engine\training.py:466 train_step  **
        y, y_pred, sample_weight, regularization_losses=self.losses)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\losses.py:143 __call__
        losses = self.call(y_true, y_pred)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\losses.py:246 call
        return self.fn(y_true, y_pred, **self._fn_kwargs)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\losses.py:1527 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\keras\backend.py:4579 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    C:\Users\kyle\Anaconda3\envs\py37\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1117 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1, 8) and (None, 2) are incompatible

Я пробовал как разреженную категориальную перекрестную энтропию, так и просто категориальную перекрестную энтропию. Я использовал метки с горячим кодированием и «обычные» метки. Можно ли вообще выполнить мультиклассовую классификацию с TFXLMRoberta? Это начало работать, когда я вставил двоичный фиктивный набор этикеток.

...