I was trying to train a BERT model in TensorFlow 2.0 and ran into an error when I called model.fit. Does anyone know what this error means? Thanks in advance.
!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow_hub as hub
import tokenization
%%time
module_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/1"
bert_layer = hub.KerasLayer(module_url, trainable=True)
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)
def bert_encode(texts, tokenizer, max_len=512):
    all_tokens = []
    all_masks = []
    all_segments = []

    for text in texts:
        # Tokenize, truncate to leave room for the special tokens, then pad to max_len
        text = tokenizer.tokenize(text)
        text = text[:max_len - 2]
        input_sequence = ["[CLS]"] + text + ["[SEP]"]
        pad_len = max_len - len(input_sequence)

        tokens = tokenizer.convert_tokens_to_ids(input_sequence)
        tokens += [0] * pad_len
        pad_masks = [1] * len(input_sequence) + [0] * pad_len
        segment_ids = [0] * max_len

        all_tokens.append(tokens)
        all_masks.append(pad_masks)
        all_segments.append(segment_ids)

    return np.array(all_tokens), np.array(all_masks), np.array(all_segments)
train_input = bert_encode(train.text.values, tokenizer, max_len=160)
test_input = bert_encode(test.text.values, tokenizer, max_len=160)
train_labels = train.target.values
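(Not part of the original notebook, but as a quick sanity check it can help to confirm what bert_encode returns before building the model: three integer arrays, each of shape (num_examples, max_len), aligned with the labels. A minimal sketch, assuming train_input and train_labels as defined above; the printed names are only illustrative.)

# Hypothetical sanity check: inspect the encoded inputs before training.
for name, arr in zip(["input_word_ids", "input_mask", "segment_ids"], train_input):
    print(name, arr.shape, arr.dtype)   # expected: (num_examples, 160), integer dtype
print("labels:", train_labels.shape)    # expected: (num_examples,)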
def build_model(bert_layer, max_len=512):
    input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    input_mask = Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
    segment_ids = Input(shape=(max_len,), dtype=tf.int32, name="segment_ids")

    pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])
    clf_output = sequence_output[:, 0, :]
    out = Dense(1, activation='sigmoid')(clf_output)

    model = Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)
    model.compile(Adam(lr=2e-6), loss='binary_crossentropy', metrics=['accuracy'])

    return model
model = build_model(bert_layer, max_len=160)
model.summary()
train_history = model.fit(
    train_input, train_labels,
    validation_split=0.3,
    epochs=3,
    batch_size=12
)
ERROR:

UnboundLocalError                         Traceback (most recent call last)
<ipython-input> in <module>
      3     validation_split=0.3,
      4     epochs=3,
----> 5     batch_size=12
      6 )

/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
     64   def _method_wrapper(self, *args, **kwargs):
     65     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
---> 66       return method(self, *args, **kwargs)
     67
     68     # Running inside `run_distribute_coordinator` already.

/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
    854               logs = tmp_logs  # No error, now safe to assign to logs.
    855               callbacks.on_train_batch_end(step, logs)
--> 856     epoch_logs = copy.copy(logs)
    857
    858     # Run validation.

UnboundLocalError: local variable 'logs' referenced before assignment
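(For context: in the fit() frame above, logs is only assigned inside the per-batch loop at line 854, so the UnboundLocalError at line 856 suggests that loop never ran, i.e. the first epoch produced zero training batches. That is only a reading of the traceback, not a confirmed diagnosis; a minimal hedged check of the data actually handed to fit, assuming train_input and train_labels from above, might look like this.)

# Hypothetical pre-fit check: make sure the encoded data is non-empty and aligned.
assert len(train_input) == 3
n = train_input[0].shape[0]
print("examples:", n, "labels:", train_labels.shape[0])
assert n > 0 and n == train_labels.shape[0]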