Я пытаюсь получить доступ к некоторым промежуточным слоям в BERT. Как следует из этого вопроса stackoverflow , я пытаюсь получить доступ к этим промежуточным слоям, получив тензор по их имени.
Мне удалось успешно получить к ним доступ с помощью следующего кода (для первого слоя)
import tensorflow_hub as hub
import tensorflow as tf
import os
import numpy as np
bert = hub.Module(
bert_path,
trainable=False,
)
ids = [[101,1,2,447,575,6,102]]
masks = [[1,1,1,1,1,1,1]]
segments = [[1,1,1,1,1,1,1]]
input_dict = {'module/input_ids:0': ids,
'module/input_mask:0': masks,
'module/segment_ids:0': segments}
tensor_name = 'module/bert/encoder/layer_{}/output/LayerNorm/batchnorm/add_1:0'.format(1)
output = tf.get_default_graph().get_tensor_by_name(tensor_name)
with tf.Session() as s:
s.run(tf.initializers.global_variables())
first_layer_output = s.run(output, feed_dict=input_dict)
Однако я не совсем уверен, как это сделать с Керасом. Я пытался использовать пользовательские слои, как это
import tensorflow as tf
from config import BERT_MODEL_HUB
import tensorflow_hub as hub
class BertLayer(tf.layers.Layer):
def __init__(self, layer_num, n_fine_tune_layers=None, **kwargs):
self.n_fine_tune_layers = n_fine_tune_layers
self.trainable = True
self.output_size = 768
self.layer_num = layer_num
super(BertLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.bert = hub.Module(
BERT_MODEL_HUB,
trainable=self.trainable,
name="{}_module".format(self.name)
)
trainable_vars = self.bert.variables
# Remove unused layers
trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
# Select how many layers to fine tune
if self.n_fine_tune_layers is not None:
trainable_vars = trainable_vars[-self.n_fine_tune_layers :]
# Add to trainable weights
for var in trainable_vars:
self._trainable_weights.append(var)
for var in self.bert.variables:
if var not in self._trainable_weights:
self._non_trainable_weights.append(var)
super(BertLayer, self).build(input_shape)
def call(self, inputs):
inputs = [tf.cast(x, dtype="int32") for x in inputs]
input_ids, input_mask, segment_ids = inputs
bert_inputs = dict(
input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
)
result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"sequence_output"
]
tensor_name = 'bert_layer/bert_layer_1_module/bert/encoder/layer_{}/output/LayerNorm/batchnorm/add_1:0'.format(self.layer_num)
output = tf.get_default_graph().get_tensor_by_name(tensor_name)
return output
def get_config(self):
config = {'n_fine_tune_layers': self.n_fine_tune_layers}
base_config = super(BertLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
и используйте этот слой вот так
from bert_layer_no_max import BertLayer
from tensorflow.keras.layers import Input
import tensorflow as tf
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
in_id = Input(shape=(None,), name="input_ids")
in_mask = Input(shape=(None,), name="input_masks")
in_segment = Input(shape=(None,), name="segment_ids")
bert_inputs = [in_id, in_mask, in_segment]
bert_output = BertLayer(layer_num=11)(bert_inputs)
model = tf.keras.Model(inputs = bert_inputs, outputs = bert_output)
model.summary()
def initialize_vars(sess):
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
tf.keras.backend.set_session(sess)
with tf.Session() as s:
initialize_vars(s)
result = model.predict([[[101,1,1,102]], [[1,1,1,1]], [[1,1,1,1]]])
print(result)
print(result.shape)
но это приводит к таким ошибкам
Traceback (most recent call last):
File "test.py", line 24, in <module>
result = model.predict([[[101,1,1,102]], [[1,1,1,1]], [[1,1,1,1]]])
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1113, in predict
self, x, batch_size=batch_size, verbose=verbose, steps=steps)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 329, in model_iteration
batch_outs = f(ins_batch)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/keras/backend.py", line 3076, in __call__
run_metadata=self.run_metadata)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1439, in __call__
run_metadata_ptr)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'bert_layer/bert_layer_1_module/input_ids' with dtype int32 and shape [?,?]
[[{{node bert_layer/bert_layer_1_module/input_ids}}]]
[[{{node bert_layer/bert_layer_1_module/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1}}]]
каков правильный путь для этого?