0 голосов
/ 29 апреля 2020

Я хочу расширить сеть в этом коде так, чтобы у неё было два выхода:

 def tag_dataset(self, dataset, model):
    """Predict a label-index sequence for every sentence in ``dataset``.

    Args:
        dataset: iterable of ``(tokens, casing, char, labels)`` items, one
            item per sentence.
        model: object exposing ``predict(inputs, verbose=...)``; it is
            called once per sentence with a batch of size one.

    Returns:
        ``(predLabels, correctLabels)``: two lists aligned by sentence.
        ``predLabels[i]`` is the argmax over the last axis of the model
        output for sentence ``i``; ``correctLabels[i]`` is the ``labels``
        entry of that sentence, passed through unchanged.

    NOTE(review): for a multi-output Keras model ``predict`` presumably
    returns a list with one array per output, in which case ``[0]`` below
    selects the first output (still carrying the batch axis) rather than
    the first sentence -- confirm before using this with two output heads.
    """
    correctLabels = []
    predLabels = []

    for tokens, casing, char, labels in dataset:
        # Wrap every feature in a list so each array gets a leading batch
        # axis of length one.
        batch = [np.asarray([tokens]), np.asarray([casing]), np.asarray([char])]
        # [0] drops the batch axis again, leaving the scores of this sentence.
        scores = model.predict(batch, verbose=False)[0]
        # Keep the highest-scoring class index at every position.
        predLabels.append(scores.argmax(axis=-1))
        correctLabels.append(labels)

    return predLabels, correctLabels

def buildModel(self):
    """Build and compile the two-output tagging model and store it on ``self.model``.

    The network takes three inputs (word indices, casing indices and
    per-word character indices). Character embeddings go through a
    convolution + max-pooling stage, are concatenated with the word and
    casing embeddings, and the result is fed to a bidirectional LSTM.
    Two separate softmax ``Dense`` layers, each with ``len(self.label2Idx)``
    units, are stacked on the BLSTM output and both are model outputs.

    Side effects: sets ``self.model`` and ``self.init_weights``, writes the
    architecture diagram to ``model.png`` and prints a confirmation line.

    NOTE(review): three statements in this excerpt are cut off mid-call
    (see the notes below), so the block is not runnable as pasted.
    """
    # character input
    character_input = Input(shape=(None, 52,), name="Character_input")
    # NOTE(review): the call below ends on an opening parenthesis in this
    # excerpt -- presumably it is applied to character_input; confirm
    # against the full source.
    embed_char_out = TimeDistributed(
        Embedding(len(self.char2Idx), 30, embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)), name="Character_embedding")(

    dropout = Dropout(self.dropout)(embed_char_out)

    # CNN
    conv1d_out = TimeDistributed(Conv1D(kernel_size=self.conv_size, filters=30, padding='same', activation='tanh', strides=1), name="Convolution")(dropout)
    # Pool size 52 matches the second axis of character_input's shape.
    maxpool_out = TimeDistributed(MaxPooling1D(52), name="Maxpool")(conv1d_out)
    char = TimeDistributed(Flatten(), name="Flatten")(maxpool_out)
    char = Dropout(self.dropout)(char)

    # word-level input
    words_input = Input(shape=(None,), dtype='int32', name='words_input')
    # NOTE(review): statement truncated after ``weights=[...],`` in this
    # excerpt -- presumably the layer is applied to words_input; confirm.
    words = Embedding(input_dim=self.wordEmbeddings.shape[0], output_dim=self.wordEmbeddings.shape[1], weights=[self.wordEmbeddings],

    # case-info input
    casing_input = Input(shape=(None,), dtype='int32', name='casing_input')
    # NOTE(review): statement truncated after ``weights=[...],`` in this
    # excerpt -- presumably the layer is applied to casing_input; confirm.
    casing = Embedding(output_dim=self.caseEmbeddings.shape[1], input_dim=self.caseEmbeddings.shape[0], weights=[self.caseEmbeddings],

    # concat & BLSTM
    output = concatenate([words, casing, char])
    # NOTE(review): no return_sequences argument is passed to the LSTM here;
    # the TimeDistributed layers below presumably need a per-timestep
    # sequence -- confirm against the full source.
    output = Bidirectional(LSTM(self.lstm_state_size, 
                                dropout=self.dropout,                        # on input to each LSTM block
                                recurrent_dropout=self.dropout_recurrent     # on recurrent input signal
                               ), name="BLSTM")(output)
    # Two heads with identical configuration, both fed by the same BLSTM output.
    output1 = TimeDistributed(Dense(len(self.label2Idx), activation='softmax'),name="Softmax_layer1")(output)
    output2 = TimeDistributed(Dense(len(self.label2Idx), activation='softmax'),name="Softmax_layer2")(output)    #This line is added

    # set up model
    self.model = Model(inputs=[words_input, casing_input, character_input], outputs= [output1, output2])
    # A single loss name is given for both outputs; loss_weights weighs them equally.
    self.model.compile(loss='sparse_categorical_crossentropy', loss_weights=[0.5, 0.5], optimizer=self.optimizer)

    # Snapshot of the freshly initialised weights.
    self.init_weights = self.model.get_weights()

    plot_model(self.model, to_file='model.png')
    print("Model built. Saved model.png\n")

def train(self):
    """Run the default training loop and score the test set after each epoch.

    Resets ``self.f1_test_history`` and ``self.f1_dev_history`` to empty
    lists, then for ``self.epochs`` epochs feeds every minibatch to
    ``self.model.train_on_batch`` (the same labels are used as the target
    of both model outputs) and computes precision/recall/F1 on
    ``self.test_batch``.
    """
    self.f1_test_history = []
    self.f1_dev_history = []

    for epoch in range(self.epochs):
        print("Epoch {}/{}".format(epoch, self.epochs))

        minibatches = iterate_minibatches(self.train_batch, self.train_batch_len)
        for labels, tokens, casing, char in minibatches:
            inputs = [tokens, casing, char]
            # Both output heads are trained against the same label sequence.
            targets = [labels, labels]
            self.model.train_on_batch(inputs, targets)

        # compute F1 scores
        predicted, gold = self.tag_dataset(self.test_batch, self.model)
        test_precision, test_recall, test_f1 = compute_f1(predicted, gold, self.idx2Label)

Исходный код в https://github.com/mxhofer/Named-Entity-Recognition-BidirectionalLSTM-CNN-CoNLL.git.

Достаточно ли просто добавить этот плотный (Dense) слой, чтобы получить два выхода? Я добавил его, но теперь возникает ошибка в функции `compute_f1`, которая определена так:

def compute_f1(predictions, correct, idx2Label):
    """Compute precision, recall and F1 for index-encoded label sequences.

    Args:
        predictions: iterable of sentences, each an iterable of predicted
            label indices.
        correct: iterable of sentences, each an iterable of gold label
            indices.
        idx2Label: lookup indexed by label index that yields the label used
            by ``compute_precision``.

    Returns:
        Tuple ``(prec, rec, f1)``; ``f1`` is 0 when ``prec + rec`` is not
        positive.

    Raises:
        TypeError: if ``idx2Label`` is a dict (presumably the case) and a
            sentence element is unhashable, e.g. a ``numpy.ndarray``
            instead of a scalar index.
    """
    label_pred = [[idx2Label[element] for element in sentence] for sentence in predictions]
    label_correct = [[idx2Label[element] for element in sentence] for sentence in correct]

    prec = compute_precision(label_pred, label_correct)
    # Recall is computed by calling the same routine with the arguments swapped.
    rec = compute_precision(label_correct, label_pred)

    f1 = 0
    if (rec + prec) > 0:
        f1 = 2.0 * prec * rec / (prec + rec)

    return prec, rec, f1


 in train
  pre_test, rec_test, f1_test = compute_f1(predLabels, correctLabels, self.idx2Label)
  File "...", line 10, in compute_f1
label_pred.append([idx2Label[element] for element in sentence])
   File "...", line 10, in <listcomp>
label_pred.append([idx2Label[element] for element in sentence])
  TypeError: unhashable type: 'numpy.ndarray'

1 голос
/ 29 апреля 2020

ПРИМЕЧАНИЕ: ответ основан на комментариях к исходному сообщению

Поскольку вы рассчитываете F1 для одних и тех же эталонных меток, но по двум прогнозам, можно вычислить метрику для каждого прогноза отдельно и взять среднее. Код ниже написан без проверки на корректность:

def compute_f1(predictions, correct, idx2Label):
    """Average precision, recall and F1 over the two output heads.

    Args:
        predictions: indexable whose items 0 and 1 hold the predicted
            sentences (iterables of label indices) of head 1 and head 2.
        correct: indexable whose items 0 and 1 hold the matching gold
            sentences for head 1 and head 2.
        idx2Label: lookup indexed by label index that yields the label used
            by ``compute_precision``.

    Returns:
        Tuple ``(prec, rec, f1)`` where each value is the arithmetic mean
        of the corresponding per-head value.
    """
    prec_1, rec_1, f1_1 = _score_head(predictions[0], correct[0], idx2Label)
    prec_2, rec_2, f1_2 = _score_head(predictions[1], correct[1], idx2Label)

    # taking average
    prec = (prec_1 + prec_2) / 2.
    rec = (rec_1 + rec_2) / 2.
    f1 = (f1_1 + f1_2) / 2.

    return prec, rec, f1


def _score_head(head_predictions, head_correct, idx2Label):
    """Return ``(precision, recall, f1)`` for the sentences of one output head."""
    label_pred = [[idx2Label[element] for element in sentence] for sentence in head_predictions]
    label_correct = [[idx2Label[element] for element in sentence] for sentence in head_correct]

    prec = compute_precision(label_pred, label_correct)
    # Recall is computed by calling the same routine with the arguments swapped.
    rec = compute_precision(label_correct, label_pred)

    f1 = 0
    if (rec + prec) > 0:
        f1 = 2.0 * prec * rec / (prec + rec)

    return prec, rec, f1