Разрешение анафоры в Stanford CoreNLP с использованием Python не работает - PullRequest
0 голосов
/ 16 января 2020
from pycorenlp import StanfordCoreNLP

# Handle used further down to request annotations.
# NOTE(review): presumably requires a CoreNLP server already listening at this URL — confirm before running.
nlp = StanfordCoreNLP('http://localhost:9000')


def resolve(corenlp_output):
    """Transfer the word form of the antecedent to its pronominal anaphor(s).

    For every coreference chain, the first mention is treated as the antecedent.
    Each later mention typed ``'PRONOMINAL'`` has the ``'word'`` of its first token
    overwritten with the antecedent's ``'text'``.

    Args:
        corenlp_output: Parsed JSON annotation with a ``'corefs'`` mapping
            (chain id -> list of mention dicts) and a ``'sentences'`` list whose
            items carry a ``'tokens'`` list.

    Returns:
        None. ``corenlp_output['sentences']`` is modified in place.

    Raises:
        TypeError: If ``corenlp_output`` is a string rather than a parsed annotation.
    """
    if isinstance(corenlp_output, str):
        # NOTE(review): a string here presumably means the server reply was not
        # parsed as JSON (e.g. an error or timeout message) — confirm against the
        # client. Failing early with the reply text beats the opaque
        # "string indices must be integers" raised by the lookups below.
        raise TypeError(
            "resolve() expected a parsed JSON annotation (dict) but received a "
            "string; the server reply was probably not valid JSON: "
            "{!r}".format(corenlp_output[:200])
        )

    for mentions in corenlp_output['corefs'].values():
        if not mentions:
            continue  # nothing to resolve in an empty chain
        antecedent = mentions[0]  # the antecedent is the first mention in the chain
        for mention in mentions[1:]:
            if mention['type'] != 'PRONOMINAL':
                continue
            # sentNum and startIndex are 1-based, the lists they index are 0-based
            sentence = corenlp_output['sentences'][mention['sentNum'] - 1]
            token = sentence['tokens'][mention['startIndex'] - 1]
            # transfer the antecedent's word form to the pronoun's token
            token['word'] = antecedent['text']


def print_resolved(corenlp_output):
    """Write the annotated text to stdout, token by token, without a final newline.

    Every token is emitted as its ``'word'`` followed by its ``'after'`` string.
    Tokens whose lemma is a listed possessive pronoun, or whose tag is ``PRP$``,
    get ``'s`` appended to the word first.
    """
    possessive_lemmas = ('hers', 'his', 'their', 'theirs')
    for sent in corenlp_output['sentences']:
        for tok in sent['tokens']:
            pieces = [tok['word']]
            # lemma is consulted before the tag so a mistagged pronoun is still caught
            needs_marker = tok['lemma'] in possessive_lemmas
            if not needs_marker:
                needs_marker = tok['pos'] == 'PRP$'
            if needs_marker:
                pieces.append("'s")  # possessive morpheme
            pieces.append(tok['after'])
            print(''.join(pieces), end='')


# Sample passage containing several pronouns to be resolved.
text = (
    "Tom and Jane are good friends. They are cool. He knows a lot of things and so does she. His car is red, but "
    "hers is blue. It is older than hers. The big cat ate its dinner."
)

# Ask for coreference annotation, returned as JSON.
output = nlp.annotate(
    text,
    properties={
        'annotators': 'dcoref',
        'outputFormat': 'json',
        'ner.useSUTime': 'false',
    },
)

# Rewrites the pronoun tokens inside `output` in place.
resolve(output)

# Show the untouched input followed by the rewritten version.
print('Original:', text)
print('Resolved: ', end='')
print_resolved(output)

Когда я запускаю приведенный выше код, я получаю следующую ошибку:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-41-d6987b9a74e8> in <module>
----> 1 resolve(output)

<ipython-input-27-5b71f752d4d3> in resolve(corenlp_output)
      1 def resolve(corenlp_output):
      2     """ Transfer the word form of the antecedent to its associated pronominal anaphor(s) """
----> 3     for coref in corenlp_output['corefs']:
      4         mentions = corenlp_output['corefs'][coref]
      5         antecedent = mentions[0]  # the antecedent is the first mention in the coreference chain

TypeError: string indices must be integers

1 Ответ

0 голосов
/ 18 января 2020

Используйте NeuralCoref. С ним намного проще начать работать, и, возможно, он даёт более точные результаты.

...