Я пытаюсь обучить модель извлечения пользовательских отношений в Stanford CoreNLP на примере отношения «место рождения» (birthplace).
Я изучил эту документацию, в которой подробно описан процесс создания файла свойств (аналогичного roth.properties), и составил следующий файл:
# Below are some basic options. See edu.stanford.nlp.ie.machinereading.MachineReadingProperties class for more options.
# NOTE: values in a Java .properties file must NOT be wrapped in quotes - the quotes would become
# part of the value (and therefore part of the file name). Backslashes must stay doubled (\\).
# Pipeline options
annotators = pos, lemma, parse
parse.maxlen = 100
# MachineReading properties. You need one class to read the dataset into correct format. See edu.stanford.nlp.ie.machinereading.domains.ace.AceReader for another example.
datasetReaderClass = edu.stanford.nlp.ie.machinereading.domains.roth.RothCONLL04Reader
# Data directory for training. The datasetReaderClass reads data from this path and makes corresponding sentences and annotations.
trainPath = D:\\stanford-corenlp-full-2017-06-09\\birthplace.corp
# Whether to crossValidate, that is evaluate, or just train.
crossValidate = false
kfold = 10
# Change this to true if you want to use CoreNLP pipeline generated NER tags. The default model generated with the relation extractor release uses the CoreNLP pipeline provided tags (option set to true).
trainUsePipelineNER=false
# Where to save training sentences. Uses the file if it exists, otherwise creates it.
serializedTrainingSentencesPath = D:\\stanford-corenlp-full-2017-06-09\\rel\\sentences.ser
serializedEntityExtractorPath = D:\\stanford-corenlp-full-2017-06-09\\rel\\entity_model.ser
# Where to store the output of the extractor (sentence objects with relations generated by the model). This is what you will use as the model when using 'relation' annotator in the CoreNLP pipeline.
serializedRelationExtractorPath = D:\\stanford-corenlp-full-2017-06-09\\rel\\roth_relation_model_pipeline.ser
# Uncomment to load a serialized model instead of retraining
# loadModel = true
# Result printers, used for printing output of the model. Alternative with per-relation results:
# relationResultsPrinters = edu.stanford.nlp.ie.machinereading.RelationExtractorResultsPrinter,edu.stanford.nlp.ie.machinereading.domains.roth.RothResultsByRelation
relationResultsPrinters = edu.stanford.nlp.ie.machinereading.RelationExtractorResultsPrinter
# In this domain, this is trivial since all the entities are given (or set using CoreNLP NER tagger).
entityClassifier = edu.stanford.nlp.ie.machinereading.domains.roth.RothEntityExtractor
extractRelations = true
extractEvents = false
# We are setting the entities beforehand so the model does not learn how to extract entities etc.
extractEntities = false
# Opposite of crossValidate.
trainOnly=true
# The set chosen by feature selection using RothCONLL04:
relationFeatures = arg_words,arg_type,dependency_path_lowlevel,dependency_path_words,surface_path_POS,entities_between_args,full_tree_path
# The above features plus the features used in Bjorne BioNLP09:
# relationFeatures = arg_words,arg_type,dependency_path_lowlevel,dependency_path_words,surface_path_POS,entities_between_args,full_tree_path,dependency_path_POS_unigrams,dependency_path_word_n_grams,dependency_path_POS_n_grams,dependency_path_edge_lowlevel_n_grams,dependency_path_edge-node-edge-grams_lowlevel,dependency_path_node-edge-node-grams_lowlevel,dependency_path_directed_bigrams,dependency_path_edge_unigrams,same_head,entity_counts
Я выполняю следующую команду из каталога дистрибутива D:\stanford-corenlp-full-2017-06-09:
D:\stanford-corenlp-full-2017-06-09\stanford-corenlp-3.8.0\edu\stanford\nlp>java -cp classpath edu.stanford.nlp.ie.machinereading.MachineReading --arguments roth.properties
и получаю следующую ошибку:
Error: Could not find or load main class edu.stanford.nlp.ie.machinereading.MachineReading
Caused by: java.lang.ClassNotFoundException: edu.stanford.nlp.ie.machinereading.MachineReading
Я также попытался обучить модель пользовательских отношений программно, с помощью приведённого ниже кода на C#:
using java.util;
using System.Collections.Generic;
namespace StanfordRelationDemo
{
    /// <summary>
    /// Trains a custom relation-extraction model by driving CoreNLP's
    /// <c>MachineReading</c> class with the same settings a roth.properties file would hold.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the MachineReading configuration, converts it into "-key value"
        /// command-line style arguments and runs the training.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Root of the unpacked models jar; it contains the edu\stanford\nlp\models tree.
            string jarRoot = @"D:\Stanford English Model\stanford-english-corenlp-2018-10-05-models\";

            Properties props = new Properties();
            props.setProperty("annotators", "pos, lemma, parse");
            props.setProperty("parse.maxlen", "100");
            props.setProperty("datasetReaderClass", "edu.stanford.nlp.ie.machinereading.domains.roth.RothCONLL04Reader");
            props.setProperty("trainPath", "D://Stanford English Model//stanford-english-corenlp-2018-10-05-models//edu//stanford//nlp//models//birthplace.corp");
            props.setProperty("crossValidate", "false");
            props.setProperty("kfold", "10");
            props.setProperty("trainOnly", "true");
            props.setProperty("trainUsePipelineNER", "true");
            props.setProperty("serializedTrainingSentencesPath", "D://Stanford English Model//stanford-english-corenlp-2018-10-05-models//edu//stanford//nlp//models//rel//sentences.ser");
            props.setProperty("serializedEntityExtractorPath", "D://Stanford English Model//stanford-english-corenlp-2018-10-05-models//edu//stanford//nlp//models//rel//entity_model.ser");
            props.setProperty("serializedRelationExtractorPath", "D://Stanford English Model//stanford-english-corenlp-2018-10-05-models//edu//stanford//nlp//models//rel//roth_relation_model_pipeline.ser");
            props.setProperty("relationResultsPrinters", "edu.stanford.nlp.ie.machinereading.RelationExtractorResultsPrinter");
            props.setProperty("entityClassifier", "edu.stanford.nlp.ie.machinereading.domains.roth.RothEntityExtractor");
            props.setProperty("extractRelations", "true");
            props.setProperty("extractEvents", "false");
            props.setProperty("extractEntities", "false");
            props.setProperty("relationFeatures", "arg_words,arg_type,dependency_path_lowlevel,dependency_path_words,surface_path_POS,entities_between_args,full_tree_path");

            // makeMachineReading takes a string array, so flatten the
            // properties into alternating "-key", "value" entries.
            var propertyKeys = props.keys();
            var propertyStringArray = new List<string>();
            while (propertyKeys.hasMoreElements())
            {
                var key = propertyKeys.nextElement();
                propertyStringArray.Add($"-{key}");
                propertyStringArray.Add(props.getProperty(key.ToString(), string.Empty));
            }

            // The tagger model is requested by a relative path
            // ("edu/stanford/nlp/models/pos-tagger/...") that is tried as class path,
            // file name or URL. Running from the unpacked models root lets the
            // file-name lookup succeed. All paths configured above are absolute,
            // so they are unaffected by the directory switch.
            string previousDirectory = System.IO.Directory.GetCurrentDirectory();
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            try
            {
                var machineReader = edu.stanford.nlp.ie.machinereading.MachineReading.makeMachineReading(propertyStringArray.ToArray());
                machineReader.run();
            }
            finally
            {
                // Always restore the caller's working directory, even if training fails.
                System.IO.Directory.SetCurrentDirectory(previousDirectory);
            }
        }
    }
}
Я получаю это исключение:
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
Unhandled Exception: edu.stanford.nlp.io.RuntimeIOException: Error while loading a tagger model (probably missing model file) --->
java.io.IOException: Unable to open "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger" as class path, filename or URL
   at edu.stanford.nlp.io.IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(String textFileOrUrl)
   at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(Properties config, String modelFileOrUrl, Boolean printLoading)
   --- End of inner exception stack trace ---
   at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(Properties config, String modelFileOrUrl, Boolean printLoading)
   at edu.stanford.nlp.tagger.maxent.MaxentTagger..ctor(String modelFile, Properties config, Boolean printLoading)
   at edu.stanford.nlp.tagger.maxent.MaxentTagger..ctor(String modelFile)
   at edu.stanford.nlp.pipeline.POSTaggerAnnotator.loadModel(String , Boolean )
   at edu.stanford.nlp.pipeline.POSTaggerAnnotator..ctor(String annotatorName, Properties props)
   at edu.stanford.nlp.pipeline.AnnotatorImplementations.posTagger(Properties properties)
   at edu.stanford.nlp.pipeline.StanfordCoreNLP.lambda$getNamedAnnotators$42(Properties , AnnotatorImplementations )
   at edu.stanford.nlp.pipeline.StanfordCoreNLP.<>Anon4.apply(Object , Object )
   at edu.stanford.nlp.pipeline.StanfordCoreNLP.lambda$getDefaultAnnotatorPool$65(Entry , Properties , AnnotatorImplementations )
   at edu.stanford.nlp.pipeline.StanfordCoreNLP.<>Anon27.get()
   at edu.stanford.nlp.util.Lazy.3.compute()
   at edu.stanford.nlp.util.Lazy.get()
   at edu.stanford.nlp.pipeline.AnnotatorPool.get(String name)
   at edu.stanford.nlp.pipeline.StanfordCoreNLP.construct(Properties , Boolean , AnnotatorImplementations , AnnotatorPool )
   at edu.stanford.nlp.pipeline.StanfordCoreNLP..ctor(Properties props, Boolean enforceRequirements, AnnotatorPool annotatorPool)
   at edu.stanford.nlp.pipeline.StanfordCoreNLP..ctor(Properties props, Boolean enforceRequirements)
   at edu.stanford.nlp.ie.machinereading.MachineReading.makeMachineReading(String[] args)
   at StanfordRelationDemo.Program.Main(String[] args) in C:\Users\m1039332\Documents\Visual Studio 2017\Projects\StanfordRelationDemo\StanfordRelationDemo\Program.cs:line 46
Мне никак не удаётся обучить модель пользовательских отношений с помощью CoreNLP. Если я допускаю какие-то очевидные ошибки, буду признателен, если кто-нибудь на них укажет.