Я тестировал самый простой пример анализа тональности (sentiment analysis), используя пользовательский IDataView, созданный из IEnumerable, вместо традиционной загрузки данных из текстовых файлов. Я создал списки TestData и TrainingData с несколькими простыми примерами отзывов, следуя примеру, доступному на GitHub, и документации. Но чего-то не хватает, и созданная мною модель работает неправильно... она просто выдаёт неверный результат — «положительный» — для всех примеров.
Main
// Absolute path of the .zip file the trained model is saved to and later loaded from.
private static string ModelPath = @"C:\ML\SentimentModel.zip";
/// <summary>
/// Script entry point: creates a seeded ML context, trains and saves the
/// sentiment model from the in-memory samples, then prints predictions
/// for the test samples.
/// </summary>
void Main()
{
    // Fixed seed so repeated runs use the same random sequence.
    var context = new MLContext(seed: 1);

    // Arguments are evaluated left to right: training samples first, then test samples.
    BuildTrainEvaluateAndSaveModel(context, GetTrainingData(), GetTestData());

    TestPrediction(context);
}
Тестирование и обучение
/// <summary>
/// Builds a text-featurization + FastTree binary classification pipeline,
/// fits it on <paramref name="trainingData"/>, and saves the fitted model
/// to <c>ModelPath</c>.
/// </summary>
/// <param name="mlContext">The ML.NET context used to create data views, transforms and trainers.</param>
/// <param name="trainingData">In-memory samples the model is fitted on.</param>
/// <param name="testData">In-memory samples wrapped into the test data view.</param>
/// <returns>The fitted model (transformer chain).</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext, List<SentimentData> trainingData, List<SentimentData> testData)
{
    // STEP 1: Common data loading configuration
    IDataView trainingDataView = mlContext.Data.ReadFromEnumerable(trainingData);
    // The test view must wrap the test samples; it previously wrapped the
    // training samples again, so held-out data was never represented.
    IDataView testDataView = mlContext.Data.ReadFromEnumerable(testData);

    // STEP 2: Common data process configuration with pipeline data transformations
    var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: DefaultColumnNames.Features, inputColumnName: nameof(SentimentData.Text));

    // STEP 3: Set the training algorithm, then create and config the modelBuilder
    var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumn: DefaultColumnNames.Label, featureColumn: DefaultColumnNames.Features);
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    // STEP 4: Train the model fitting to the DataSet
    Console.WriteLine("=============== Training the model ===============");
    ITransformer trainedModel = trainingPipeline.Fit(trainingDataView);

    // NOTE(review): there is no STEP 5 (evaluation) in this method, so
    // testDataView is currently not consumed anywhere.

    // STEP 6: Save/persist the trained model to a .ZIP file
    using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        mlContext.Model.Save(trainedModel, fs);
    }

    Console.WriteLine("The model is saved to {0}", ModelPath);
    return trainedModel;
}
/// <summary>
/// Loads the saved model from <c>ModelPath</c>, runs a prediction for every
/// sample returned by <c>GetTestData()</c>, and prints the predicted
/// sentiment next to the expected one.
/// </summary>
/// <param name="mlContext">The ML.NET context used to load the model and create the prediction engine.</param>
private void TestPrediction(MLContext mlContext)
{
    var testData = GetTestData();

    ITransformer trainedModel;
    using (var stream = new FileStream(ModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        trainedModel = mlContext.Model.Load(stream);
    }

    var engine = trainedModel.CreatePredictionEngine<SentimentData, SentimentPrediction>(mlContext);

    foreach (var test in testData)
    {
        var result = engine.Predict(test);

        // The training samples use Label = true for positive reviews, so a
        // true prediction means "Positive". The mapping was previously
        // inverted (and "Positive" was misspelled).
        var predictedSentiment = result.Prediction ? "Positive" : "Negative";

        Console.WriteLine($"Prediction : {predictedSentiment} | Actual: {test.Expected} | Text : {test.Text}");
    }
}
Модели и обучение / Тестовые данные
/// <summary>
/// Returns the hard-coded training samples: four reviews labelled
/// <c>true</c> followed by three labelled <c>false</c>.
/// </summary>
/// <returns>A new list of seven labelled samples.</returns>
public List<SentimentData> GetTrainingData()
{
    var samples = new List<SentimentData>();

    // Samples labelled true.
    samples.Add(new SentimentData { Label = true, Text = "Good service." });
    samples.Add(new SentimentData { Label = true, Text = "Very good service" });
    samples.Add(new SentimentData { Label = true, Text = "Amazing service" });
    samples.Add(new SentimentData { Label = true, Text = "Great staff, will visit again. thanks for the gift" });

    // Samples labelled false.
    samples.Add(new SentimentData { Label = false, Text = "Bad staff, bad service. Will never visit this hotel" });
    samples.Add(new SentimentData { Label = false, Text = "The service was very bad" });
    samples.Add(new SentimentData { Label = false, Text = "Hotel location is worst" });

    return samples;
}
/// <summary>
/// Returns the hard-coded test samples. Each sample carries the expected
/// sentiment both as the boolean <c>Label</c> (true = positive, matching
/// the training samples) and as the human-readable <c>Expected</c> string.
/// </summary>
/// <returns>A new list of four test samples: two negative, two positive.</returns>
public List<SentimentData> GetTestData()
{
    return new List<SentimentData>
    {
        new SentimentData
        {
            // Negative review: Label must be false to agree with Expected.
            Label = false,
            Text = "Worst hotel in New York",
            Expected = "Negative"
        },
        new SentimentData
        {
            // Negative review: Label must be false to agree with Expected.
            Label = false,
            Text = "I ordered pizza and recieved Wine. Bad staff",
            Expected = "Negative"
        },
        new SentimentData
        {
            Label = true,
            Text = "The hotel was so amazing, and they givena bag to me on gift",
            Expected = "Positive"
        },
        new SentimentData
        {
            Label = true,
            Text = "The hotel staff was great, will visit again",
            Expected = "Positive"
        }
    };
}
// Input row for the sentiment model: one review with its label.
public class SentimentData
{
// Sentiment label; the training samples in this file use true for positive reviews.
public bool Label { get; set; }
// Review text; this is the column featurized by the training pipeline.
public string Text { get; set; }
// Additional property for testing purpose
// Holds the human-readable expected sentiment that is printed next to the prediction.
public string Expected {get; set;}
}
// Output row produced by the prediction engine for one SentimentData input.
public class SentimentPrediction
{
// Bound to the "PredictedLabel" output column via the ColumnName attribute.
[ColumnName("PredictedLabel")]
public bool Prediction { get; set; }
// NOTE(review): presumably bound by name to the model's "Probability" output column — confirm.
public float Probability { get; set; }
// NOTE(review): presumably bound by name to the model's "Score" output column — confirm.
public float Score { get; set; }
}