Я пытался реализовать word2vec с помощью ConvNetSharp. Мне удалось создать базовую версию алгоритма с использованием ReLU на скрытом слое, но она не очень точная, и я хотел бы реализовать более точную версию алгоритма с использованием Flow API ConvNetSharp. Любая помощь очень ценится!
// Training corpus: the classic word2vec toy data set of short sentences
// about royalty; small enough to train in seconds.
var corpus = new List<string> {
"king is a strong man",
"queen is a wise woman",
"boy is a young man",
"girl is a young woman",
"prince is a young king",
"princess is a young queen",
"man is strong",
"woman is pretty",
"prince is a boy will be king",
"princess is a girl will be queen" };
// Drop the stop words ("is", "a", "will", "be") so only content words remain.
corpus = RemoveStopWords(corpus);
//build the list of words
// Vocabulary: every distinct word in the cleaned corpus. A word's index in
// this list is its one-hot position throughout the rest of the program.
var words = corpus.SelectMany(s => s.Split(' ')).Distinct().ToList();
//build training data based on the words around the focus word
// Skip-gram pair generation: for each focus word, emit one (focus, neighbor)
// pair for every word within WINDOW_SIZE positions on either side of it.
var WINDOW_SIZE = 2;
var data = new List<(string, string)>();
foreach (var sentence in corpus)
{
    var split = sentence.Split();
    for (var count = 0; count < split.Length; count++)
    {
        var word = split[count];
        // Clamp the context window to the sentence boundaries.
        var start = Math.Max(0, count - WINDOW_SIZE);
        var end = Math.Min(split.Length, count + WINDOW_SIZE + 1);
        for (var i = start; i < end; i++)
        {
            // Compare positions, not strings: the original `neighbor != word`
            // check wrongly dropped a context word whenever the same word
            // happened to appear twice inside the window.
            if (i != count)
            {
                data.Add((word, split[i]));
            }
        }
    }
}
// One-hot encode every (focus, context) pair: the focus word becomes the
// network input and its context word becomes the target label.
var inputs = new List<double[]>();
var labels = new List<double[]>();
foreach (var item in data)
{
inputs.Add(OneHotEncoding(item.Item1, words));
labels.Add(OneHotEncoding(item.Item2, words));
}
//setup the net
// Architecture: one-hot input (width = vocabulary size) -> DIMENSIONS-wide
// hidden layer (the future word embedding) -> softmax over the vocabulary.
// NOTE(review): classic word2vec uses a purely linear projection for the
// hidden layer; the ReLU below zeroes negative activations and may be part
// of why accuracy suffers.
var DIMENSIONS = 4;
var net = new ConvNetSharp.Core.Net<double>();
var width = words.Count;
var batchSize = inputs.Count;
net.AddLayer(new InputLayer(width, 1, 1));
net.AddLayer(new FullyConnLayer(DIMENSIONS));
net.AddLayer(new ReluLayer());
net.AddLayer(new FullyConnLayer(width));
net.AddLayer(new SoftmaxLayer(width));
//train
// Adam over the entire data set as a single batch.
var trainer = new AdamTrainer<double>(net) { LearningRate = 0.01, BatchSize = batchSize };
// Pack all examples into two 4-D volumes. Inputs use shape (width,1,1,batch)
// while labels use (1,1,width,batch) — NOTE(review): presumably the layout
// SoftmaxLayer expects for class targets; confirm against ConvNetSharp samples.
var netx = BuilderInstance.Volume.SameAs(new Shape(width, 1, 1, batchSize));
var hotLabels = BuilderInstance.Volume.SameAs(new Shape(1, 1, width, batchSize));
for (var i = 0; i < batchSize; i++)
{
var label = labels[i];
for (var w = 0; w < width; w++)
{
var input = inputs[i][w];
var output = label[w];
netx.Set(w, 0, 0, i, input);
hotLabels.Set(0, 0, w, i, output);
}
}
// Fixed number of full-batch epochs; the running loss is printed each epoch.
for (var i = 0; i < 500; i++)
{
trainer.Train(netx, hotLabels);
Console.WriteLine($"loss {trainer.Loss}");
}
//extract 'embedings' from hidden layer into a results dictionary
// The embedding of word i is read out of the first FullyConnLayer's weights
// (net.Layers[1], the layer directly after the input layer).
var hiddenLayer = net.Layers[1];
var vecs = hiddenLayer.GetParametersAndGradients();
// vecs[0] holds the layer's filter (weight) volume; biases are not needed here.
var v = vecs[0].Volume.Storage;
var results = new Dictionary<string, List<double>>();
foreach (var word in words)
{
var index = words.IndexOf(word);
var d = new List<double>();
for (int i = 0; i < DIMENSIONS; i++)
{
// NOTE(review): assumes the weight storage is addressable as
// (0, 0, input-index, output-index) — verify against FullyConnLayer's
// actual filter layout.
var item = v.Get(0, 0, index, i);
d.Add(item);
}
results.Add(word, d);
}
//try to use the embedings to perform operations in word space
// Classic analogy checks: vector arithmetic on embeddings followed by a
// nearest-neighbour ranking under (negated) cosine similarity.
Console.WriteLine("\r\n(king - man) + woman");
PrintNearest(Add(Subtract(results["king"], results["man"]), results["woman"]), results);
Console.WriteLine("\r\nqueen + young");
PrintNearest(Add(results["queen"], results["young"]), results);
Console.WriteLine("\r\nman + young");
PrintNearest(Add(results["man"], results["young"]), results);
private string[] NearestList(List<double> find, Dictionary<string, List<double>> results)
{
    // Rank every vocabulary word by its (negated cosine) distance to the
    // query vector; the closest word ends up first.
    var ranked = from entry in results
                 orderby Distance(find, entry.Value)
                 select entry.Key;
    return ranked.ToArray();
}
public static double Distance(List<double> a, List<double> b)
{
    // Negated cosine similarity, so that a smaller value means "closer":
    // -1 for identical directions, 0 for orthogonal vectors.
    // (Zero-magnitude inputs yield NaN, matching the original behaviour.)
    double dotProduct = 0.0;
    double normASquared = 0.0;
    double normBSquared = 0.0;
    for (var idx = 0; idx < a.Count; idx++)
    {
        dotProduct += a[idx] * b[idx];
        normASquared += a[idx] * a[idx];
        normBSquared += b[idx] * b[idx];
    }
    return -(dotProduct / (Math.Sqrt(normASquared) * Math.Sqrt(normBSquared)));
}
private double[] OneHotEncoding(string word, List<string> words)
{
    // One-hot vector over the vocabulary: 1 at the word's index, 0 elsewhere.
    // Guard against unknown words: the original code let IndexOf return -1
    // and crashed with an opaque IndexOutOfRangeException on result[-1].
    var index = words.IndexOf(word);
    if (index < 0)
    {
        throw new ArgumentException($"Word '{word}' is not part of the vocabulary.", nameof(word));
    }
    var result = new double[words.Count];
    result[index] = 1;
    return result;
}
private List<string> RemoveStopWords(List<string> corpus)
{
    // Returns a copy of the corpus with the listed stop words removed from
    // every sentence; word order is preserved.
    var stopWords = new HashSet<string> { "is", "a", "will", "be" };
    var results = new List<string>();
    foreach (var text in corpus)
    {
        // Original code wrapped the already-joined sentence in a second
        // string.Join(" ", newText) — a no-op that only worked because the
        // params string[] overload wins over IEnumerable<char>; removed.
        var kept = text.Split(' ').Where(w => !stopWords.Contains(w));
        results.Add(string.Join(" ", kept));
    }
    return results;
}
private void PrintNearest(List<double> find, Dictionary<string, List<double>> results)
{
    // Print every vocabulary word, closest to the query vector first.
    var ranked = NearestList(find, results);
    foreach (var word in ranked)
    {
        Console.WriteLine($"ans = {word}");
    }
}
private List<double> Add(List<double> a, List<double> b)
{
    // Element-wise vector sum; like Enumerable.Zip, the result is truncated
    // to the shorter of the two inputs.
    var length = Math.Min(a.Count, b.Count);
    var sum = new List<double>(length);
    for (var i = 0; i < length; i++)
    {
        sum.Add(a[i] + b[i]);
    }
    return sum;
}
private List<double> Subtract(List<double> a, List<double> b)
{
    // Element-wise vector difference; like Enumerable.Zip, the result is
    // truncated to the shorter of the two inputs.
    var length = Math.Min(a.Count, b.Count);
    var difference = new List<double>(length);
    for (var i = 0; i < length; i++)
    {
        difference.Add(a[i] - b[i]);
    }
    return difference;
}
Я попытался реализовать вариант на Flow API, но никак не могу согласовать размерности (Shape):
// --- Attempted ConvNetSharp.Flow version of the same model ---
var net = new ConvNetSharp<double>();
// Placeholders for the one-hot focus word ("x") and one-hot context word ("y").
var x = net.PlaceHolder("x");
var y = net.PlaceHolder("y");
// hidden layer: which represents word vector eventually
// NOTE(review): MatMult needs the inner dimensions of x and w1 to agree; x is
// fed below as a flat Shape(ONE_HOT_DIM) volume, so its row/column orientation
// is ambiguous — this looks like the source of the shape-mismatch errors.
// Confirm against the ConvNetSharp.Flow examples.
var w1 = net.Variable(BuilderInstance<double>.Volume.Random(new Shape(ONE_HOT_DIM, EMBEDING_DIM)), "w1", true);
// NOTE(review): Shape(1) makes b1 a single scalar shared by all hidden units;
// a per-unit bias would be Shape(EMBEDING_DIM).
var b1 = net.Variable(BuilderInstance<double>.Volume.Random(new Shape(1)), "b1", true);
var hidden_layer = net.MatMult(x, w1) + b1;
// output layer
var w2 = net.Variable(BuilderInstance<double>.Volume.Random(new Shape(EMBEDING_DIM, ONE_HOT_DIM)), "w2", true);
var b2 = net.Variable(BuilderInstance<double>.Volume.Random(new Shape(1)), "b2", true);
// Project back to vocabulary size and normalise with softmax.
var prediction = net.Softmax(net.Reshape(net.MatMult(hidden_layer, w2) + b2, new Shape(ONE_HOT_DIM)));
// loss function: cross entropy
var loss = net.CrossEntropyLoss(prediction, y);
// training operation
var batchSize = inputs.Count;
// Adam: lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8.
var optimizer = new AdamOptimizer<double>(net, 0.01f, 0.9f, 0.999f, 1e-08f);
using (var session = new Session<double>())
{
// Build gradient ops for every trainable variable reachable from the loss.
session.Differentiate(loss);
double currentCost = 0;
do
{
// NOTE(review): these volumes are re-allocated on every outer iteration, and
// each session.Run feeds a single example — so despite the batchSize variable
// above, training is effectively per-sample (batch of 1).
var xx = BuilderInstance<double>.Volume.SameAs(new Shape(ONE_HOT_DIM));
var yy = BuilderInstance<double>.Volume.SameAs(new Shape(ONE_HOT_DIM));
for (var i = 0; i < batchSize; i++)
{
var input = inputs[i];
var label = labels[i];
for (var w = 0; w < ONE_HOT_DIM; w++)
{
xx.Set(w, 0, 0, 0, input[w]);
yy.Set(w, 0, 0, 0, label[w]);
}
var dico = new Dictionary<string, Volume<double>> { { "x", xx }, { "y", yy } };
// Forward pass to read the current loss value.
var result = session.Run(loss, dico);
currentCost = Math.Abs(result.ToArray().Sum());
Console.WriteLine($"cost: {currentCost}");
// Backward pass + parameter update.
session.Run(optimizer, dico);
}
} while (currentCost > 1e-5);
}
// NOTE(review): hidden_layer.Result only holds the activations of the LAST
// forward pass, not a per-word embedding table; to extract embeddings here,
// read the learned w1 values instead.
var v = hidden_layer.Result;