windowcapture
исходный код / Tools/TrainCharEmbed.cs

TrainCharEmbed.cs

49 строк · 1,949 байт · модуль Tools
 1using System;
 2using System.IO;
 3using System.Collections.Generic;
 4using WindowCapture.Helpers;
 5
 6class TrainCharEmbed
 7{
 8    static void Main()
 9    {
10        string dataDir = Path.Combine(Path.GetDirectoryName(
11            System.Reflection.Assembly.GetExecutingAssembly().Location), "..", "Data");
12        if (!Directory.Exists(dataDir))
13            dataDir = Path.Combine(Environment.CurrentDirectory, "Data");
14
15        string dictPath = Path.Combine(dataDir, "dict_ru.txt");
16        Console.WriteLine("Loading dictionary...");
17        var lines = File.ReadAllLines(dictPath);
18        var words = new List<string>();
19        foreach (var l in lines)
20        {
21            string w = l.Trim().ToLower();
22            if (w.Length >= 2 && w.Length <= 20) words.Add(w);
23        }
24        Console.WriteLine("Loaded " + words.Count + " words");
25
26        string savePath = Path.Combine(dataDir, "charembed.bin");
27        Console.WriteLine("Training CharEmbedNet (30 epochs)...");
28        CharEmbedNet.Train(words.ToArray(), savePath, 30);
29
30        // Verify
31        var net = new CharEmbedNet();
32        net.Load(savePath);
33        string[] testWords = { "привет", "прввет", "здравствуйте", "здраствуте", "компьютер", "компуктер" };
34        Console.WriteLine("\nSimilarity test:");
35        for (int i = 0; i < testWords.Length; i += 2)
36        {
37            float sim = net.Similarity(testWords[i], testWords[i + 1]);
38            Console.WriteLine("  " + testWords[i] + " ~ " + testWords[i + 1] + " = " + sim.ToString("F3"));
39        }
40
41        var nearest = net.FindNearest("прввет", 5);
42        Console.WriteLine("\nNearest to 'прввет': " + string.Join(", ", nearest.ToArray()));
43
44        nearest = net.FindNearest("компуктер", 5);
45        Console.WriteLine("Nearest to 'компуктер': " + string.Join(", ", nearest.ToArray()));
46
47        Console.WriteLine("\nDone!");
48    }
49}