using System;
using System.IO;
using System.Collections.Generic;
using WindowCapture.Helpers;

/// <summary>
/// Console entry point that loads the word list from <c>Data/dict_ru.txt</c>,
/// passes it to <c>CharEmbedNet.Train</c>, then reloads the saved model and
/// prints a few similarity / nearest-word queries as a smoke test.
/// </summary>
class TrainCharEmbed
{
    // Words outside this length range (after trimming) are dropped from the training set.
    private const int MinWordLength = 2;
    private const int MaxWordLength = 20;

    // Epoch count handed to CharEmbedNet.Train.
    private const int Epochs = 30;

    // Number of neighbours requested from FindNearest in the smoke test.
    private const int NearestCount = 5;

    /// <summary>
    /// Runs the load / train / verify pipeline. On a missing or empty dictionary
    /// an error is written to stderr and the process exit code is set to 1.
    /// </summary>
    static void Main()
    {
        string dataDir = ResolveDataDir();
        string dictPath = Path.Combine(dataDir, "dict_ru.txt");

        // Fail with a readable message instead of an unhandled FileNotFoundException.
        if (!File.Exists(dictPath))
        {
            Console.Error.WriteLine("Dictionary not found: " + dictPath);
            Environment.ExitCode = 1;
            return;
        }

        Console.WriteLine("Loading dictionary...");
        List<string> words = LoadWords(dictPath);
        Console.WriteLine("Loaded " + words.Count + " words");

        // Nothing to train on: stop before calling into the trainer.
        if (words.Count == 0)
        {
            Console.Error.WriteLine("Dictionary contains no usable words: " + dictPath);
            Environment.ExitCode = 1;
            return;
        }

        string savePath = Path.Combine(dataDir, "charembed.bin");
        Console.WriteLine("Training CharEmbedNet (" + Epochs + " epochs)...");
        // NOTE(review): Train presumably writes the model to savePath, since the
        // verification step loads it from there right after - confirm in CharEmbedNet.
        CharEmbedNet.Train(words.ToArray(), savePath, Epochs);

        Verify(savePath);

        Console.WriteLine("\nDone!");
    }

    /// <summary>
    /// Picks the data directory: <c>&lt;assembly dir&gt;/../Data</c> when it exists,
    /// otherwise <c>&lt;current dir&gt;/Data</c>.
    /// </summary>
    /// <returns>Path of the data directory (the fallback is not checked for existence).</returns>
    private static string ResolveDataDir()
    {
        // Assembly.Location may be an empty string (e.g. single-file or in-memory
        // loads), so guard it before asking for its directory.
        string assemblyPath = System.Reflection.Assembly.GetExecutingAssembly().Location;
        if (!string.IsNullOrEmpty(assemblyPath))
        {
            string assemblyDir = Path.GetDirectoryName(assemblyPath);
            if (!string.IsNullOrEmpty(assemblyDir))
            {
                string candidate = Path.Combine(assemblyDir, "..", "Data");
                if (Directory.Exists(candidate))
                {
                    return candidate;
                }
            }
        }

        return Path.Combine(Environment.CurrentDirectory, "Data");
    }

    /// <summary>
    /// Reads the dictionary file and returns its lines trimmed, lower-cased and
    /// filtered to the accepted length range.
    /// </summary>
    /// <param name="dictPath">Path to the dictionary text file, one word per line.</param>
    /// <returns>The normalized words in file order.</returns>
    private static List<string> LoadWords(string dictPath)
    {
        var words = new List<string>();
        foreach (string line in File.ReadAllLines(dictPath))
        {
            // Invariant casing keeps the training vocabulary independent of the
            // machine's current culture.
            string word = line.Trim().ToLowerInvariant();
            if (word.Length >= MinWordLength && word.Length <= MaxWordLength)
            {
                words.Add(word);
            }
        }

        return words;
    }

    /// <summary>
    /// Loads the saved model and prints similarity scores for pairs of words
    /// plus nearest-word lists for two misspellings.
    /// </summary>
    /// <param name="savePath">Path of the model file to load.</param>
    private static void Verify(string savePath)
    {
        var net = new CharEmbedNet();
        net.Load(savePath);

        // Consecutive entries form (reference, misspelling) pairs.
        string[] testWords =
        {
            "привет", "прввет",
            "здравствуйте", "здраствуте",
            "компьютер", "компуктер"
        };

        Console.WriteLine("\nSimilarity test:");
        // "i + 1 < Length" keeps the pair access in range even if the array
        // is later edited to an odd length.
        for (int i = 0; i + 1 < testWords.Length; i += 2)
        {
            float sim = net.Similarity(testWords[i], testWords[i + 1]);
            Console.WriteLine("  " + testWords[i] + " ~ " + testWords[i + 1] + " = " + sim.ToString("F3"));
        }

        var nearest = net.FindNearest("прввет", NearestCount);
        Console.WriteLine("\nNearest to 'прввет': " + string.Join(", ", nearest.ToArray()));

        nearest = net.FindNearest("компуктер", NearestCount);
        Console.WriteLine("Nearest to 'компуктер': " + string.Join(", ", nearest.ToArray()));
    }
}