// TestSpellCheck: word-level test of the noisy-channel ranker (CompactSpell.Lookup).
// Measures the ranker ALONE (no forcedFix, no CharNN/CharEmbedNet) so improvements are visible.
using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using WindowCapture.Helpers;

/// <summary>
/// Console harness that feeds a fixed table of misspelled Russian words through
/// <c>CompactSpell</c> and prints a PASS/FAIL line per word plus an overall pass rate.
/// </summary>
class TestSpellCheck
{
    // Dictionary entries outside this length range are skipped when loading.
    private const int MinWordLength = 1;
    private const int MaxWordLength = 25;

    // Number of candidates requested from CompactSpell.Lookup.
    private const int MaxCandidates = 5;

    // Minimum SpellScore.EditPlausibility for accepting a distance-2 forced candidate.
    private const int MinEditPlausibility = 2000;

    // input → expected. Pure ranker (no hardcoded forcedFix).
    private static readonly string[,] Cases = new string[,] {
        {"привет","привет"},{"прввет","привет"},{"привт","привет"},{"приввет","привет"},{"превет","привет"},
        {"прведт","привет"},{"компуктер","компьютер"},{"тихналогия","технология"},
        {"пошол","пошёл"},{"пшоел","пошёл"},
        {"сиводня","сегодня"},{"кароче","короче"},{"канешно","конечно"},
        {"здраствуте","здравствуйте"},{"расчитывать","рассчитывать"},
        // generalization — common phonetic/keyboard typos NOT special-cased anywhere:
        {"програма","программа"},{"граматный","грамотный"},
        {"сабака","собака"},{"карова","корова"},{"малако","молоко"},
        {"вада","вода"},{"харашо","хорошо"},{"тилефон","телефон"},
        {"дамой","домой"},{"севодня","сегодня"},{"агурец","огурец"},
        {"привед","привет"},{"спосибо","спасибо"},{"пожалуста","пожалуйста"},
    };

    /// <summary>
    /// Entry point: loads the dictionary, builds the speller, runs every case and prints the results.
    /// Sets a non-zero process exit code only when the dictionary file cannot be found.
    /// </summary>
    static void Main()
    {
        string dataDir = ResolveDataDir();

        Console.OutputEncoding = Encoding.UTF8;
        Console.WriteLine("=== SpellCheck Test (noisy-channel ranker) ===\n");

        Console.Write("Loading dictionary... ");
        string dictPath = Path.Combine(dataDir, "dict_ru.txt");
        if (!File.Exists(dictPath))
        {
            // Report a readable error instead of an unhandled FileNotFoundException,
            // but keep the run distinguishable from a successful one via the exit code.
            Console.WriteLine();
            Console.Error.WriteLine("Dictionary not found: " + dictPath);
            Environment.ExitCode = 1;
            return;
        }

        List<string> words = LoadDictionary(dictPath);
        Console.WriteLine(words.Count + " words");

        var compact = new CompactSpell();
        compact.Build(words.ToArray());
        string bloomPath = Path.Combine(dataDir, "dict_ru.bloom");
        if (File.Exists(bloomPath))
        {
            compact.Bloom = new BloomFilter(bloomPath);
        }
        Console.WriteLine("CompactSpell ready: " + compact.IsReady + "\n");

        int pass = 0, fail = 0;
        var fails = new List<string>();
        for (int i = 0; i < Cases.GetLength(0); i++)
        {
            string input = Cases[i, 0];
            string expected = Cases[i, 1];
            string result = Correct(compact, input);

            bool ok = result == expected;
            if (ok)
            {
                pass++;
            }
            else
            {
                fail++;
                fails.Add("\"" + input + "\" → \"" + result + "\" (want \"" + expected + "\")");
            }

            Console.WriteLine((ok ? " PASS" : " FAIL") + ": \"" + input + "\" → \"" + result + "\"" +
                (ok ? "" : " (expected: \"" + expected + "\")"));
        }

        // Recap the collected failures so they do not have to be picked out of the per-case log.
        // Printed before the totals so the final output line stays the results line.
        if (fails.Count > 0)
        {
            Console.WriteLine("\nFailures:");
            foreach (string line in fails)
            {
                Console.WriteLine("  " + line);
            }
        }

        int total = pass + fail;
        int percent = total == 0 ? 0 : 100 * pass / total; // guard: an empty case table must not divide by zero
        Console.WriteLine("\n=== Results: " + pass + " passed, " + fail + " failed (" +
            percent + "%) ===");
    }

    /// <summary>
    /// Locates the Data directory: first "..\Data" relative to the executing assembly,
    /// then "Data" under the current directory when the first location is unavailable.
    /// </summary>
    /// <returns>The chosen directory path; it is not guaranteed to exist.</returns>
    private static string ResolveDataDir()
    {
        // Assembly.Location can be empty (e.g. single-file or byte-loaded assemblies);
        // in that case skip straight to the current-directory fallback instead of throwing.
        string exePath = System.Reflection.Assembly.GetExecutingAssembly().Location;
        string exeDir = string.IsNullOrEmpty(exePath) ? null : Path.GetDirectoryName(exePath);

        if (!string.IsNullOrEmpty(exeDir))
        {
            string besideExe = Path.Combine(exeDir, "..", "Data");
            if (Directory.Exists(besideExe))
            {
                return besideExe;
            }
        }

        return Path.Combine(Environment.CurrentDirectory, "Data");
    }

    /// <summary>
    /// Reads the dictionary file, one word per line, trimming and lower-casing each entry
    /// and dropping entries whose length is outside [MinWordLength, MaxWordLength].
    /// </summary>
    /// <param name="dictPath">Path of the dictionary text file.</param>
    /// <returns>The normalized words in file order.</returns>
    private static List<string> LoadDictionary(string dictPath)
    {
        var words = new List<string>();
        foreach (string line in File.ReadAllLines(dictPath))
        {
            // Invariant lower-casing keeps the word list independent of the machine's culture.
            string word = line.Trim().ToLowerInvariant();
            if (word.Length >= MinWordLength && word.Length <= MaxWordLength)
            {
                words.Add(word);
            }
        }
        return words;
    }

    /// <summary>
    /// Produces the correction the harness scores for one input word.
    /// </summary>
    /// <param name="compact">A built speller.</param>
    /// <param name="input">The word to correct (non-empty).</param>
    /// <returns>
    /// The input itself when it is trusted or nothing acceptable is found; otherwise the
    /// top-ranked candidate, or a trusted close neighbour found by the forced lookup.
    /// </returns>
    private static string Correct(CompactSpell compact, string input)
    {
        if (compact.ContainsTrusted(input))
        {
            return input;
        }

        var cands = compact.Lookup(input, MaxCandidates);
        if (cands.Count > 0)
        {
            return cands[0].Word;
        }

        if (!compact.ContainsExact(input))
        {
            return input;
        }

        // frequency-override (mirrors ProcessWord): typo is itself in the dirty dict —
        // force candidate generation and accept a trusted close neighbour.
        var forced = compact.Lookup(input, MaxCandidates, true);
        foreach (var c in forced)
        {
            if (!compact.ContainsTrusted(c.Word))
            {
                continue;
            }

            bool closeEnough =
                c.Distance == 1 ||
                (c.Distance == 2 && c.Word[0] == input[0] &&
                 SpellScore.EditPlausibility(input, c.Word) >= MinEditPlausibility);
            if (closeEnough)
            {
                return c.Word;
            }
        }

        return input;
    }
}