windowcapture
исходный код / Tools/TestSpellCheck.cs

TestSpellCheck.cs

88 строк · 4,510 байт · модуль Tools
 1// TestSpellCheck: word-level test of the noisy-channel ranker (CompactSpell.Lookup).
 2// Measures the ranker ALONE (no forcedFix, no CharNN/CharEmbedNet) so improvements are visible.
 3using System;
 4using System.IO;
 5using System.Collections.Generic;
 6using System.Text;
 7using WindowCapture.Helpers;
 8
 9class TestSpellCheck
10{
11    static void Main()
12    {
13        string dataDir = Path.Combine(Path.GetDirectoryName(
14            System.Reflection.Assembly.GetExecutingAssembly().Location), "..", "Data");
15        if (!Directory.Exists(dataDir))
16            dataDir = Path.Combine(Environment.CurrentDirectory, "Data");
17
18        Console.OutputEncoding = Encoding.UTF8;
19        Console.WriteLine("=== SpellCheck Test (noisy-channel ranker) ===\n");
20
21        Console.Write("Loading dictionary... ");
22        string dictPath = Path.Combine(dataDir, "dict_ru.txt");
23        var words = new List<string>();
24        foreach (var l in File.ReadAllLines(dictPath))
25        {
26            string w = l.Trim().ToLower();
27            if (w.Length >= 1 && w.Length <= 25) words.Add(w);
28        }
29        Console.WriteLine(words.Count + " words");
30
31        var compact = new CompactSpell();
32        compact.Build(words.ToArray());
33        string bloomPath = Path.Combine(dataDir, "dict_ru.bloom");
34        if (File.Exists(bloomPath)) compact.Bloom = new BloomFilter(bloomPath);
35        Console.WriteLine("CompactSpell ready: " + compact.IsReady + "\n");
36
37        // input → expected. Pure ranker (no hardcoded forcedFix).
38        var tests = new string[,] {
39            {"привет","привет"},{"прввет","привет"},{"привт","привет"},{"приввет","привет"},{"превет","привет"},
40            {"прведт","привет"},{"компуктер","компьютер"},{"тихналогия","технология"},
41            {"пошол","пошёл"},{"пшоел","пошёл"},
42            {"сиводня","сегодня"},{"кароче","короче"},{"канешно","конечно"},
43            {"здраствуте","здравствуйте"},{"расчитывать","рассчитывать"},
44            // generalization — common phonetic/keyboard typos NOT special-cased anywhere:
45            {"програма","программа"},{"граматный","грамотный"},
46            {"сабака","собака"},{"карова","корова"},{"малако","молоко"},
47            {"вада","вода"},{"харашо","хорошо"},{"тилефон","телефон"},
48            {"дамой","домой"},{"севодня","сегодня"},{"агурец","огурец"},
49            {"привед","привет"},{"спосибо","спасибо"},{"пожалуста","пожалуйста"},
50        };
51
52        int pass = 0, fail = 0;
53        var fails = new List<string>();
54        for (int i = 0; i < tests.GetLength(0); i++)
55        {
56            string input = tests[i, 0];
57            string expected = tests[i, 1];
58
59            string result = input;
60            if (!compact.ContainsTrusted(input))
61            {
62                var cands = compact.Lookup(input, 5);
63                if (cands.Count == 0 && compact.ContainsExact(input))
64                {
65                    // frequency-override (mirrors ProcessWord): typo is itself in the dirty dict —
66                    // force candidate generation and accept a trusted close neighbour.
67                    var forced = compact.Lookup(input, 5, true);
68                    foreach (var c in forced)
69                    {
70                        if (!compact.ContainsTrusted(c.Word)) continue;
71                        if (c.Distance == 1 ||
72                            (c.Distance == 2 && c.Word[0] == input[0] && SpellScore.EditPlausibility(input, c.Word) >= 2000))
73                        { result = c.Word; break; }
74                    }
75                }
76                else if (cands.Count > 0) result = cands[0].Word;
77            }
78
79            bool ok = result == expected;
80            if (ok) pass++; else { fail++; fails.Add("\"" + input + "\" → \"" + result + "\" (want \"" + expected + "\")"); }
81            Console.WriteLine((ok ? "  PASS" : "  FAIL") + ": \"" + input + "\" → \"" + result + "\"" +
82                (ok ? "" : " (expected: \"" + expected + "\")"));
83        }
84
85        Console.WriteLine("\n=== Results: " + pass + " passed, " + fail + " failed (" +
86            (100 * pass / (pass + fail)) + "%) ===");
87    }
88}