windowcapture
исходный код / Helpers/CharNN.cs

CharNN.cs

392 строк · 15,782 байт · модуль Helpers
  1using System;
  2using System.Collections.Generic;
  3
  4namespace WindowCapture.Helpers
  5{
  6    /// <summary>
  7    /// CharNN: Character-level neural error model for spelling correction.
  8    /// Combines keyboard geometry, language bigram statistics, and error pattern detection
  9    /// to score how likely a given correction is the intended word.
 10    /// Pure C# — no ML libraries needed.
 11    /// </summary>
 12    public static class CharNN
 13    {
        // Key positions per layout: char → { row, col }.
        // Both tables are filled by InitKeyboard; the stored column already
        // includes the per-row stagger offset applied there.
        private static Dictionary<char, float[]> keyPosRu;
        private static Dictionary<char, float[]> keyPosEn;

        // Character bigram log-probabilities, produced by BuildModel or LoadWeights.
        // Index: charIndex(prev) * ALPHA_SIZE + charIndex(next)
        // CharIdx only yields indices 0..32 (а-я plus ё), so slot 33 is never
        // addressed; the size also determines the byte count LoadWeights requires.
        private const int ALPHA_SIZE = 34; // а-я + ё
        private static float[] bigramLogProb;
        // Set to true by BuildModel / LoadWeights once a model is available.
        // Declared volatile; BuildModel is documented as running on a background thread.
        private static volatile bool modelReady;

        /// <summary>True once a bigram model has been built or loaded.</summary>
        public static bool IsReady { get { return modelReady; } }

        // Weights for the neural score components combined in NeuralScore.
        // The five weights sum to 1.0.
        private const float W_KEYBOARD = 0.30f;
        private const float W_BIGRAM = 0.20f;
        private const float W_PATTERN = 0.25f;
        private const float W_FREQ = 0.15f;
        private const float W_CONTEXT = 0.10f;

        // Type initializer: builds the keyboard tables before first use of the class.
        static CharNN()
        {
            InitKeyboard();
        }
 37
 38        private static void InitKeyboard()
 39        {
 40            // ЙЦУКЕН layout
 41            keyPosRu = new Dictionary<char, float[]>();
 42            string[] ruRows = {
 43                "йцукенгшщзхъ",
 44                "фывапролджэ",
 45                "ячсмитьбю"
 46            };
 47            for (int r = 0; r < ruRows.Length; r++)
 48                for (int c = 0; c < ruRows[r].Length; c++)
 49                    keyPosRu[ruRows[r][c]] = new float[] { r, c + r * 0.25f }; // offset for staggered rows
 50            keyPosRu['ё'] = new float[] { 0, -1f };
 51
 52            // QWERTY layout
 53            keyPosEn = new Dictionary<char, float[]>();
 54            string[] enRows = {
 55                "qwertyuiop",
 56                "asdfghjkl",
 57                "zxcvbnm"
 58            };
 59            for (int r = 0; r < enRows.Length; r++)
 60                for (int c = 0; c < enRows[r].Length; c++)
 61                    keyPosEn[enRows[r][c]] = new float[] { r, c + r * 0.25f };
 62        }
 63
 64        /// <summary>Train bigram model from dictionary words. Call on background thread.</summary>
 65        public static void BuildModel(string[] dictionary)
 66        {
 67            var sw = System.Diagnostics.Stopwatch.StartNew();
 68
 69            // Count bigrams
 70            long[] counts = new long[ALPHA_SIZE * ALPHA_SIZE];
 71            long[] uniCounts = new long[ALPHA_SIZE];
 72
 73            int maxWords = Math.Min(dictionary.Length, 200000); // top 200k for speed
 74            for (int w = 0; w < maxWords; w++)
 75            {
 76                string word = dictionary[w];
 77                for (int i = 0; i < word.Length; i++)
 78                {
 79                    int ci = CharIdx(word[i]);
 80                    if (ci < 0) continue;
 81                    uniCounts[ci]++;
 82                    if (i > 0)
 83                    {
 84                        int pi = CharIdx(word[i - 1]);
 85                        if (pi >= 0) counts[pi * ALPHA_SIZE + ci]++;
 86                    }
 87                }
 88            }
 89
 90            // Convert to log-probabilities with Laplace smoothing
 91            bigramLogProb = new float[ALPHA_SIZE * ALPHA_SIZE];
 92            for (int i = 0; i < ALPHA_SIZE; i++)
 93            {
 94                long total = uniCounts[i] + ALPHA_SIZE; // Laplace smoothing
 95                for (int j = 0; j < ALPHA_SIZE; j++)
 96                {
 97                    long count = counts[i * ALPHA_SIZE + j] + 1; // +1 smoothing
 98                    bigramLogProb[i * ALPHA_SIZE + j] = (float)Math.Log((double)count / total);
 99                }
100            }
101
102            sw.Stop();
103            Logger.Log("textproc", "CharNN built: bigram model from " + maxWords + " words in " + sw.ElapsedMilliseconds + "ms");
104            modelReady = true;
105        }
106
107        /// <summary>Load pre-trained weights from binary file (instant, ~5KB).</summary>
108        public static void LoadWeights(string filePath)
109        {
110            try
111            {
112                byte[] raw = System.IO.File.ReadAllBytes(filePath);
113                bigramLogProb = new float[ALPHA_SIZE * ALPHA_SIZE];
114                int expected = ALPHA_SIZE * ALPHA_SIZE * 4; // 34*34*4 = 4624 bytes
115                if (raw.Length >= expected)
116                {
117                    Buffer.BlockCopy(raw, 0, bigramLogProb, 0, expected);
118                    modelReady = true;
119                    Logger.Log("textproc", "CharNN loaded weights: " + raw.Length + " bytes");
120                }
121            }
122            catch (Exception ex) { Logger.Log("textproc", "CharNN load err: " + ex.Message); }
123        }
124
125        /// <summary>
126        /// Neural score: how likely is it that 'input' was meant to be 'candidate'?
127        /// Higher = more likely. Range roughly 0-1.
128        /// </summary>
129        public static float NeuralScore(string input, string candidate, int freqIdx, int freqCutoff, string prevWord)
130        {
131            if (!modelReady) return 0f;
132
133            float kbScore = KeyboardScore(input, candidate);
134            float bgScore = BigramScore(candidate);
135            float ptScore = ErrorPatternScore(input, candidate);
136            float frScore = FreqScore(freqIdx, freqCutoff);
137            float ctScore = ContextScore(candidate, prevWord);
138
139            return W_KEYBOARD * kbScore + W_BIGRAM * bgScore + W_PATTERN * ptScore + W_FREQ * frScore + W_CONTEXT * ctScore;
140        }
141
142        // ===== Component 1: Keyboard proximity for substitution errors =====
143        private static float KeyboardScore(string input, string candidate)
144        {
145            // Align the two strings and check substituted characters
146            // Use simple positional alignment (not full DP for speed)
147            int minLen = Math.Min(input.Length, candidate.Length);
148            float totalProx = 0f;
149            int subs = 0;
150
151            for (int i = 0; i < minLen; i++)
152            {
153                if (input[i] != candidate[i])
154                {
155                    float dist = KeyDist(input[i], candidate[i]);
156                    // Close keys → high score (likely fat-finger)
157                    totalProx += Math.Max(0, 1f - dist / 3f);
158                    subs++;
159                }
160            }
161
162            if (subs == 0) return 1f; // identical = perfect
163            return totalProx / subs;
164        }
165
166        private static float KeyDist(char a, char b)
167        {
168            a = char.ToLower(a);
169            b = char.ToLower(b);
170            float[] pa, pb;
171
172            // Try Russian layout
173            if (keyPosRu.TryGetValue(a, out pa) && keyPosRu.TryGetValue(b, out pb))
174                return (float)Math.Sqrt((pa[0] - pb[0]) * (pa[0] - pb[0]) + (pa[1] - pb[1]) * (pa[1] - pb[1]));
175
176            // Try English layout
177            if (keyPosEn.TryGetValue(a, out pa) && keyPosEn.TryGetValue(b, out pb))
178                return (float)Math.Sqrt((pa[0] - pb[0]) * (pa[0] - pb[0]) + (pa[1] - pb[1]) * (pa[1] - pb[1]));
179
180            return 5f; // unknown = far
181        }
182
183        // ===== Component 2: Bigram naturalness of the candidate word =====
184        private static float BigramScore(string word)
185        {
186            if (word.Length < 2) return 0.5f;
187            float sum = 0;
188            int count = 0;
189            for (int i = 1; i < word.Length; i++)
190            {
191                int pi = CharIdx(word[i - 1]);
192                int ci = CharIdx(word[i]);
193                if (pi >= 0 && ci >= 0)
194                {
195                    sum += bigramLogProb[pi * ALPHA_SIZE + ci];
196                    count++;
197                }
198            }
199            if (count == 0) return 0.5f;
200            // Normalize: typical log-prob is -3 to -1. Map to 0-1.
201            float avg = sum / count;
202            return Sigmoid(avg + 2.5f); // shift so that avg=-2.5 → 0.5
203        }
204
205        // ===== Component 3: Error pattern detection =====
206        private static float ErrorPatternScore(string input, string candidate)
207        {
208            float score = 0.5f; // neutral
209
210            // Detect doubled characters in input that aren't in candidate (duplication error)
211            int doublesInInput = CountDoubles(input);
212            int doublesInCand = CountDoubles(candidate);
213            if (doublesInInput > doublesInCand)
214                score += 0.2f * (doublesInInput - doublesInCand); // likely duplication error
215
216            // Detect transposition: adjacent chars swapped
217            if (input.Length == candidate.Length)
218            {
219                int swaps = 0;
220                for (int i = 0; i < input.Length - 1; i++)
221                {
222                    if (input[i] == candidate[i + 1] && input[i + 1] == candidate[i]
223                        && input[i] != candidate[i])
224                    {
225                        swaps++;
226                        i++; // skip next
227                    }
228                }
229                if (swaps > 0) score += 0.3f; // transposition is a very common error
230            }
231
232            // Length analysis
233            int lenDiff = candidate.Length - input.Length;
234            if (lenDiff == 0) score += 0.15f;  // same length = substitution (most common typo)
235            if (lenDiff == 1) score += 0.1f;   // candidate 1 char longer = user missed a key
236            if (lenDiff == -1) score += 0.05f; // candidate 1 char shorter = user typed extra key (less common)
237            if (lenDiff < -1) score -= 0.1f;   // candidate much shorter = suspicious
238
239            return Math.Min(1f, score);
240        }
241
242        private static int CountDoubles(string s)
243        {
244            int count = 0;
245            for (int i = 1; i < s.Length; i++)
246                if (s[i] == s[i - 1]) count++;
247            return count;
248        }
249
250        // ===== Component 4: Word frequency =====
251        private static float FreqScore(int freqIdx, int freqCutoff)
252        {
253            if (freqIdx < freqCutoff / 10) return 1.0f;     // top 10% = very common
254            if (freqIdx < freqCutoff) return 0.7f;           // top 80k = common
255            if (freqIdx < freqCutoff * 5) return 0.3f;       // top 400k = known
256            return 0.1f;                                       // rare
257        }
258
259        // ===== Component 5: Morphological context from previous word =====
260        private static readonly HashSet<string> preps = new HashSet<string> {
261            "в","на","по","к","с","у","за","от","из","до","для","без","при","через","под","над","перед","про"
262        };
263
264        private static float ContextScore(string candidate, string prevWord)
265        {
266            if (string.IsNullOrEmpty(prevWord) || candidate.Length < 3) return 0.5f;
267            string prev = prevWord.ToLower();
268            float score = 0.5f;
269
270            // After preposition → prefer oblique noun cases
271            if (preps.Contains(prev))
272            {
273                if (EndsWith(candidate, "ом", "ем", "ой", "ам", "ях", "ую", "ым", "ей", "ов", "ах", "ие", "ию"))
274                    score += 0.3f;
275                if (EndsWith(candidate, "ть", "ться")) score -= 0.2f;
276            }
277
278            // After "не" → prefer verbs
279            if (prev == "не" || prev == "ни")
280            {
281                if (EndsWith(candidate, "ть", "ет", "ит", "ал", "ла", "ся", "ли", "ют", "ат", "ешь", "ишь"))
282                    score += 0.3f;
283            }
284
285            // After adjective → prefer nouns
286            if (EndsWith(prev, "ый", "ий", "ой", "ая", "яя", "ое", "ее", "ые", "ие"))
287            {
288                if (!EndsWith(candidate, "ть", "ет", "ит", "ся"))
289                    score += 0.15f;
290            }
291
292            // After possessive → prefer nouns
293            if (prev == "мой" || prev == "твой" || prev == "наш" || prev == "ваш" ||
294                prev == "это" || prev == "его" || prev == "её" || prev == "их")
295            {
296                if (!EndsWith(candidate, "ть", "ся")) score += 0.2f;
297            }
298
299            return Math.Min(1f, Math.Max(0f, score));
300        }
301
302        // ===== Component 6: Sentence coherence =====
303        /// <summary>
304        /// How well does this candidate fit the sentence so far?
305        /// Uses char-bigram overlap between candidate and sentence words
306        /// as a proxy for semantic relatedness.
307        /// </summary>
308        public static float SentenceCoherence(string candidate, string[] sentenceWords)
309        {
310            if (!modelReady || sentenceWords == null || sentenceWords.Length == 0) return 0.5f;
311
312            float totalScore = 0;
313            int count = 0;
314
315            // For each word in the sentence, compute char-trigram overlap with candidate
316            // Words that share trigrams are likely from the same topic/context
317            var candTrigrams = GetTrigrams(candidate);
318            if (candTrigrams.Count == 0) return 0.5f;
319
320            foreach (string sw in sentenceWords)
321            {
322                if (sw.Length < 3) continue;
323                var swTrigrams = GetTrigrams(sw);
324                if (swTrigrams.Count == 0) continue;
325
326                // Jaccard similarity of trigram sets
327                int intersection = 0;
328                foreach (string t in candTrigrams)
329                    if (swTrigrams.Contains(t)) intersection++;
330
331                int union = candTrigrams.Count + swTrigrams.Count - intersection;
332                if (union > 0)
333                {
334                    totalScore += (float)intersection / union;
335                    count++;
336                }
337            }
338
339            if (count == 0) return 0.5f;
340
341            // Also: check morphological agreement
342            // If sentence contains preposition and candidate has matching case ending
343            float morphBonus = 0;
344            for (int i = sentenceWords.Length - 1; i >= Math.Max(0, sentenceWords.Length - 3); i--)
345            {
346                string w = sentenceWords[i].ToLower();
347                // "в моменте" — preposition "в" + prepositional case "-е"/"-и"
348                if (preps.Contains(w))
349                {
350                    if (EndsWith(candidate, "е", "и", "у", "ю", "ом", "ем", "ой", "ам", "ей", "ах", "ях"))
351                        morphBonus += 0.3f;
352                    break;
353                }
354            }
355
356            float avgSim = totalScore / count;
357            // Scale: 0 overlap = 0, some overlap = up to 1
358            return Math.Min(1f, avgSim * 3f + morphBonus);
359        }
360
361        private static HashSet<string> GetTrigrams(string word)
362        {
363            var result = new HashSet<string>();
364            string w = word.ToLower();
365            for (int i = 0; i <= w.Length - 3; i++)
366                result.Add(w.Substring(i, 3));
367            return result;
368        }
369
370        // ===== Helpers =====
371        private static int CharIdx(char c)
372        {
373            c = char.ToLower(c);
374            if (c >= 'а' && c <= 'я') return c - 'а'; // 0-31
375            if (c == 'ё') return 32;
376            return -1;
377        }
378
379        private static float Sigmoid(float x)
380        {
381            return 1f / (1f + (float)Math.Exp(-x));
382        }
383
384        private static bool EndsWith(string word, params string[] suffixes)
385        {
386            foreach (var s in suffixes)
387                if (word.Length >= s.Length && word.EndsWith(s, StringComparison.Ordinal))
388                    return true;
389            return false;
390        }
391    }
392}