using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Text;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Windows.Forms;
using WindowCapture.App;
using WindowCapture.Models;
using WindowCapture.Native;

namespace WindowCapture.Helpers
{
    /// <summary>
    /// Keyboard-driven text post-processor. <see cref="OnKeyDown"/> receives key presses, buffers
    /// the characters of the word being typed and, at every word boundary, hands the word to the
    /// correction pipeline (layout switch, spelling autocorrect, capitalization, punctuation).
    /// All state is static; dictionaries and models are loaded on thread-pool workers.
    /// </summary>
    public static class TextProcessor
    {
        // Keyboard layout maps: same physical key, EN character <-> RU character.
        private static readonly Dictionary<char, char> enToRu = new Dictionary<char, char>();
        private static readonly Dictionary<char, char> ruToEn = new Dictionary<char, char>();

        private static StringBuilder wordBuffer = new StringBuilder(); // characters of the word being typed
        private static bool initialized;
        // Context: sentence history for disambiguation
        private static string prevWord = "";
        private static List<string> sentenceHistory = new List<string>(); // words in current sentence
        private static StringBuilder sentenceRaw = new StringBuilder(); // raw chars of current sentence (for live SAGE suggestion)
        // Bumped on every key that changes the text or moves the caret — race guard for auto-apply.
        private static volatile int typeSeq;

        private static SymSpell symEn;
        private static SymSpell symRu;         // allocated in LoadDictionaries but never built in the visible code
        private static CompactSpell compactRu; // RU dictionary index (the loader caps it at 300k words)
        private static BloomFilter bloomRu;
        private static BloomFilter bloomEn;
        private static SpellNet spellNet;
        private static CharEmbedNet embedNet; // character embedding neural network
        private static Seq2Spell seq2spell;
        private static GruSpellNet gruNet;
        private static BigramLM bigramLM; // word-pair probability model

        // Last auto-corrected original word and when it happened (used by the Ctrl+Z "add to dictionary" prompt).
        private static string lastOriginal;
        private static DateTime lastCorrectionTime;

        private static HashSet<string> userDict = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        private static string userDictPath;

        private static CorrectionTooltip activeTooltip;

        // Sentence context: track if we're at the start of a sentence
        private static char prevBoundaryChar = '\n'; // start of input = start of sentence
        private static bool isFirstWordAfterBoundary = true;

        // "Add to dictionary?" state — intercept Enter
        private static volatile bool waitingForDictConfirm;
        private static string pendingDictWord;

        [DllImport("user32.dll")]
        private static extern int ToUnicodeEx(uint wVirtKey, uint wScanCode, byte[] lpKeyState,
            [MarshalAs(UnmanagedType.LPWStr)] StringBuilder pwszBuff, int cchBuff, uint wFlags, IntPtr dwhkl);
        [DllImport("user32.dll")]
        private static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, UIntPtr dwExtraInfo);
        [DllImport("user32.dll", CharSet = CharSet.Unicode)]
        private static extern IntPtr SendMessageW(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam);
        private const uint WM_CHAR = 0x0102;
        private const uint KEYEVENTF_KEYUP_FLAG = 0x0002;

        /// <summary>Writes a message to the shared logger under the "textproc" channel.</summary>
        private static void Log(string msg) { Logger.Log("textproc", msg); }

        // A type initializer must never throw (the type would become unusable), so every failure
        // is caught and logged, and a failing logger is ignored as well.
        static TextProcessor() { try { Init(); } catch (Exception ex) { try { Logger.Log("textproc", "STATIC CTOR CRASH: " + ex.ToString()); } catch { } } }

        /// <summary>
        /// One-time setup: fills the EN/RU layout maps and starts dictionary loading.
        /// Subsequent calls are no-ops.
        /// </summary>
        private static void Init()
        {
            if (initialized) return;
            initialized = true;
            try { Log("TextProcessor.Init() START"); } catch { }

            // Same-index characters share a physical key on the EN and RU layouts.
            string enLow = "qwertyuiop[]asdfghjkl;'zxcvbnm,.`";
            string ruLow = "йцукенгшщзхъфывапролджэячсмитьбюё";
            for (int i = 0; i < enLow.Length && i < ruLow.Length; i++)
            { enToRu[enLow[i]] = ruLow[i]; ruToEn[ruLow[i]] = enLow[i]; }
            string enUp = "QWERTYUIOP{}ASDFGHJKL:\"ZXCVBNM<>~";
            string ruUp = "ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮЁ";
            for (int i = 0; i < enUp.Length && i < ruUp.Length; i++)
            { enToRu[enUp[i]] = ruUp[i]; ruToEn[ruUp[i]] = enUp[i]; }

            try { LoadDictionaries(); } catch (Exception ex) { Log("LoadDict CRASH: " + ex.ToString()); }
            Log("Init: building in background...");
        }

        /// <summary>
        /// Loads the user dictionary synchronously, then queues three thread-pool workers:
        /// Bloom filters + neural/morphology models, the EN SymSpell index, and the RU
        /// CompactSpell index + RulesEngine. All data files live under "Data" next to the executable;
        /// a missing file is skipped silently.
        /// </summary>
        private static void LoadDictionaries()
        {
            string baseDir = AppDomain.CurrentDomain.BaseDirectory;
            userDictPath = Path.Combine(baseDir, "Data", "dict_user.txt");

            // Best effort: an unreadable user dictionary must not block the rest of the setup.
            try
            {
                if (File.Exists(userDictPath))
                    foreach (var l in File.ReadAllLines(userDictPath))
                    { string w = l.Trim().ToLower(); if (w.Length > 0) userDict.Add(w); }
            }
            catch { }

            symEn = new SymSpell(2);
            symRu = new SymSpell(2);

            // Signalled once the Bloom-filter load attempt has finished (successfully or not).
            // The RU build worker waits on it; without this it could read bloomRu/bloomEn before
            // the loader worker assigned them and hand null filters to CompactSpell/RulesEngine.
            // Created once per process, so it is intentionally never disposed.
            var bloomsLoaded = new System.Threading.ManualResetEvent(false);

            // Load Bloom filters first, then the optional models
            System.Threading.ThreadPool.QueueUserWorkItem(delegate
            {
                try
                {
                    try
                    {
                        string bloomRuPath = Path.Combine(baseDir, "Data", "dict_ru.bloom");
                        string bloomEnPath = Path.Combine(baseDir, "Data", "dict_en.bloom");
                        if (File.Exists(bloomRuPath)) { bloomRu = new BloomFilter(bloomRuPath); Log("Bloom RU loaded: " + (bloomRu.BitCount / 8 / 1024) + "KB"); }
                        if (File.Exists(bloomEnPath)) { bloomEn = new BloomFilter(bloomEnPath); Log("Bloom EN loaded: " + (bloomEn.BitCount / 8 / 1024) + "KB"); }
                    }
                    finally
                    {
                        // Always release the waiter, even when a filter failed to load.
                        bloomsLoaded.Set();
                    }

                    // Load CharNN trained weights
                    string nnPath = Path.Combine(baseDir, "Data", "charnn.bin");
                    if (File.Exists(nnPath)) CharNN.LoadWeights(nnPath);

                    // Load SpellNet
                    string snPath = Path.Combine(baseDir, "Data", "spellnn.bin");
                    if (File.Exists(snPath)) { spellNet = new SpellNet(); spellNet.Load(snPath); }

                    // Load MorphAnalyzer (OpenCorpora 3M+ word forms with POS tags)
                    string morphPath = Path.Combine(baseDir, "Data", "morph.bin.gz");
                    if (!File.Exists(morphPath)) morphPath = Path.Combine(baseDir, "Data", "morph.bin");
                    if (File.Exists(morphPath)) MorphAnalyzer.Load(morphPath);

                    // Load CharEmbedNet
                    string cePath = Path.Combine(baseDir, "Data", "charembed.bin");
                    if (File.Exists(cePath)) { embedNet = new CharEmbedNet(); embedNet.Load(cePath); }

                    // Load GruSpellNet
                    string gruPath = Path.Combine(baseDir, "Data", "seq2spell_gpu.bin");
                    if (File.Exists(gruPath)) { gruNet = new GruSpellNet(); gruNet.Load(gruPath); }

                    // Load Bigram Language Model
                    string bigramPath = Path.Combine(baseDir, "Data", "bigram_lm.bin");
                    if (File.Exists(bigramPath)) { bigramLM = new BigramLM(); bigramLM.Load(bigramPath); }

                    // Load Seq2Spell RNN
                    string s2sPath = Path.Combine(baseDir, "Data", "seq2spell.bin");
                    if (File.Exists(s2sPath)) { seq2spell = new Seq2Spell(); seq2spell.Load(s2sPath); }
                }
                catch (Exception ex) { Log("Bloom/NN load err: " + ex.Message); }
            });

            // Build the EN SymSpell index from dict_en.txt (words of 2+ characters)
            System.Threading.ThreadPool.QueueUserWorkItem(delegate
            {
                try
                {
                    string p = Path.Combine(baseDir, "Data", "dict_en.txt");
                    if (File.Exists(p))
                    {
                        var list = new List<string>();
                        foreach (var l in File.ReadAllLines(p))
                        { string w = l.Trim().ToLower(); if (w.Length >= 2) list.Add(w); }
                        symEn.Build(list.ToArray());
                    }
                }
                catch (Exception ex) { Log("EN build err: " + ex.Message); }
            });

            // Build the RU CompactSpell index from dict_ru.txt (fallback: dict_ru_core.txt),
            // keeping words of 2..25 characters and stopping at 300k entries.
            System.Threading.ThreadPool.QueueUserWorkItem(delegate
            {
                try
                {
                    string p = Path.Combine(baseDir, "Data", "dict_ru.txt");
                    if (!File.Exists(p)) p = Path.Combine(baseDir, "Data", "dict_ru_core.txt");
                    if (File.Exists(p))
                    {
                        var list = new List<string>();
                        foreach (var l in File.ReadAllLines(p))
                        {
                            string w = l.Trim().ToLower();
                            if (w.Length >= 2 && w.Length <= 25) list.Add(w);
                            if (list.Count >= 300000) break; // 300k for ~80MB RAM
                        }
                        Log("CompactSpell RU building from " + list.Count + " words");

                        // The filters are loaded by another worker; wait for that attempt to finish
                        // so the fields read below hold their final values.
                        bloomsLoaded.WaitOne();

                        string[] ruWords = list.ToArray();
                        compactRu = new CompactSpell();
                        compactRu.Bloom = bloomRu;
                        compactRu.Build(ruWords);

                        // Build RulesEngine (auto-generates thousands of corrections)
                        RulesEngine.Build(list.ToArray(), bloomRu);

                        if (bloomEn != null) symEn.ExternalBloom = bloomEn;
                    }
                }
                catch (Exception ex) { Log("RU build err: " + ex.Message); }
            });
        }

        /// <summary>
        /// Handles one key press: maintains the word and sentence buffers and triggers word
        /// processing at boundaries (space, Enter, punctuation, Esc/Tab/Delete, arrow keys).
        /// </summary>
        /// <param name="vk">Virtual-key code of the pressed key.</param>
        /// <param name="scanCode">Hardware scan code of the pressed key.</param>
        /// <returns>
        /// True if the key should be suppressed (eaten by us). That happens only for Enter/Escape
        /// while the "Add to dictionary?" prompt is pending; every other path returns false.
        /// </returns>
        public static bool OnKeyDown(int vk, int scanCode)
        {
            // Intercept Enter when waiting for "Add to dictionary?" confirmation
            if (waitingForDictConfirm)
            {
                if (vk == 0x0D) // Enter
                {
                    waitingForDictConfirm = false;
                    if (pendingDictWord != null) { AddToUserDictionary(pendingDictWord); pendingDictWord = null; }
                    if (TrayApp.Instance != null && !TrayApp.Instance.IsDisposed)
                        TrayApp.Instance.BeginInvoke(new Action(() => { CloseTooltip(); ShowTooltipInternal("Added!", GetCaretScreenPoint()); }));
                    return true; // suppress Enter
                }
                else if (vk == 0x1B) // Escape → cancel
                {
                    waitingForDictConfirm = false; pendingDictWord = null;
                    if (TrayApp.Instance != null && !TrayApp.Instance.IsDisposed)
                        TrayApp.Instance.BeginInvoke(new Action(() => CloseTooltip()));
                    return true;
                }
                else
                {
                    // Any other key → cancel prompt, don't suppress
                    waitingForDictConfirm = false; pendingDictWord = null;
                    if (TrayApp.Instance != null && !TrayApp.Instance.IsDisposed)
                        TrayApp.Instance.BeginInvoke(new Action(() => CloseTooltip()));
                }
            }

            if (!Settings.AutoLangSwitch && !Settings.AutoT9) return false;
            // No ignoreKeys check — Controller skips injected events via LLKHF_INJECTED

            // Ctrl+Z within 10 seconds of an auto-correction: offer to add the original word to
            // the user dictionary. The key itself is not suppressed.
            if (vk == 0x5A)
            {
                bool ctrl = (WinApi.GetAsyncKeyState(0xA2) & 0x8000) != 0 || (WinApi.GetAsyncKeyState(0xA3) & 0x8000) != 0;
                if (ctrl && lastOriginal != null && (DateTime.Now - lastCorrectionTime).TotalSeconds < 10)
                { string orig = lastOriginal; lastOriginal = null; ShowAddToDictTooltip(orig); return false; }
            }

            // Shortcuts (Ctrl/Alt held) are not text input.
            bool anyCtrl = (WinApi.GetAsyncKeyState(0xA2) & 0x8000) != 0 || (WinApi.GetAsyncKeyState(0xA3) & 0x8000) != 0;
            bool anyAlt = (WinApi.GetAsyncKeyState(0xA4) & 0x8000) != 0 || (WinApi.GetAsyncKeyState(0xA5) & 0x8000) != 0;
            if (anyCtrl || anyAlt) return false;

            // Space and Enter are always boundaries
            if (vk == 0x20 || vk == 0x0D)
            {
                lastBoundaryChar = (vk == 0x20) ? ' ' : '\n';
                FlushWord();

                if (vk == 0x20) { sentenceRaw.Append(' '); typeSeq++; }
                else { MaybeSuggestSentence(sentenceRaw.ToString()); sentenceRaw.Length = 0; } // Enter ends sentence

                return false;
            }

            // Backspace: drop the last buffered character. It also edits the on-screen text, so
            // bump typeSeq to make a pending in-place sentence replace stand down.
            if (vk == 0x08)
            {
                if (wordBuffer.Length > 0) wordBuffer.Remove(wordBuffer.Length - 1, 1);
                if (sentenceRaw.Length > 0) sentenceRaw.Remove(sentenceRaw.Length - 1, 1);
                typeSeq++;
                return false;
            }

            // Esc / Tab / Delete: finish the current word. These keys can change text or focus,
            // so they invalidate a pending in-place replace as well.
            if (vk == 0x1B || vk == 0x09 || vk == 0x2E)
            {
                if (wordBuffer.Length > 0) { lastBoundaryChar = ' '; FlushWord(); }
                wordBuffer.Clear();
                typeSeq++;
                return false;
            }

            // Arrow keys: flush current word, then schedule context analysis
            if (vk >= 0x25 && vk <= 0x28)
            {
                if (wordBuffer.Length > 0)
                {
                    lastBoundaryChar = ' ';
                    FlushWord();
                }
                wordBuffer.Clear();
                pendingMerge = null;
                // The caret moved: the raw sentence buffer no longer describes the text in front
                // of the caret, so a backspace count derived from it would delete the wrong text.
                sentenceRaw.Length = 0;
                typeSeq++;
                // Delayed context check on STA thread (UIA requires STA)
                var uiaThread = new System.Threading.Thread(() =>
                {
                    System.Threading.Thread.Sleep(150);
                    AnalyzeAtCursor();
                });
                uiaThread.SetApartmentState(System.Threading.ApartmentState.STA);
                uiaThread.IsBackground = true;
                uiaThread.Start();
                return false;
            }

            // For all other keys: resolve the actual CHARACTER, then decide
            char ch = VkToChar(vk, scanCode);
            if (ch == '\0') return false;
            sentenceRaw.Append(ch); typeSeq++;

            if (char.IsLetter(ch) || ch == '\'')
            {
                wordBuffer.Append(ch);
            }
            else if (ch == '-')
            {
                // Hyphen: treated as part of the word; ProcessWord later skips spelling
                // correction for hyphenated words.
                wordBuffer.Append(ch);
            }
            else
            {
                // It's punctuation (.,;:/ etc.) → word boundary
                lastBoundaryChar = ch;
                FlushWord();

                // End of sentence → live SAGE full-sentence suggestion (if enabled)
                if (ch == '.' || ch == '!' || ch == '?')
                {
                    MaybeSuggestSentence(sentenceRaw.ToString());
                    sentenceRaw.Length = 0;
                }
            }
            return false;
        }

        private static string pendingMerge = null; // single letter that might belong to previous word

        /// <summary>
        /// Finishes the word in <c>wordBuffer</c> at a boundary: tries to merge it with a pending
        /// single letter, parks a lone non-standalone letter for a later merge, or runs
        /// <c>ProcessWord</c>. Afterwards resets the sentence context when the boundary character
        /// ends a sentence, and always clears the word buffer.
        /// </summary>
        private static void FlushWord()
        {
            if (wordBuffer.Length > 0)
            {
                string current = wordBuffer.ToString();

                // Merge logic: a previously typed single letter may be a split-off part of this
                // word ("орфографически й" → "орфографический")
                if (pendingMerge != null)
                {
                    // Try merging: previous single char + current word
                    string merged = pendingMerge + current;
                    SymSpell sym = (symRu != null && symRu.IsReady) ? symRu : null;
                    if (sym == null) sym = (symEn != null && symEn.IsReady) ? symEn : null;

                    // Merge only when the joined form is a known word and the current one is not.
                    if (sym != null && sym.Contains(merged.ToLower()) && !sym.Contains(current.ToLower()))
                    {
                        Log("Merge: \"" + pendingMerge + "\" + \"" + current + "\" → \"" + merged + "\"");
                        Point cp = GetCaretScreenPoint();
                        // Delete the current word, the separating space and the previous single
                        // char, then retype the merged word followed by the boundary character.
                        int delCount = current.Length + 1 + pendingMerge.Length; // word + space + prev char
                        QueueReplaceRaw(delCount, merged + lastBoundaryChar, cp);
                        prevWord = merged.ToLower();
                        sentenceHistory.Add(merged.ToLower());
                        pendingMerge = null;
                        wordBuffer.Clear();
                        // A merged word can end a sentence too: run the sentence-end reset
                        // below instead of returning early and skipping it.
                        goto checkPunctuation;
                    }
                    pendingMerge = null;
                }

                // If current word is a single letter that's not a common standalone word
                if (current.Length == 1 && char.IsLetter(current[0]))
                {
                    char c = char.ToLower(current[0]);
                    bool isStandalone = "авикосуя".IndexOf(c) >= 0;
                    if (!isStandalone)
                    {
                        pendingMerge = current; // save for potential merge with next word
                        Log("PendingMerge: \"" + current + "\"");
                        sentenceHistory.Add(current.ToLower());
                        wordBuffer.Clear();
                        // Still process sentence-ending punctuation below
                        goto checkPunctuation;
                    }
                }

                Log("Boundary word=\"" + current + "\" sentStart=" + IsSentenceStart());
                ProcessWord();
                if (wordBuffer.Length >= 2)
                    isFirstWordAfterBoundary = false;
            }

        checkPunctuation:
            // Sentence-ending punctuation resets context
            if (lastBoundaryChar == '.' || lastBoundaryChar == '!' || lastBoundaryChar == '?' || lastBoundaryChar == '\n')
            {
                prevBoundaryChar = lastBoundaryChar;
                isFirstWordAfterBoundary = true;
                sentenceHistory.Clear(); // new sentence = fresh context
            }
            wordBuffer.Clear();
        }

        // Live full-sentence SAGE handling: on sentence end the raw sentence is corrected in the
        // background. With Settings.SentenceAiSuggest the polished version is shown as a tooltip
        // (non-destructive); with Settings.SentenceAiAutoApply it is applied in place, but only
        // when typeSeq shows that nothing was typed since the trigger.
377 private static void MaybeSuggestSentence(string raw) 378 { 379 bool suggest = Settings.SentenceAiSuggest, auto = Settings.SentenceAiAutoApply; 380 if ((!suggest && !auto) || raw == null) return; 381 string s = raw.Trim(); 382 if (s.Length < 8 || !SageClient.IsAvailable) return; 383 int seq0 = typeSeq; // race guard for auto-apply 384 int delCount = raw.Length; // chars to remove back from the caret (this sentence) 385 string lead = ""; int li = 0; while (li < raw.Length && char.IsWhiteSpace(raw[li])) { lead += raw[li]; li++; } 386 Point caret = GetCaretScreenPoint(); 387 System.Threading.ThreadPool.QueueUserWorkItem(delegate 388 { 389 try 390 { 391 string corrected = SageClient.Correct(s); 392 if (string.IsNullOrEmpty(corrected)) return; 393 corrected = corrected.Trim(); 394 if (corrected == s) return; 395 if (auto) 396 { 397 // Apply in place ONLY if the user hasn't typed since the trigger (caret still 398 // at the sentence end). Backspace+type at the caret — no clipboard, no focus 399 // change. If they typed, skip (never corrupt). 
400 if (typeSeq != seq0) return; 401 QueueReplaceRaw(delCount, lead + corrected, caret); 402 } 403 else if (TrayApp.Instance != null && !TrayApp.Instance.IsDisposed) 404 { 405 TrayApp.Instance.BeginInvoke(new Action(delegate { ShowTooltipInternal("✓ " + corrected, caret); })); 406 } 407 } 408 catch { } 409 }); 410 } 411 412 private static char VkToChar(int vk, int scanCode) 413 { 414 byte[] ks = new byte[256]; 415 for (int i = 0; i < 256; i++) ks[i] = (byte)(WinApi.GetKeyState(i) & 0xFF); 416 IntPtr fg = WinApi.GetForegroundWindow(); 417 uint pid; uint tid = WinApi.GetWindowThreadProcessId(fg, out pid); 418 IntPtr hkl = WinApi.GetKeyboardLayout(tid); 419 StringBuilder buf = new StringBuilder(4); 420 int r = ToUnicodeEx((uint)vk, (uint)scanCode, ks, buf, buf.Capacity, 0, hkl); 421 if (r == 1) return buf[0]; 422 if (r < 0) ToUnicodeEx((uint)vk, (uint)scanCode, ks, buf, buf.Capacity, 0, hkl); 423 return '\0'; 424 } 425 426 private static void ProcessWord() 427 { 428 string word = wordBuffer.ToString(); 429 string lower = word.ToLower(); 430 if (lower.Length < 2) return; 431 432 // Auto-apply mode: SAGE corrects the whole sentence at its end, so skip per-word edits — 433 // keeps on-screen text == what was typed (exact backspace count for the sentence replace). 434 if (Settings.SentenceAiAutoApply) 435 { 436 prevWord = lower; sentenceHistory.Add(lower); 437 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 438 return; 439 } 440 441 // Read context from active window (non-invasive, cached) 442 var curCtx = CursorReader.GetContext(); 443 if (curCtx.IsValid) 444 { 445 if (string.IsNullOrEmpty(prevWord) && curCtx.PrevWords != null && curCtx.PrevWords.Length > 0) 446 prevWord = curCtx.PrevWords[curCtx.PrevWords.Length - 1]; 447 } 448 if (userDict.Contains(lower)) return; 449 450 // RulesEngine corrections (auto-generated + manual, thousands of entries) 451 string rulesFix = RulesEngine.IsReady ? 
RulesEngine.Lookup(lower) : null; 452 // Also check old forcedFix as fallback 453 if (rulesFix == null) { string ff; if (forcedFix.TryGetValue(lower, out ff)) rulesFix = ff; } 454 if (rulesFix != null) 455 { 456 Log(" RulesFix: \"" + lower + "\" → \"" + rulesFix + "\""); 457 Point fp = GetCaretScreenPoint(); 458 string fc = SmartCase(word, rulesFix); 459 QueueReplace(word, fc, false, fp); 460 prevWord = rulesFix; 461 sentenceHistory.Add(rulesFix); 462 return; 463 } 464 // -тся/-ться contextual fix 465 string tsyaFix = RulesEngine.IsReady ? RulesEngine.FixTsyaTsya(word, prevWord) : null; 466 if (tsyaFix != null) 467 { 468 Log(" TsyaFix: \"" + lower + "\" → \"" + tsyaFix + "\""); 469 Point fp = GetCaretScreenPoint(); 470 QueueReplace(word, SmartCase(word, tsyaFix), false, fp); 471 prevWord = tsyaFix; 472 sentenceHistory.Add(tsyaFix); 473 return; 474 } 475 476 // Skip hyphenated words 477 if (lower.Contains("-")) 478 { 479 Log(" Skip hyphenated: \"" + lower + "\""); 480 prevWord = lower; 481 sentenceHistory.Add(lower); 482 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 483 return; 484 } 485 486 Point caretPos = GetCaretScreenPoint(); 487 IntPtr fg = WinApi.GetForegroundWindow(); 488 uint pid; uint tid = WinApi.GetWindowThreadProcessId(fg, out pid); 489 IntPtr hkl = WinApi.GetKeyboardLayout(tid); 490 int langId = (int)hkl & 0xFFFF; 491 bool isRu = langId == 0x0419, isEn = langId == 0x0409; 492 493 SymSpell thisSym = isEn ? symEn : null; 494 bool spellReady = isRu ? (compactRu != null && compactRu.IsReady) : (symEn != null && symEn.IsReady); 495 496 Log("Process: \"" + word + "\" " + (isRu ? "RU" : isEn ? "EN" : "?") + " ready=" + spellReady); 497 498 // Language switch — word >= 3 chars, not in current dict, remapped IS in other dict 499 if (Settings.AutoLangSwitch && (isEn || isRu) && lower.Length >= 3) 500 { 501 string remapped = RemapWord(word, isEn); 502 if (remapped != null) 503 { 504 BloomFilter thisBloom = isRu ? 
bloomRu : bloomEn; 505 BloomFilter otherBloom = isEn ? bloomRu : bloomEn; 506 bool inThis = thisBloom != null && thisBloom.MayContain(lower); 507 bool inOther = otherBloom != null && otherBloom.MayContain(remapped.ToLower()); 508 Log(" remap=\"" + remapped + "\" inThis=" + inThis + " inOther=" + inOther); 509 if (!inThis && inOther) 510 { 511 Log(" SWITCH → \"" + remapped + "\""); 512 prevWord = remapped.ToLower(); 513 sentenceHistory.Add(remapped.ToLower()); 514 QueueReplace(word, remapped, true, caretPos); 515 return; 516 } 517 } 518 } 519 520 // Autocorrect: CompactSpell (ALL 1.5M RU) or SymSpell (EN) 521 if (Settings.AutoT9 && spellReady && lower.Length >= 3) 522 { 523 BloomFilter bloom = isRu ? bloomRu : bloomEn; 524 bool isCorrect = false; 525 var candidates = new List<SymSpell.Candidate>(); 526 527 if (isRu && compactRu != null && compactRu.IsReady) 528 { 529 if (compactRu.ContainsTrusted(lower)) 530 { 531 isCorrect = true; 532 } 533 else if (compactRu.ContainsExact(lower)) 534 { 535 // In the full (noisy) dict but NOT trusted → may be corpus garbage. Frequency- 536 // override: if a TRUSTED neighbour is one edit away (or two NATURAL phonetic 537 // edits), prefer it; otherwise keep the typed word. force:true makes Lookup 538 // generate candidates even though the word is technically "in the dictionary". 
539 candidates = compactRu.Lookup(lower, 5, true); 540 SymSpell.Candidate fix = null; 541 foreach (var c in candidates) 542 { 543 if (!compactRu.ContainsTrusted(c.Word)) continue; 544 if (c.Distance == 1 || 545 (c.Distance == 2 && c.Word.Length > 0 && lower.Length > 0 && c.Word[0] == lower[0] && 546 SpellScore.EditPlausibility(lower, c.Word) >= 2000)) 547 { fix = c; break; } 548 } 549 if (fix == null) { isCorrect = true; candidates.Clear(); } // no better option → keep as-is 550 else candidates = new List<SymSpell.Candidate> { fix }; 551 } 552 else 553 { 554 candidates = compactRu.Lookup(lower, 8); 555 } 556 } 557 else if (thisSym != null && thisSym.IsReady) 558 { 559 bool symCorrect; 560 candidates = thisSym.LookupTop(lower, 8, out symCorrect); 561 if (symCorrect) isCorrect = true; 562 } 563 564 var sw = System.Diagnostics.Stopwatch.StartNew(); 565 sw.Stop(); // timing was above 566 567 if (isCorrect) 568 { 569 Log(" SymSpell: \"" + lower + "\" → (ok) [" + sw.ElapsedMilliseconds + "ms]"); 570 prevWord = lower; 571 } 572 else if (candidates.Count > 0) 573 { 574 // Try to get real context from active window (background UIA) for a bigram nudge 575 string[] windowCtx = GetWindowContext(5); 576 // Validate: context must contain at least one Cyrillic word to be useful 577 bool ctxValid = false; 578 if (windowCtx != null && windowCtx.Length > 0) 579 { 580 string lastCtx = windowCtx[windowCtx.Length - 1]; 581 foreach (char ch in lastCtx) 582 if (ch >= '\u0400' && ch <= '\u04FF') { ctxValid = true; break; } 583 } 584 if (ctxValid) 585 { 586 prevWord = windowCtx[windowCtx.Length - 1]; 587 Log(" WindowCtx: [" + string.Join(", ", windowCtx) + "] → prevWord=\"" + prevWord + "\""); 588 } 589 // Do NOT re-rank with CharNN/CharEmbedNet — they misrank (bag-of-chars, see 590 // AUTOCORRECT_REVIEW.md). CompactSpell.Lookup already ranked candidates via the 591 // noisy-channel SpellScore (frequency + ё/phonetic/keyboard plausibility, distance 592 // folded in). 
Use the previous word only as a light bigram nudge. 593 if (bigramLM != null && bigramLM.IsReady && !string.IsNullOrEmpty(prevWord) && candidates.Count > 1) 594 { 595 foreach (var c in candidates) 596 c.ContextScore += bigramLM.Score(prevWord, c.Word) * 4; 597 candidates.Sort(delegate(SymSpell.Candidate a, SymSpell.Candidate b) 598 { return b.ContextScore.CompareTo(a.ContextScore); }); 599 } 600 601 // STAGE 2 — context neural rescore (опц., эксп.): a tiny masked-LM (rubert-tiny2 via 602 // the warm server) re-ranks the top dictionary candidates by P(word|context) — the 603 // real "language model" stage every modern autocorrect uses (see TSF_AND_TINY_NN_DESIGN.md). 604 // Candidates come ONLY from the dictionary, so it re-ranks but never hallucinates. 605 // Behind a default-OFF flag; any failure leaves the noisy-channel/bigram ranking intact. 606 if (Settings.ContextNnRescore && ctxValid && candidates.Count > 1 && RescoreClient.IsAvailable) 607 { 608 try 609 { 610 int take = Math.Min(6, candidates.Count); 611 var words = new string[take]; 612 for (int i = 0; i < take; i++) words[i] = candidates[i].Word; 613 double[] rs = RescoreClient.Rescore(string.Join(" ", windowCtx), "", words); 614 if (rs != null && rs.Length == take) 615 { 616 for (int i = 0; i < take; i++) 617 candidates[i].ContextScore += (int)Math.Round(rs[i] * 1500); 618 candidates.Sort(delegate(SymSpell.Candidate a, SymSpell.Candidate b) 619 { return b.ContextScore.CompareTo(a.ContextScore); }); 620 Log(" NN-rescore: \"" + candidates[0].Word + "\" leads (ctx=\"" + string.Join(" ", windowCtx) + "\")"); 621 } 622 } 623 catch { } 624 } 625 626 var best = candidates[0]; 627 Log(" SymSpell: \"" + lower + "\" → \"" + best.Word + "\" dist=" + best.Distance + 628 " nn=" + best.ContextScore + 629 " (of " + candidates.Count + ") [" + sw.ElapsedMilliseconds + "ms]"); 630 631 // Smart filtering 632 bool shouldReplace = false; 633 634 // Guard: don't replace if candidate is just a truncation of input 635 // 
"нейронку"→"нейрону" — input contains candidate as prefix/substring 636 if (best.Distance == 1 && lower.Length > best.Word.Length && lower.StartsWith(best.Word.Substring(0, Math.Min(best.Word.Length, lower.Length - 1)))) 637 { 638 // Candidate is shorter — it's a suffix mismatch, not a typo 639 // Only allow if the removed part is clearly a typo char (duplicate etc.) 640 bool isDuplicate = lower.Length == best.Word.Length + 1; 641 if (!isDuplicate) 642 { 643 Log(" Skip truncation: \"" + lower + "\" → \"" + best.Word + "\""); 644 shouldReplace = false; 645 goto skipReplace; 646 } 647 } 648 649 // Precision-first gate: only auto-correct INTO a common/known word so a wrong 650 // correction is unlikely. Under-correcting beats mangling an intentional word. 651 bool bestTrusted = (isRu && compactRu != null) ? compactRu.ContainsTrusted(best.Word) : true; 652 bool bestFrequent = best.FreqIdx >= 0 && best.FreqIdx < 30000; 653 if (best.Distance == 1) 654 shouldReplace = bestTrusted || bestFrequent; 655 else if (best.Distance == 2 && lower.Length >= 4) 656 shouldReplace = (bestTrusted || bestFrequent) && best.Word[0] == lower[0]; 657 658 skipReplace: 659 if (shouldReplace) 660 { 661 string corrected = SmartCase(word, best.Word); 662 lastOriginal = word; lastCorrectionTime = DateTime.Now; 663 prevWord = best.Word; 664 sentenceHistory.Add(best.Word); 665 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 666 QueueReplace(word, corrected, false, caretPos); 667 return; 668 } 669 } 670 else 671 { 672 // Cascade: no direct match → try fixing obvious errors first, then re-lookup 673 string cascadeResult = CascadeRepair(lower, bloom, isRu ? 
null : thisSym); 674 if (cascadeResult != null) 675 { 676 Log(" Cascade: \"" + lower + "\" → \"" + cascadeResult + "\""); 677 string corrected = SmartCase(word, cascadeResult); 678 lastOriginal = word; lastCorrectionTime = DateTime.Now; 679 prevWord = cascadeResult; 680 sentenceHistory.Add(cascadeResult); 681 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 682 QueueReplace(word, corrected, false, caretPos); 683 return; 684 } 685 Log(" SymSpell: \"" + lower + "\" → (no candidates)"); 686 } 687 } 688 689 // Auto-capitalize after . ! ? or at very start of typing 690 bool sentStart = IsSentenceStart(); 691 if (Settings.AutoCapitalize && sentStart && word.Length > 0 && char.IsLower(word[0])) 692 { 693 string capitalized = char.ToUpper(word[0]) + word.Substring(1); 694 Log(" AutoCap: \"" + word + "\" → \"" + capitalized + "\""); 695 QueueReplace(word, capitalized, false, caretPos); 696 return; 697 } 698 699 // Fix accidental CapsLock mid-word: "иНадо" → "и" + "Надо" won't hit this, 700 // but "привЕт" (random caps in middle) → fix 701 if (word.Length >= 3 && char.IsLower(word[0])) 702 { 703 bool hasRandomCaps = false; 704 for (int ci = 1; ci < word.Length; ci++) 705 if (char.IsUpper(word[ci])) { hasRandomCaps = true; break; } 706 if (hasRandomCaps) 707 { 708 string fixedWord = word.ToLower(); 709 // Check if lowercase version is in dict 710 BloomFilter fb = isRu ? 
bloomRu : bloomEn; 711 if (fb != null && fb.MayContain(fixedWord)) 712 { 713 Log(" FixCaps: \"" + word + "\" → \"" + fixedWord + "\""); 714 QueueReplace(word, fixedWord, false, caretPos); 715 return; 716 } 717 } 718 } 719 720 // Auto-punctuation: insert comma before certain conjunctions/particles 721 if (Settings.AutoPunctuation && sentenceHistory.Count >= 1 && lastBoundaryChar == ' ') 722 { 723 string comma = NeedsCommaBefore(lower, sentenceHistory); 724 if (comma != null) 725 { 726 Log(" AutoComma before \"" + lower + "\""); 727 // Insert comma: delete the current word + space, retype as ", word" 728 QueueReplaceRaw(word.Length + 1, comma + " " + word + " ", caretPos); 729 prevWord = lower; 730 sentenceHistory.Add(lower); 731 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 732 return; 733 } 734 } 735 736 // Always update context 737 prevWord = lower; 738 sentenceHistory.Add(lower); 739 if (sentenceHistory.Count > 10) sentenceHistory.RemoveAt(0); 740 } 741 742 private static string LastWord(string ctx) 743 { 744 if (string.IsNullOrEmpty(ctx)) return ""; 745 var parts = ctx.Split(new[] { ' ', '\t', '\n', '\r', '.', ',', '!', '?', ';', ':' }, 746 StringSplitOptions.RemoveEmptyEntries); 747 return parts.Length > 0 ? parts[parts.Length - 1].ToLower() : ""; 748 } 749 750 /// <summary> 751 /// Pure single-word correction for the TSF TIP bridge (Helpers/TipBridge.cs). Given a word and 752 /// its left context, returns the corrected word (or the original if no correction is warranted). 753 /// Reuses the SAME brain as live typing — RulesEngine/forcedFix, the noisy-channel CompactSpell 754 /// candidates, the rubert-tiny2 context rescorer and the precision gate — but touches NO 755 /// live-typing state (no QueueReplace, no prevWord/sentenceHistory). RU only (the TIP profile is 756 /// Russian). Never throws; returns the original word on anything unexpected. 
/// </summary>
/// <param name="word">Word to check, in the casing it was typed.</param>
/// <param name="leftContext">
/// Text preceding the word. Its last word (via LastWord) feeds the bigram model, and the
/// whole string is passed to RescoreClient when neural rescoring is enabled.
/// </param>
/// <returns>
/// SmartCase(word, fix) when a rule, a forcedFix entry or a sufficiently trusted dictionary
/// candidate is found; otherwise <paramref name="word"/> unchanged. Exceptions raised while
/// correcting are logged and swallowed, and the input word is returned.
/// </returns>
public static string CorrectWordForTip(string word, string leftContext)
{
    try
    {
        if (string.IsNullOrEmpty(word) || word.Length < 2) return word;

        // Invariant lowering: a culture-sensitive ToLower() maps Latin 'I' to dotless 'ı' under
        // Turkic UI cultures, which would make the Latin keys of forcedFix unreachable.
        string lower = word.ToLowerInvariant();

        // Words present in the user dictionary are returned untouched.
        if (userDict != null && userDict.Contains(lower)) return word;

        // Explicit fixes take priority over dictionary lookup: RulesEngine first, then forcedFix.
        string explicitFix = RulesEngine.IsReady ? RulesEngine.Lookup(lower) : null;
        if (explicitFix == null)
        {
            string forced;
            if (forcedFix.TryGetValue(lower, out forced)) explicitFix = forced;
        }
        if (explicitFix != null) return SmartCase(word, explicitFix);

        // Hyphenated words, a dictionary that is not ready, and words shorter than three
        // characters are not corrected beyond the explicit fixes above.
        if (lower.Contains("-")) return word;
        if (compactRu == null || !compactRu.IsReady) return word;
        if (lower.Length < 3) return word;

        // A trusted dictionary word needs no correction.
        if (compactRu.ContainsTrusted(lower)) return word;

        List<SymSpell.Candidate> candidates;
        if (compactRu.ContainsExact(lower))
        {
            // Known but untrusted word: replace it only with a trusted neighbour that is one edit
            // away, or two edits away with the same first letter and a high plausibility score.
            List<SymSpell.Candidate> neighbours = compactRu.Lookup(lower, 5, true);
            if (neighbours == null) return word;

            SymSpell.Candidate fix = null;
            foreach (var c in neighbours)
            {
                if (!compactRu.ContainsTrusted(c.Word)) continue;
                // lower.Length >= 3 at this point, so lower[0] is always valid.
                if (c.Distance == 1 ||
                    (c.Distance == 2 && c.Word.Length > 0 && c.Word[0] == lower[0] &&
                     SpellScore.EditPlausibility(lower, c.Word) >= 2000))
                {
                    fix = c;
                    break;
                }
            }
            if (fix == null) return word;
            candidates = new List<SymSpell.Candidate> { fix };
        }
        else
        {
            candidates = compactRu.Lookup(lower, 8);
        }

        // Explicit null check instead of relying on a NullReferenceException reaching the catch.
        if (candidates == null || candidates.Count == 0) return word;

        // Highest ContextScore first; shared by both re-ranking passes below.
        Comparison<SymSpell.Candidate> byContextScoreDesc =
            delegate(SymSpell.Candidate a, SymSpell.Candidate b)
            { return b.ContextScore.CompareTo(a.ContextScore); };

        // Re-rank by how well each candidate follows the previous word.
        string prev = LastWord(leftContext);
        if (bigramLM != null && bigramLM.IsReady && !string.IsNullOrEmpty(prev) && candidates.Count > 1)
        {
            foreach (var c in candidates) c.ContextScore += bigramLM.Score(prev, c.Word) * 4;
            candidates.Sort(byContextScoreDesc);
        }

        // Optional rescoring of the top candidates (at most six) against the left context.
        if (Settings.ContextNnRescore && !string.IsNullOrEmpty(leftContext) && candidates.Count > 1 && RescoreClient.IsAvailable)
        {
            try
            {
                int take = Math.Min(6, candidates.Count);
                var words = new string[take];
                for (int i = 0; i < take; i++) words[i] = candidates[i].Word;

                double[] rs = RescoreClient.Rescore(leftContext, "", words);
                if (rs != null && rs.Length == take)
                {
                    for (int i = 0; i < take; i++) candidates[i].ContextScore += (int)Math.Round(rs[i] * 1500);
                    candidates.Sort(byContextScoreDesc);
                }
            }
            catch (Exception ex)
            {
                // Rescoring is best-effort: keep the ranking we already have, but leave a trace.
                try { Log("CorrectWordForTip rescore failed: " + ex.Message); } catch { }
            }
        }

        var best = candidates[0];

        // Guard: don't replace if the candidate is just a truncation of the input.
        // The length test above the prefix check guarantees best.Word is shorter than lower,
        // so the prefix compared is the whole candidate; Ordinal keeps it a plain char match.
        // NOTE(review): if Distance is a true edit distance, Distance == 1 together with a
        // shorter candidate implies a length difference of exactly 1, so isDuplicate would
        // always be true and this guard would never reject — confirm the intended check.
        if (best.Distance == 1 && lower.Length > best.Word.Length &&
            lower.StartsWith(best.Word, StringComparison.Ordinal))
        {
            bool isDuplicate = lower.Length == best.Word.Length + 1;
            if (!isDuplicate) return word;
        }

        // Precision-first gate (same as live typing): correct only INTO a trusted/frequent word.
        bool bestTrusted = compactRu.ContainsTrusted(best.Word);
        bool bestFrequent = best.FreqIdx >= 0 && best.FreqIdx < 30000;
        bool shouldReplace = false;
        if (best.Distance == 1)
            shouldReplace = bestTrusted || bestFrequent;
        else if (best.Distance == 2 && lower.Length >= 4)
            shouldReplace = (bestTrusted || bestFrequent) && best.Word[0] == lower[0];

        return shouldReplace ? SmartCase(word, best.Word) : word;
    }
    catch (Exception ex)
    {
        // A suggestion must never throw into the caller; log the failure and fall back to the input.
        try { Log("CorrectWordForTip CRASH: " + ex.ToString()); } catch { }
        return word;
    }
}

// Common errors that frequency lists treat as valid — force correct
private static readonly Dictionary<string, string> forcedFix = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) {
    {"вобще","вообще"},{"вобщем","в общем"},{"кароче","короче"},{"карочи","короче"},
    {"миня","меня"},{"тибя","тебя"},{"сибя","себя"},{"мене","меня"},
    {"шол","шёл"},{"пришол","пришёл"},{"ушол","ушёл"},{"нашол","нашёл"},{"пошол","пошёл"},
    {"щас","сейчас"},{"чё","что"},{"чо","что"},{"шо","что"},
    {"сдесь","здесь"},{"зделал","сделал"},{"зделать","сделать"},
    {"придти","прийти"},{"прити","прийти"},
    {"ихний","их"},{"евоный","его"},
    {"агенство","агентство"},{"учавствовать","участвовать"},
    {"будующий","будущий"},{"следущий","следующий"},
    {"симпотичный","симпатичный"},{"координально","кардинально"},
    {"извените","извините"},
    {"оффициальный","официальный"},{"оффис","офис"},
    {"коментарий","комментарий"},
    {"граммотный","грамотный"},{"граммотность","грамотность"},
    {"видио","видео"},{"компьютор","компьютер"},{"компутер","компьютер"},
    {"интирнет","интернет"},
    {"програма","программа"},{"каллега","коллега"},{"колега","коллега"},
    {"росия","россия"},{"помошник","помощник"},
    {"расчитать","рассчитать"},{"поциент","пациент"},
    {"канешна","конечно"},{"канешно","конечно"},
    // Merged words (people write with space)
    // "поэтому" handled in merge logic
    {"нужнали","нужна ли"},
    // Split words people merge
    {"деминистратора","администратора"},{"деминистратор","администратор"},
    // администратора → already correct
    // "ив тоге" type: will be handled by merge + correction
    {"тоге","итоге"},{"втоге","в итоге"},
    // Common verb/ending errors
    {"шариш","шаришь"},{"знаеш","знаешь"},{"хочеш","хочешь"},{"можеш","можешь"},
    {"делаеш","делаешь"},{"думаеш","думаешь"},{"понимаеш","понимаешь"},
{"говориш","говоришь"},{"видиш","видишь"},{"слышиш","слышишь"}, 879 {"пишеш","пишешь"},{"читаеш","читаешь"},{"идёш","идёшь"},{"идеш","идёшь"}, 880 // More common misspellings 881 {"праграмму","программу"},{"праграмма","программа"},{"праграммы","программы"}, 882 {"здали","сдали"},{"здать","сдать"},{"здал","сдал"}, 883 {"тихнология","технология"},{"тихналогия","технология"}, 884 {"рассматрю","рассмотрю"}, 885 {"серьозно","серьёзно"},{"серьозный","серьёзный"}, 886 {"щитать","считать"},{"щитаю","считаю"}, 887 // Phonetic errors (как слышится) 888 {"здрасти","здравствуйте"},{"здрасте","здравствуйте"},{"здрасьте","здравствуйте"}, 889 {"ришил","решил"},{"ришила","решила"},{"ришать","решать"}, 890 {"сибе","себе"}, 891 {"ево","его"},{"ей","её"}, 892 {"званить","звонить"},{"званю","звоню"},{"званок","звонок"}, 893 {"нисет","несёт"},{"нисти","нести"},{"нису","несу"}, 894 {"чюш","чушь"},{"чюшь","чушь"}, 895 {"канцов","концов"},{"вканце","в конце"},{"вконце","в конце"}, 896 {"канцы","концы"}, 897 {"какайта","какая-то"},{"какойта","какой-то"},{"чтота","что-то"}, 898 {"превез","привёз"},{"превезли","привезли"},{"превозить","привозить"}, 899 {"прекиньте","прикиньте"},{"прекинь","прикинь"}, 900 {"одрису","адресу"},{"одрис","адрес"},{"одриса","адреса"}, 901 {"падругому","по другому"},// без дефиса — чаще предлог+прилаг 902 {"средстф","средств"},{"средстр","средств"}, 903 {"капец","капец"},// slang, keep 904 {"типерь","теперь"}, 905 {"напесал","написал"},{"напесать","написать"}, 906 {"проста","просто"}, 907 {"выносима","выносимо"},{"невыносима","невыносимо"}, 908 // More phonetic 909 {"палучить","получить"},{"палучил","получил"}, 910 {"памогите","помогите"},{"памочь","помочь"}, 911 {"абьяснить","объяснить"},{"абьясни","объясни"}, 912 {"расматривать","рассматривать"}, 913 {"наверна","наверное"},{"наверно","наверное"}, 914 {"харашо","хорошо"},{"хараша","хороша"}, 915 {"пажалуста","пожалуйста"},{"пожалуста","пожалуйста"}, 916 {"новы","новый"},{"стары","старый"}, 917 {"фподдержку","в 
поддержку"}, 918 {"штоб","чтоб"},{"штобы","чтобы"},{"што","что"}, 919 // Modern + compound 920 {"маркетплейс","маркетплейс"},{"маркетплейсе","маркетплейсе"}, 921 {"неросеть","нейросеть"},{"неросети","нейросети"},{"неросетью","нейросетью"}, 922 {"капслоком","капслоком"},{"капслок","капслок"}, 923 {"разгаваривает","разговаривает"},{"гаваривает","говорит"}, 924 {"раззлился","разозлился"}, 925 {"какойто","какой-то"},{"какоето","какое-то"},{"какието","какие-то"}, 926 {"чтото","что-то"},{"гдето","где-то"},{"кудато","куда-то"}, 927 {"ктото","кто-то"},{"когдато","когда-то"}, 928 {"полною","полную"}, 929 // Specific dist=1 ambiguity fixes (correct word should win) 930 {"плокат","плакат"},{"таго","того"},{"тагор","тагор"},// keep тагор if someone actually writes it 931 {"зделке","сделке"},{"зделку","сделку"},{"зделка","сделка"}, 932 {"расчитывал","рассчитывал"},{"расчитывать","рассчитывать"}, 933 {"агенства","агентства"}, 934 {"пожаловаца","пожаловаться"},{"пожаловалься","пожаловался"}, 935 {"завышеной","завышенной"},{"завышеная","завышенная"},{"завышеное","завышенное"}, 936 {"искуственый","искусственный"},{"искуственая","искусственная"},{"искуственое","искусственное"}, 937 {"некомпентно","некомпетентно"},{"компентно","компетентно"}, 938 {"этоге","итоге"}, 939 {"рукаводитель","руководитель"},{"рукаводител","руководител"}, 940 {"предлажил","предложил"},{"предлажить","предложить"}, 941 {"заключять","заключать"}, 942 {"металический","металлический"},{"металическая","металлическая"}, 943 {"алюминевый","алюминиевый"}, 944 {"професиональный","профессиональный"},{"професиональная","профессиональная"}, 945 {"преодалевать","преодолевать"}, 946 {"почуствовал","почувствовал"}, 947 {"нечяено","нечаянно"},{"нечаяно","нечаянно"}, 948 {"решыл","решил"}, 949 {"рассыпаный","рассыпанный"}, 950 {"кажеца","кажется"},{"каженся","кажется"}, 951 {"краце","кратце"},{"вкраце","вкратце"}, 952 {"раскажу","расскажу"},{"раскажи","расскажи"}, 953 {"машыну","машину"},{"машына","машина"}, 954 
{"дратути","здравствуйте"}, 955 {"пылисосить","пылесосить"},{"пылисос","пылесос"}, 956 {"задорага","задорого"},{"задорого","задорого"}, 957 {"хорошева","хорошего"}, 958 {"каторый","который"},{"каторая","которая"},{"каторое","которое"}, 959 {"можэт","может"}, 960 {"чесло","число"}, 961 {"прешлось","пришлось"}, 962 // Leeds Corpus errors in top-87k (trusted but WRONG) 963 {"правельно","правильно"},{"правельный","правильный"}, 964 {"многа","много"},{"многии","многие"}, 965 {"вабще","вообще"}, 966 {"запетые","запятые"},{"запетая","запятая"}, 967 {"сиводня","сегодня"},{"севодня","сегодня"}, 968 {"превет","привет"},{"превед","привет"}, 969 {"забал","забыл"},{"забала","забыла"}, 970 {"када","когда"},{"кагда","когда"}, 971 {"типя","тебя"}, 972 {"асоббенно","особенно"},{"асобено","особенно"},{"особено","особенно"}, 973 {"билку","белку"},{"билка","белка"}, 974 {"трапинке","тропинке"},{"трапинка","тропинка"}, 975 {"бальшую","большую"},{"бальшой","большой"}, 976 {"сонца","солнце"},{"сонце","солнце"}, 977 {"месице","месяце"},{"месиц","месяц"}, 978 {"жызнь","жизнь"}, 979 {"пайти","пойти"}, 980 {"пагулять","погулять"}, 981 {"падумал","подумал"},{"падумала","подумала"}, 982 {"унедал","увидел"}, 983 {"павидать","повидать"}, 984 {"песать","писать"}, 985 {"главноя","главное"}, 986 {"понемали","понимали"},{"понемать","понимать"}, 987 {"можна","можно"}, 988 {"ничево","ничего"},{"чево","чего"}, 989 {"очинь","очень"}, 990 // Heavy scramble fixes (crash-test level errors) 991 {"ршил","решил"},{"ршила","решила"}, 992 {"напсать","написать"},{"напсал","написал"}, 993 {"тескт","текст"},{"тескта","текста"}, 994 {"птому","потому"}, 995 {"накпело","накипело"}, 996 {"хчоется","хочется"}, 997 {"выгворится","выговориться"}, 998 {"ичсетно","честно"},{"чесно","честно"},{"чесна","честно"}, 999 {"джае","даже"}, 1000 {"нчать","начать"}, 1001 {"чеог","чего"}, 1002 {"птуются","путаются"}, 1003 {"бсытро","быстро"}, 1004 {"пчеатаю","печатаю"},{"пчеатать","печатать"}, 1005 
{"клвиатуре","клавиатуре"},{"клвиатура","клавиатура"}, 1006 {"совршенно","совершенно"}, 1007 {"вбщем","в общем"}, 1008 {"длео","дело"}, 1009 {"блыо","было"}, 1010 {"сегондя","сегодня"}, 1011 {"опросто","просто"},// "опрос то" → after merge = "опросто" 1012 {"првиет","привет"}, 1013 {"есл","если"}, 1014 // Ultra-scramble fixes 1015 {"мсыли","мысли"},{"очнеь","очень"}, 1016 {"прсонулся","проснулся"},{"утнром","утром"}, 1017 {"гдет","где-то"},{"ччасов","часов"}, 1018 {"хзотя","хотя"},{"бдуильник","будильник"}, 1019 {"озабыл","забыл"}, 1020 {"вклбючть","включить"},{"веечра","вечера"}, 1021 {"дмаю","думаю"},{"выхдной","выходной"}, 1022 {"всетки","всё-таки"},{"всётаки","всё-таки"}, 1023 {"аэкран","экран"}, 1024 {"пуглив","погуглил"}, 1025 {"нцу","ну"}, 1026 {"чот","что"}, 1027 {"есмптря","несмотря"},{"несмптря","несмотря"}, 1028 {"кееру","вечеру"}, 1029 // Context-blind fixes from crash test 1030 1031 {"бюст","боюсь"},// "бюст" is real but in context "но я бюст" = "боюсь" 1032 {"неработате","не работает"}, 1033 // More from crash tests 1034 {"прбелом","пробелом"},{"прсто","просто"}, 1035 {"никму","никому"}, 1036 {"интренета","интернета"},{"интренет","интернет"}, 1037 {"извните","извините"}, 1038 {"одочитал","дочитал"}, 1039 {"исптать","спать"},{"исптаь","спать"}, 1040 {"аблин","а блин"}, 1041 {"кингоу","книгу"}, 1042 // More scramble fixes 1043 {"пшоел","пошёл"},{"пшёл","пошёл"}, 1044 {"реишл","решил"}, 1045 {"кхуню","кухню"},{"кхуня","кухня"}, 1046 {"коффе","кофе"},{"кофей","кофе"}, 1047 {"чйаник","чайник"}, 1048 {"сджелать","сделать"}, 1049 {"вклюаю","включаю"},{"вклюачть","включить"}, 1050 {"кншн","конечно"},{"кнчн","конечно"}, 1051 {"незнаю","не знаю"}, 1052 {"обычн","обычно"},{"можн","можно"}, 1053 // Space-stuck words 1054 {"чтопрос","что прос"},{"чтопросто","что просто"}, 1055 {"тк","так"},{"кк","как"},{"чт","что"}, 1056 {"наэкран","на экран"}, 1057 {"неработает","не работает"}, 1058 // === MASSIVE PATCH: all crash-test errors === 1059 // Semantic inversions 1060 
{"недстатчно","недостаточно"},{"нкторые","некоторые"}, 1061 // Short fragments 1062 {"жн","же"},{"нте","нет"},{"тма","там"},{"сбе","себе"},{"мю","мою"}, 1063 {"ои","и"},{"кка","как"},{"ещ","ещё"},{"всь","весь"},{"пка","пока"}, 1064 {"миом","мимо"},{"кароч","короче"},{"этго","этого"},{"сжу","сижу"}, 1065 {"ткого","такого"},{"лндно","ладно"},{"хршоо","хорошо"},{"уачи","удачи"}, 1066 // Verbs scrambled 1067 {"нчнаю","начинаю"},{"зпукаю","запускаю"},{"выает","выдаёт"}, 1068 {"ипрсравлять","исправлять"},{"апридтеся","придётся"}, 1069 {"опрмахиваюсь","промахиваюсь"},{"плжоить","положить"}, 1070 {"лкоадут","кладут"},{"естть","есть"}, 1071 // Nouns scrambled 1072 {"бэкспес","бэкспейс"},{"бэкспсе","бэкспейс"},{"ппперони","пепперони"}, 1073 1074 {"нжунхй","нужных"}, 1075 {"мтедом","методом"},{"кмментах","комментах"},{"бкувми","буквами"}, 1076 {"прблеомй","проблемой"}, 1077 // Space-stuck combos (will be handled by forcedFix on merged form) 1078 {"егодергать","его дергать"},{"подостлом","под столом"}, 1079 {"номне","но мне"},{"номнелнеь","но мне лень"}, 1080 {"деньгив","деньги в"},{"неочнеь","не очень"}, 1081 {"назад","назад"},// keep 1082 {"мнескзали","мне сказали"},{"ппаламдежу","попала между"}, 1083 {"топоднять","то поднять"},{"насйат","на сайт"}, 1084 {"кторю","которую"}, 1085 {"прдется","придётся"},{"чбты","чтобы"}, 1086 1087 // Word form fixes 1088 {"стяол","стоял"},{"подольеш","подольше"}, 1089 {"дзвониться","дозвониться"}, 1090 {"вшкое","в шоке"},{"вшкод","в шоке"}, 1091 1092 {"отратный","отвратный"},{"отратительный","отвратительный"}, 1093 // Remaining crash-test fixes 1094 {"рза","раз"},{"мнетот","мне тут"}, 1095 1096 {"накатер","на карте"}, 1097 {"ска","ска"},// keep if standalone (band genre) 1098 {"риза","раз"},// freq fix: "риза" is rare, "раз" is common 1099 1100 // Multi-word fragments from forcedFix 1101 {"номнелень","но мне лень"}, 1102 {"аятуд","я туда"},{"даи","да и"}, 1103 {"новод","но вода"},{"новода","но вода"}, 1104 {"мнетут","мне тут"}, 1105 // keep 
1106 // Glued fragments that need fixing after merge 1107 {"мнерзали","мне сказали"},{"мнепишев","мне пишет"},{"мнепишет","мне пишет"}, 1108 {"неплжеаю","не пожелаю"},{"немок","не мог"},{"немог","не мог"}, 1109 {"накатре","на карте"},{"накаре","на карте"}, 1110 1111 {"запдписку","за подписку"}, 1112 {"илиспытатьлгу","или спать лягу"}, 1113 {"хлдилньеки","холодильнике"}, 1114 {"квлаиаруы","клавиатуры"}, 1115 {"дестяипльцевым","десятипальцевым"}, 1116 // Persistent bugs — hall of fame 1117 1118 {"вкс","в кс"}, 1119 {"долог","долго"}, 1120 // Glued oskolki from SpaceFix 1121 {"номанелень","но мне лень"}, 1122 {"этдокница","это до конца"},{"этнокница","это до конца"}, 1123 {"дваятуд","два я туда"}, 1124 // "ил испытать лгу" → need compound fix 1125 {"испытатьлгу","спать лягу"}, 1126 {"испытать","испытать"},// keep real word 1127 // === FINAL BOSS: oskolki + armored words === 1128 1129 1130 {"немогу","не могу"}, 1131 {"нехватает","не хватает"}, 1132 {"нехочу","не хочу"}, 1133 {"непонимаю","не понимаю"}, 1134 1135 {"мненеочень","мне не очень"}, 1136 {"апрллись","пролилось"}, 1137 {"ппеперони","пепперони"},{"пеперони","пепперони"}, 1138 1139 {"каре","карте"}, 1140 {"дост","доту"}, 1141 {"лгу","лягу"}, 1142 {"клваиатуры","клавиатуры"}, 1143 {"хлдильнике","холодильнике"}, 1144 // Final boss remaining errors 1145 // already correct — for SpaceFix "ошибк"+"у" 1146 {"пиала","попала"},{"пиаламежу","попала между"}, 1147 {"межу","между"}, 1148 1149 {"еработате","работает"},{"нееработате","не работает"}, 1150 1151 {"иге","итоге"}, 1152 1153 {"нелли","недели"}, 1154 1155 1156 1157 1158 {"пжалста","пожалуйста"}, 1159 1160 {"нчаал","начал"}, 1161 // CSW picks wrong candidate — force correct 1162 {"катре","карте"},{"пгуглил","погуглил"},{"недли","недели"}, 1163 {"квеечру","к вечеру"}, 1164 // Final remaining errors 1165 {"забл","забыл"}, 1166 {"бось","боюсь"}, 1167 {"вдост","в доту"},{"вдоут","в доту"}, 1168 {"апрлилсь","пролилось"}, 1169 {"прото","просто"},{"псрото","просто"}, 1170 
{"пжлста","пожалуйста"}, 1171 {"нпаишите","напишите"}, 1172 {"стлакивлся","сталкивался"},{"стлкаивля","сталкивался"}, 1173 {"нстроен","настроен"},{"нестроен","настроен"}, 1174 {"длоог","долго"}, 1175 {"звню","звоню"},{"звеню","звоню"}, 1176 {"рзозлился","разозлился"}, 1177 1178 {"рбаотате","работает"},{"работае","работает"}, 1179 {"анстроение","настроение"}, 1180 {"кшамар","кошмар"}, 1181 {"зраплату","зарплату"}, 1182 {"нжаимать","нажимать"},{"нжамите","нажмите"}, 1183 {"пперони","пепперони"}, 1184 {"оплтить","оплатить"}, 1185 {"срендств","средств"}, 1186 {"ктторые","которые"}, 1187 {"кномпка","кнопка"}, 1188 {"зказать","заказать"}, 1189 {"кмопбютер","компьютер"},{"компютер","компьютер"}, 1190 {"оказлось","оказалось"},{"окзвется","оказывается"}, 1191 {"мхеаничсекую","механическую"}, 1192 {"подклбчения","подключения"}, 1193 {"интеренту","интернету"}, 1194 {"првайдеру","провайдеру"}, 1195 {"отвеачет","отвечает"}, 1196 {"слчуайно","случайно"}, 1197 {"тлефона","телефона"}, 1198 {"залипюат","залипают"}, 1199 {"поэотму","поэтому"}, 1200 {"ткесте","тексте"}, 1201 {"слмоается","сломается"}, 1202 {"чретям","чертям"}, 1203 {"прдсушить","просушить"}, 1204 {"условяих","условиях"}, 1205 {"двйойными","двойными"}, 1206 {"кннца","конца"},{"кница","конца"}, 1207 {"ггерои","герои"}, 1208 {"пстоянн","постоянно"}, 1209 {"слпуею","слепую"}, 1210 1211 // Glued fragments needing correction 1212 {"джаен","даже не"},{"джаене","даже не"}, 1213 1214 {"можнои","можно и"}, 1215 {"гдетов","где-то в"}, 1216 {"онн","он не"}, 1217 // "опрос то" → handled by PostMerge: "опрос"+"то" → "опросто" → forcedFix → "просто" 1218 // Gold bugs — persistent errors 1219 {"буями","буквами"},{"буям","буквам"}, 1220 {"питаю","почитаю"},// "книгу питаю" = "почитаю" 1221 {"постирать","исправлять"},// T9 hallucination fix 1222 {"дзовнился","дозвонился"}, 1223 {"онибдуь","кто-нибудь"}, 1224 {"ндо","надо"}, 1225 {"нрмлаьной","нормальной"}, 1226 // === MASSIVE FINAL PATCH (134 words) === 1227 
{"авари","авария"},{"авобще","вообще"},{"агнлийский","английский"}, 1228 {"аошибик","ошибки"},{"аполчаса","полчаса"},{"бдует","будет"}, 1229 {"бкув","букв"},{"бкува","буква"},{"блн","блин"},{"брде","бред"}, 1230 {"бюсь","боюсь"},{"вбще","вообще"},{"встякое","всякое"}, 1231 {"вытрать","вытирать"},{"вытщил","вытащил"},{"выьрал","выбрал"}, 1232 {"гвоорят","говорят"},{"говрит","говорит"},{"грызт","грызёт"}, 1233 {"дйа","дай"},{"дмашних","домашних"},{"днег","денег"}, 1234 {"днеь","день"},{"днеьг","денег"},{"днеьги","деньги"}, 1235 {"доут","доту"},{"еболйете","болейте"}, 1236 {"езнаю","знаю"},{"епжелаю","пожелаю"},{"епротяну","протяну"}, 1237 {"ждтаь","ждать"},{"жутк","жутко"}, 1238 {"здаел","задел"},{"знвает","знает"},{"знгачит","значит"}, 1239 {"зхаожу","захожу"},{"зшаел","зашёл"},{"зыбл","забыл"}, 1240 {"иоге","итоге"}, 1241 {"кгда","когда"},{"кжадый","каждый"}, 1242 {"клаиватуру","клавиатуру"},{"клвавишами","клавишами"}, 1243 {"клваиатруы","клавиатуры"},{"клваиатуур","клавиатуру"}, 1244 {"кнпоки","кнопки"},{"кржку","кружку"},{"крнаом","краном"}, 1245 {"ктто","кто-то"},{"кфое","кофе"}, 1246 {"лбюимую","любимую"},{"лжеат","лежат"},{"лнеь","лень"}, 1247 {"мдежу","между"},{"мжет","может"},{"мкароны","макароны"}, 1248 {"мнго","много"},{"мсяц","месяц"}, 1249 {"нверное","наверное"},{"ндяже","даже"}, 1250 {"нжаимю","нажимаю"},{"нжуен","нужен"}, 1251 {"нормлаьно","нормально"},{"нтих","них"},{"нчегео","нечего"}, 1252 {"оишбок","ошибок"},{"омн","мне"}, 1253 {"онлйан","онлайн"},{"опечтать","печатать"},{"оптяь","опять"}, 1254 {"особнно","особенно"},{"отмнить","отменить"}, 1255 {"паьлцы","пальцы"},{"пбежал","побежал"},{"пграю","поиграю"}, 1256 {"пдо","подо"},{"пйду","пойду"},{"пкушать","покушать"}, 1257 {"пмыть","помыть"},{"ппала","попала"}, 1258 {"прбел","пробел"},{"прключить","переключить"}, 1259 {"прсото","просто"},{"пршлой","прошлой"}, 1260 {"псоле","после"},{"псопать","поспать"}, 1261 {"птом","потом"},{"птыаюсь","пытаюсь"}, 1262 
{"пццу","пиццу"},{"пчистить","почистить"},{"пчитаю","почитаю"}, 1263 {"пшиу","пишу"},{"пщеатаю","печатаю"}, 1264 {"рбот","робот"},{"рзетки","розетки"},{"рзобрать","разобрать"}, 1265 {"рси","рис"},{"сбда","сюда"},{"сбее","себе"}, 1266 {"сбребанк","сбербанк"},{"своет","совет"}, 1267 {"сйечас","сейчас"},{"списаил","списали"}, 1268 {"срыом","сыром"},{"ссисками","сосисками"},{"стлом","столом"}, 1269 {"тгда","тогда"},{"тда","туда"},{"теепрь","теперь"}, 1270 {"тже","тоже"},{"ткой","такой"},{"тлефоны","телефоны"}, 1271 {"тоьлко","только"},{"тпеерь","теперь"}, 1272 {"трбуу","трубу"},{"трпякой","тряпкой"}, 1273 {"фнеом","феном"}, 1274 {"холдильнеки","холодильнике"}, 1275 {"чеерз","через"},{"чтбы","чтобы"}, 1276 {"шкое","шоке"},{"этто","это"},{"яызк","язык"}, 1277 // Remaining space-oskolki 1278 {"ошибку","ошибку"},// correct word for SpaceFix merge 1279 {"ятуд","я туда"}, 1280 1281 {"денегив","денег в"}, 1282 {"зошибки","за ошибки"}, 1283 {"инеболейте","и не болейте"}, 1284 // Final remaining 15 1285 {"себ","себе"},{"мня","меня"},{"пок","пока"},{"эт","это"}, 1286 {"тож","тоже"},{"линни","линии"},{"яна","на"}, 1287 1288 {"илиспытать","или спать"}, 1289 // Context + regression fixes 1290 {"сдал","сделал"},// "сдал я себе кофе" → "сделал" 1291 {"иктто","кто-то"},{"иктт","кто-то"}, 1292 {"инболейте","и не болейте"}, 1293 {"номнеелень","но мне лень"}, 1294 {"япост","я просто"}, 1295 // Remaining final bosses 1296 {"атоя","а то я"}, 1297 {"нуедобн","неудобно"},{"уедобн","удобно"}, 1298 {"нуедобно","неудобно"}, 1299 // "н" particle fixes (н = "не" fragment) 1300 {"нмне","но мне"},{"номн","но мне"}, 1301 {"новиидмо","но очевидно"},{"новиедмо","но очевидно"},{"ноочевидно","но очевидно"}, 1302 {"нпротяну","не протяну"},{"нпожелаю","не пожелаю"}, 1303 {"нболейте","не болейте"},{"неболйете","не болейте"}, 1304 {"иливрис","или в рис"},{"иливрси","или в рис"}, 1305 // Wrong merges that produce existing but wrong words 1306 {"ноя","но я"},// "н"+"оя"→"ноя"(November) → "но я" 1307 
{"елень","лень"},// fragment "е"+"лень" → just "лень" 1308 {"ент","нет"},// trusted in dict but wrong — should be "нет" 1309 {"онудобно","неудобно"},{"нудобно","неудобно"}, 1310 {"атоятк","а то я так"},{"псот","просто"}, 1311 {"злипабщим","слипающим"}, 1312 {"овиидмо","очевидно"}, 1313 // dist>2 common errors that CompactSpell can't find 1314 {"привецтвую","приветствую"}, 1315 {"компуктер","компьютер"},{"компуктером","компьютером"},{"компуктера","компьютера"}, 1316 {"тихналогиях","технологиях"},{"тихналогии","технологии"}, 1317 {"тихнологиях","технологиях"},{"тихнологии","технологии"}, 1318 {"рендаренг","рендеринг"},{"рендаренга","рендеринга"}, 1319 {"патамушта","потому что"},{"патамучта","потому что"}, 1320 {"марочился","морочился"},{"марочиться","морочиться"}, 1321 {"стовить","ставить"}, 1322 {"понел","понял"},{"понела","поняла"},{"понели","поняли"}, 1323 {"роликав","роликов"}, 1324 {"жизь","жизнь"}, 1325 {"приветную","приветствую"}, 1326 // === Batch 2: new text errors === 1327 // Slenг/OOV 1328 {"здаова","здорова"},{"здаров","здоров"},{"здаова!","здорова!"}, 1329 // Missing letters (dist>2) 1330 {"файов","файлов"},{"файлоов","файлов"}, 1331 {"внзепано","внезапно"},{"внезпано","внезапно"}, 1332 {"перзагружать","перезагружать"},{"перезагржать","перезагружать"}, 1333 {"прзнтации","презентации"},{"прзнатции","презентации"},{"прзентации","презентации"}, 1334 {"черапшья","черепашья"},{"черепшья","черепашья"}, 1335 {"отвественый","ответственный"},{"отвественный","ответственный"}, 1336 {"оскрость","скорость"},{"оскорость","скорость"}, 1337 {"финалчока","финалочка"}, 1338 // Wrong candidate picked (forcedFix overrides CompactSpell) 1339 {"намеров","намертво"}, 1340 {"кнпку","кнопку"},{"кнопу","кнопку"}, 1341 {"парвки","правки"},{"парвок","правок"}, 1342 {"внсе","внёс"}, 1343 {"послений","последний"},{"послендий","последний"}, 1344 {"посотерть","посмотреть"},{"посомтреть","посмотреть"},{"посмтреть","посмотреть"}, 1345 {"наванием","названием"},{"навзанием","названием"}, 
1346 {"хватате","хватает"},{"хватет","хватает"}, 1347 {"шриты","шрифты"},{"шрфиты","шрифты"},{"шрифтыы","шрифты"}, 1348 {"доашнй","домашний"},{"домашнй","домашний"},{"домашийн","домашний"}, 1349 {"вйафай","вайфай"}, 1350 {"пдводит","подводит"}, 1351 {"свяи","связи"},{"свзяи","связи"}, 1352 {"ждц","жду"},{"ждуу","жду"}, 1353 {"рндера","рендера"},{"рнедера","рендера"}, 1354 {"варинат","вариант"},{"варинт","вариант"}, 1355 // Space-stuck words 1356 {"тамв","там в"},{"тамвсе","там все"}, 1357 {"засту","за эту"},{"заэту","за эту"}, 1358 {"ато","а то"}, 1359 {"вовремя","во время"},{"вовремья","во время"}, 1360 {"вов","во"},{"ремя","время"}, 1361 {"чегот","чего-то"},{"чегото","чего-то"}, 1362 // Morphology fixes 1363 {"папек","папке"},{"папки","папке"}, 1364 {"должне","должен"}, 1365 {"немонг","немного"}, 1366 {"меян","меня"}, 1367 {"уверн","уверен"}, 1368 {"пру","пару"}, 1369 // Wrong candidate picked by scoring — forcedFix overrides 1370 {"мжешь","можешь"},{"мжеш","можешь"}, 1371 {"пшии","пиши"}, 1372 {"телгу","телегу"},{"тлегу","телегу"}, 1373 {"телфона","телефона"}, 1374 {"рздать","раздать"},{"рзадть","раздать"}, 1375 {"отавливается","отваливается"},{"оталивается","отваливается"}, 1376 {"чег","чего"},// "чег от" → "чего от" → PostMerge → "чего-то" 1377 {"ужасн","ужасно"}, 1378 // "н о..." 
patterns: "н" before "о"+word = shifted space from "но" 1379 {"нотеперь","но теперь"},{"нотепрь","но теперь"}, 1380 {"носкрость","но скорость"},{"носкорость","но скорость"}, 1381 {"отеперь","теперь"},// "отеперь" = "о"+"теперь" fragment 1382 // Short words with wrong candidate 1383 {"саый","самый"},{"самй","самый"}, 1384 // Slipped dashes 1385 {"изза","из-за"},{"иза","из-за"}, 1386 // === Batch 3: large text errors === 1387 {"неочень","не очень"}, 1388 {"какойт","какой-то"}, 1389 {"см","сам"},// "см" (сантиметр) in dict but means "сам" in conversational context 1390 // === Batch 3: new large text fixes === 1391 {"приет","привет"},{"приет!","привет!"}, 1392 {"пеоезде","поезде"},{"поезед","поезде"}, 1393 {"пвтаюьсь","пытаюсь"},{"пвтаюсь","пытаюсь"},{"пвтаться","пытаться"}, 1394 {"понимю","понимаю"},{"понмиаете","понимаете"}, 1395 {"поднмался","поднимался"},{"подымался","поднимался"}, 1396 {"выдвала","выдавала"},{"выдвалаа","выдавала"}, 1397 {"миут","минут"}, 1398 {"сказла","сказал"},{"скзал","сказал"},{"скзала","сказала"}, 1399 {"смстоятельно","самостоятельно"},{"самстоятельно","самостоятельно"}, 1400 {"хлдно","холодно"},{"хлодно","холодно"}, 1401 {"отсался","остался"},{"отсалась","осталась"}, 1402 {"скрсоть","скорость"},{"скросоть","скорость"}, 1403 {"начл","начал"},{"нчал","начал"}, 1404 {"начла","начала"},{"нчала","начала"}, 1405 {"глова","голова"},{"голва","голова"}, 1406 {"конфигу","конфиг"}, 1407 {"командривока","командировка"},{"командриовка","командировка"},{"камандировка","командировка"}, 1408 {"ноуте","ноуте"},// keep — slang form of "ноутбуке" 1409 {"бкаап","бэкап"},{"бкаеп","бэкап"}, 1410 {"холдоный","холодный"},{"хлодный","холодный"}, 1411 {"зватрак","завтрак"},{"заврак","завтрак"}, 1412 {"кааш","каша"}, 1413 {"тчоже","тоже"}, 1414 {"вынсоить","выносить"}, 1415 // "н очивидно" → need "но очевидно" not "не очевидно" 1416 {"ночивидно","но очевидно"},{"ноочивидно","но очевидно"}, 1417 // "маякните" is slang — protect 1418 {"маякните","маякните"}, 1419 // 
"патовая" is real word (chess) — protect 1420 {"патовая","патовая"}, 1421 // === Batch 4: text 3 errors === 1422 {"полнйо","полной"}, 1423 {"поому","потому"},{"пому","потому"}, 1424 {"нашвстреча","наша встреча"}, 1425 {"раньшн","раньше"}, 1426 {"слущаються","слушаются"}, 1427 {"кажеться","кажется"}, 1428 {"интерном","интернов"}, 1429 {"полвину","половину"}, 1430 {"востановить","восстановить"},{"восстновить","восстановить"}, 1431 {"грееться","греется"},{"греяться","греется"}, 1432 {"экселе","экселе"},// keep — slang for Excel 1433 {"докучен","документ"},{"дкоумент","документ"}, 1434 {"меч","меня"},// "у меч" = "у меня" 1435 {"присться","приходится"},{"прийдтся","придётся"}, 1436 {"хаз","хз"},// slang keep 1437 {"незабудке","не забудьте"},{"незабдуьте","не забудьте"}, 1438 {"созван","созвон"},{"сзовон","созвон"}, 1439 {"компаратив","корпоратив"},{"корпоратви","корпоратив"}, 1440 {"нужн","нужно"}, 1441 // Wrong merges from SpaceFix/PostMerge 1442 {"ня","но я"},// "н"+"я" → "ня" (in dict as slang) but means "но я" 1443 {"илия","или я"},// "или"+"я" → "илия" (name in dict) but means "или я" 1444 {"понеж","по ней"},// "по"+"ней" → "понеж" (archaic) but means "по ней" 1445 {"подступ","под стук"},// "под"+"стук" → "подступ" but means "под стук" 1446 // Wrong candidate picked by scoring 1447 {"силно","сильно"},{"слино","сильно"}, 1448 {"моент","момент"},{"моемнт","момент"}, 1449 // Capital "Н" at sentence start → "Но" 1450 {"нтеперь","но теперь"}, 1451 // Hyphenated word fixes 1452 {"кайо-то","какой-то"},{"кайото","какой-то"}, 1453 // Trusted words with common misspellings 1454 {"сумашедший","сумасшедший"},{"сумашедшая","сумасшедшая"}, 1455 {"обьект","объект"},{"обьекте","объекте"},{"обьекта","объекта"}, 1456 {"гланый","главный"},{"главый","главный"}, 1457 {"понмаю","понимаю"}, 1458 // === Hotfix 3.0: IT airport text === 1459 // Wrong candidates 1460 {"серв","сервер"},// not "серб" 1461 {"впн","впн"},// self-map: protect from "вон" 1462 
{"дожен","должен"},{"дложен","должен"}, 1463 {"помошь","помощь"},// trusted but misspelled 1464 {"процентоа","процентов"},{"процетнов","процентов"}, 1465 {"осталсь","осталась"},{"остлась","осталась"}, 1466 {"даных","данных"},{"даные","данные"}, 1467 {"спрвитесь","справитесь"},{"справтесь","справитесь"}, 1468 {"можт","может"}, 1469 {"прост","просто"},// "прост" is trusted but almost always means "просто" in conversation 1470 // Space-stuck patterns 1471 {"ноинет","но инет"},// "н"+"оинет" → Rule 1 merge → forcedFix 1472 {"оинет","инет"},// fallback if "н" already processed 1473 {"похоже","похоже"},// self-map: PostMerge "похож"+"е"→"похоже" 1474 {"ечто","е что"},// "похож ечто" → "похож"+"е что" → PostMerge → "похоже что" 1475 {"амидны","админы"},{"амидные","админы"},// "наш амидны" 1476 {"атомы","а то мы"},// "Ато"+"мы" merged 1477 // "атоя" already in forcedFix above 1478 {"размы","раз мы"},// "раз"+"мы" merged 1479 {"окомбо","комбо"},// "прост окомбо" 1480 // Prefix merge protection (input words that should NOT merge with "по","под" etc) 1481 {"пордп","по рдп"},// "по"+"рдп" merged="пордп" → forcedFix catches before Lookup 1482 // "кот нибудь" → FixPostfixes makes "кот-нибудь" → then FixPostfixes replaces to "кто-нибудь" 1483 // ка кзакроете → как закроете 1484 {"какзакроете","как закроете"},// "ка"+"кзакроете" → Rule1 merge → forcedFix 1485 {"кзакроете","закроете"},// fallback 1486 // Filename word fixes (filenames split at _ and .) 1487 {"migtae","migrate"},{"migarte","migrate"},{"miigate","migrate"}, 1488 {"sql","sql"},// protect SQL extension (OrdinalIgnoreCase catches Sql too) 1489 {"мигарции","миграции"}, 1490 // === Hotfix 2.0: correct INPUT keys (not output!) 
=== 1491 {"менч","меня"},{"мнеч","меня"},// input="менч" → was picking "меч" 1492 {"незабудьте","не забудьте"},// input="незабудьте" → was picking "незабудке" 1493 {"корпаратива","корпоратива"},{"корпаратив","корпоратив"}, 1494 {"интернов","интернов"},// keep correct — prevent re-ranking to "интерном" 1495 {"грузяться","грузятся"},{"грузться","грузятся"}, 1496 {"докумен","документ"},{"докумет","документ"}, 1497 {"прихоться","приходится"},{"приходтся","приходится"}, 1498 // Compound words: "в ручную" → "вручную" via PostMerge forcedFix 1499 {"вручную","вручную"},// self-map to trigger PostMerge for "в"+"ручную" 1500 {"несладко","несладко"},// self-map: "не"+"сладко" → merge to "несладко" 1501 {"полдня","полдня"},// self-map: "пол"+"дня" → merge to "полдня" 1502 {"вобщем-то","в общем-то"}, 1503 // === Batch 5: heavy IT text (2398 chars) === 1504 // Paragraph 1: project management 1505 {"обем","общем"},{"вобем","в общем"}, 1506 {"решл","решил"}, 1507 {"прэокт","проект"},{"проэкт","проект"}, 1508 {"недлю","неделю"}, 1509 {"унас","у нас"},{"уанс","у нас"},{"уфнс","у нас"}, 1510 {"заакзчки","заказчик"},{"заакзчик","заказчик"}, 1511 {"нвоый","новый"},{"нвоой","новой"}, 1512 {"кторые","которые"},{"котрые","которые"}, 1513 {"пловну","половину"},{"пловцу","половину"}, 1514 {"бизнсе","бизнес"}, 1515 {"смтрел","смотрел"}, 1516 {"вослоы","волосы"},{"весло","волосы"}, 1517 {"голве","голове"},{"голев","голове"}, 1518 {"швеляться","шевелятся"}, 1519 {"хтят","хотят"}, 1520 {"ситсему","систему"}, 1521 {"нвашим","нашим"}, 1522 {"прчием","причем"}, 1523 {"рабтало","работало"}, 1524 {"планшеах","планшетах"}, 1525 {"пытлася","пытался"}, 1526 {"неврзможно","невозможно"}, 1527 {"бзе","без"},{"бизе","без"}, 1528 {"мненджер","менеджер"}, 1529 {"стронне","стороне"}, 1530 {"бзжет","бюджет"},{"бает","бюджет"}, 1531 {"еня","меня"},// "у еня" = "у меня" 1532 // Paragraph 2: backend 1533 {"тперь","теперь"}, 1534 {"бэкэнд","бэкенд"}, 1535 {"слвое","слове"}, 1536 
{"изленте","изоленте"},{"излейте","изоленте"}, 1537 {"отпсук","отпуск"},{"отсеку","отпуск"}, 1538 {"заемрджить","замерджить"},{"замердить","замерджить"}, 1539 {"атворизации","авторизации"}, 1540 {"кажый","каждый"},{"какый","каждый"}, 1541 {"юезр","юзер"}, 1542 {"ошбку","ошибку"}, 1543 {"пптыке","попытке"},{"пятые","попытке"}, 1544 {"залогинитсья","залогиниться"}, 1545 {"птыался","пытался"}, 1546 {"асинхроныне","асинхронные"}, 1547 {"ноуг","ногу"}, 1548 {"слмоит","сломит"}, 1549 {"микорсервис","микросервис"}, 1550 {"плтежей","платежей"}, 1551 {"паадают","падают"}, 1552 {"транзкций","транзакций"}, 1553 {"идемпотентнсть","идемпотентность"}, 1554 {"хендлим","хендлим"},// slang keep 1555 {"зарют","зароют"}, 1556 {"пользвателей","пользователей"}, 1557 // Paragraph 3: frontend 1558 {"фрнту","фронту"}, 1559 {"тоеж","тоже"}, 1560 {"жпа","жопа"}, 1561 {"дизйнеры","дизайнеры"}, 1562 {"юайкит","юай кит"},// UI kit 1563 {"фигме","фигме"},// slang keep — Figma 1564 {"нрмально","нормально"},{"нрмлаьный","нормальный"}, 1565 {"расенхрон","рассинхрон"},{"олин","рассинхрон"}, 1566 {"обатной","обратной"},{"обкатной","обратной"}, 1567 {"пдоинги","паддинги"},{"пудинги","паддинги"}, 1568 {"сьехали","съехали"}, 1569 {"двдацать","двадцать"}, 1570 {"левю","левую"}, 1571 {"стнрону","сторону"}, 1572 {"првоерь","проверь"}, 1573 {"крзины","корзины"}, 1574 {"тваров","товаров"},{"тавров","товаров"}, 1575 {"скрол","скролл"},{"скрыл","скролл"}, 1576 {"ищезает","исчезает"}, 1577 {"пользватель","пользователь"}, 1578 {"кнпоку","кнопку"}, 1579 {"оплатиь","оплатить"}, 1580 {"блкер","блокер"},{"бекер","блокер"}, 1581 {"сгеодня","сегодня"}, 1582 // Paragraph 4: devops 1583 {"девпосы","девопсы"}, 1584 {"млодцы","молодцы"}, 1585 {"пднят","поднять"}, 1586 {"ьнам","нам"}, 1587 {"енв","енв"},// env — slang keep 1588 {"пндельник","понедельник"},{"подельник","понедельник"}, 1589 {"натсраивают","настраивают"}, 1590 {"пайплйаны","пайплайны"}, 1591 {"итге","итоге"}, 1592 {"тетсируем","тестируем"}, 1593 
{"локалках","локалках"},// slang keep 1594 {"стейдж","стейдж"},// slang keep 1595 {"плвоина","половина"},{"плотина","половина"}, 1596 {"кнтейнеров","контейнеров"}, 1597 {"пдадает","падает"}, 1598 {"нехвтаки","нехватки"}, 1599 {"памти","памяти"}, 1600 {"ндао","надо"}, 1601 {"сроно","срочно"},{"сорно","срочно"}, 1602 {"увелчить","увеличить"}, 1603 {"лимты","лимиты"}, 1604 {"серваке","сервере"}, 1605 {"ианче","иначе"}, 1606 {"нагрузочнео","нагрузочное"}, 1607 {"тетсирование","тестирование"}, 1608 {"смжем","сможем"}, 1609 {"пчмуто","почему-то"},{"путо","почему-то"}, 1610 {"интгерация","интеграция"}, 1611 {"путсоту","пустоту"}, 1612 {"вчеар","вчера"}, 1613 {"плдня","полдня"}, 1614 {"трейс","трейс"},// slang keep 1615 {"пдающего","падающего"},{"подающего","падающего"}, 1616 {"реквста","реквеста"},{"рексат","реквеста"}, 1617 // Paragraph 5: wrap-up 1618 {"ситауция","ситуация"}, 1619 {"кртическая","критическая"}, 1620 {"безнаджена","безнадежна"}, 1621 {"двйате","давайте"}, 1622 {"сберемся","соберемся"}, 1623 {"заврта","завтра"}, 1624 {"распделеим","распределим"},{"раделеим","распределим"}, 1625 {"задчи","задачи"}, 1626 {"созадл","создал"}, 1627 {"джире","джире"},// slang keep — Jira 1628 {"туад","туда"},{"тад","туда"}, 1629 {"бги","баги"},{"буги","баги"}, 1630 {"прсьба","просьба"}, 1631 {"вем","всем"}, 1632 {"берт","берёт"}, 1633 {"рбаоту","работу"}, 1634 {"когот","кого-то"},{"койот","кого-то"}, 1635 {"осдают","сдают"}, 1636 {"нревы","нервы"}, 1637 {"рмашку","ромашку"},{"рюмашку","ромашку"}, 1638 {"увсей","у всей"},{"евсей","у всей"}, 1639 {"кманды","команды"}, 1640 {"отспку","отпуск"}, 1641 {"глваное","главное"}, 1642 {"епаниковать","паниковать"}, 1643 {"длеать","делать"}, 1644 {"движний","движений"}, 1645 {"всме","всем"}, 1646 {"спкойной","спокойной"}, 1647 {"нчи","ночи"}, 1648 {"пшел","пошёл"}, 1649 {"третю","третью"}, 1650 {"кфе","кофе"}, 1651 {"птыаться","пытаться"}, 1652 {"пднять","поднять"}, 1653 // Merged/space forms 1654 {"имы","мы"}, 1655 
{"минорнми","минорными"},
{"фатку","факту"},{"початку","по факту"},
{"таже","также"},// "таже" → "также" not "таж"
{"перписывать","переписывать"},
// === Batch 5 extras: remaining errors from user report ===
// Semantic fixes
{"тюз","тз"},// ТЮЗ (theatre) vs ТЗ (tech spec)
{"легки","легаси"},// legacy system
{"трейси","трейс"},
// Space/token fixes
{"обьяснить","объяснить"},// ь→ъ
{"держиться","держится"},// -ться→-тся
{"ивглядят","выглядят"},// "он ивглядят" → "они выглядят"
{"юый","юай"},// "юый кит" → "юай кит"
{"десть","десять"},
{"нии","ночи"},// "спокойной нии" → "спокойной ночи"
{"ена","енв"},// env
{"локалка","локалках"},
// Space-stuck: "конь ен валялся" → "конь не валялся" — "ен" is fragment of "не"
{"ен","не"},// common transposition
// "д осях пор" → "до сих пор"
{"осях","сих"},// leftover from "до сих"
// "иу кого-то" → "у кого-то"
{"иу","у"},// leftover
// "оне хендлим" → "не хендлим"
{"оне","не"},// leftover "о"
// Other
{"зарт","зароют"},
{"джипе","джире"},// Jeep vs Jira
};

// Boundary character that followed the word being replaced. QueueReplace snapshots it on the
// calling thread and re-types it after the replacement text.
private static char lastBoundaryChar = ' ';

// Resolves the window that should receive synthesized WM_CHAR input: the focused control of
// the foreground window's GUI thread, else that thread's caret window, else the foreground
// window itself. May return IntPtr.Zero when there is no foreground window.
private static IntPtr ResolveReplaceTargetWindow()
{
    IntPtr foreground = WinApi.GetForegroundWindow();
    uint processId;
    uint threadId = WinApi.GetWindowThreadProcessId(foreground, out processId);

    var info = new WinApi.GUITHREADINFO();
    info.cbSize = Marshal.SizeOf(typeof(WinApi.GUITHREADINFO));
    if (WinApi.GetGUIThreadInfo(threadId, ref info))
    {
        if (info.hwndFocus != IntPtr.Zero) return info.hwndFocus;
        if (info.hwndCaret != IntPtr.Zero) return info.hwndCaret;
    }
    return foreground;
}

// Injects key-up events for both Shift and both Ctrl keys so that a modifier the user is
// still holding does not turn the following Backspace events into shortcuts.
private static void ReleaseShiftAndCtrlKeys()
{
    keybd_event(0xA0, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // VK_LSHIFT
    keybd_event(0xA1, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // VK_RSHIFT
    keybd_event(0xA2, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // VK_LCONTROL
    keybd_event(0xA3, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // VK_RCONTROL
}

// Injects one Backspace key press (down + up).
private static void TapBackspace()
{
    keybd_event(0x08, 0, 0, UIntPtr.Zero); // VK_BACK down
    keybd_event(0x08, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
}

// Execute replace on background thread using WM_CHAR (not SendInput!)
//
// Replaces the just-typed word in the focused window. On a thread-pool thread it deletes the
// word plus its trailing boundary character with Backspace key events, optionally posts a
// keyboard-layout switch request, types the replacement followed by the boundary character
// via WM_CHAR, and finally shows an "original → replacement" tooltip.
//   original     - the text as typed; original.Length + 1 backspaces are sent
//   replacement  - the text typed in its place
//   switchLayout - when true, WM_INPUTLANGCHANGEREQUEST is posted to the target window
//   caretPos     - position handed to ShowTooltipInternal for the tooltip
// Errors inside the worker are logged, never thrown to the caller.
private static void QueueReplace(string original, string replacement, bool switchLayout, Point caretPos)
{
    // Capture the boundary char NOW on the calling (hook) thread. The worker below runs after a
    // 30ms delay, by which time further keystrokes would have overwritten the shared
    // lastBoundaryChar field — reading it inside the worker re-emitted the wrong trailing char.
    char boundaryChar = lastBoundaryChar;
    System.Threading.ThreadPool.QueueUserWorkItem(delegate
    {
        try
        {
            System.Threading.Thread.Sleep(30);

            IntPtr targetWnd = ResolveReplaceTargetWindow();
            if (targetWnd == IntPtr.Zero)
            {
                // Nothing to type into: do not delete the user's text either.
                Log("ExecReplace: no target window, skipped");
                return;
            }
            Log("ExecReplace: target=0x" + targetWnd.ToString("X") + " del=" + original.Length + " type=\"" + replacement + "\"");

            ReleaseShiftAndCtrlKeys();

            // Delete the word and its boundary char with rapid-fire backspaces (no gaps, for speed).
            int delCount = original.Length + 1;
            for (int i = 0; i < delCount; i++)
                TapBackspace();
            System.Threading.Thread.Sleep(30);

            // Switch layout
            if (switchLayout)
            {
                // wParam 2 / lParam 1 as in the original call (Win32: INPUTLANGCHANGE_FORWARD / HKL_NEXT).
                WinApi.PostMessage(targetWnd, WinApi.WM_INPUTLANGCHANGEREQUEST, (IntPtr)2, (IntPtr)1);
                Log(" Layout switch posted to 0x" + targetWnd.ToString("X"));
                System.Threading.Thread.Sleep(50);
            }

            // Type replacement + boundary char via WM_CHAR (no delays — instant)
            foreach (char c in replacement)
                SendMessageW(targetWnd, WM_CHAR, (IntPtr)c, IntPtr.Zero);
            SendMessageW(targetWnd, WM_CHAR, (IntPtr)boundaryChar, IntPtr.Zero);
            Log(" Done: \"" + replacement + boundaryChar + "\"");

            // Tooltip
            string tt = original + " → " + replacement;
            if (TrayApp.Instance != null && !TrayApp.Instance.IsDisposed)
                TrayApp.Instance.BeginInvoke(new Action(() => ShowTooltipInternal(tt, caretPos)));
        }
        catch (Exception ex) { Log("ExecReplace err: " + ex.Message); }
    });
}

// Raw replace: delete exactly delCount chars, type replacement
//
// Same mechanism as QueueReplace but slower-paced (5 ms between backspaces, 2 ms between
// characters), with no boundary character, no layout switch and no tooltip.
// caretPos is accepted for signature compatibility and is not used.
private static void QueueReplaceRaw(int delCount, string replacement, Point caretPos)
{
    System.Threading.ThreadPool.QueueUserWorkItem(delegate
    {
        try
        {
            System.Threading.Thread.Sleep(50);

            IntPtr targetWnd = ResolveReplaceTargetWindow();
            if (targetWnd == IntPtr.Zero)
            {
                // Nothing to type into: do not delete the user's text either.
                Log("RawReplace: no target window, skipped");
                return;
            }
            Log("RawReplace: del=" + delCount + " type=\"" + replacement + "\"");

            ReleaseShiftAndCtrlKeys();

            for (int i = 0; i < delCount; i++)
            {
                TapBackspace();
                System.Threading.Thread.Sleep(5);
            }
            System.Threading.Thread.Sleep(60);

            foreach (char c in replacement)
            {
                SendMessageW(targetWnd, WM_CHAR, (IntPtr)c, IntPtr.Zero);
                System.Threading.Thread.Sleep(2);
            }
            Log(" RawReplace done");
        }
        catch (Exception ex) { Log("RawReplace err: " + ex.Message); }
    });
}

// Full
// sentence punctuation check on Enter
//
// Walks the words collected in sentenceHistory and marks every position where a comma is
// expected in front of a conjunction. Text that is already typed is not modified; if at
// least one comma is missing, the punctuated sentence is logged and shown as a tooltip at
// the caret. Sentences shorter than three words are ignored.
private static void CheckFullSentence()
{
    if (sentenceHistory.Count < 3) return;

    string[] words = sentenceHistory.ToArray();
    Log("CheckSentence: " + string.Join(" ", words));

    // Indices of words that need a comma BEFORE them.
    var commaBefore = new List<int>();
    for (int idx = 1; idx < words.Length; idx++)
    {
        string current = words[idx].ToLower();
        string previous = words[idx - 1].ToLower();

        // The second word never gets a comma, and neither does a word that directly
        // follows another conjunction.
        if (idx < 2) continue;
        if (commaBeforeWords.Contains(previous) || commaConjunctions.Contains(previous)) continue;

        bool needsComma;
        if (commaBeforeWords.Contains(current))
        {
            // "потому что": no comma between the two halves.
            needsComma = !(current == "что" && previous == "потому");
        }
        else
        {
            // Coordinating conjunctions, plus "а" used as a conjunction.
            needsComma = commaConjunctions.Contains(current) || current == "а";
        }

        if (needsComma) commaBefore.Add(idx);
    }

    if (commaBefore.Count == 0) return;

    string[] positions = commaBefore.ConvertAll(delegate(int p) { return p.ToString(); }).ToArray();
    Log(" Sentence needs " + commaBefore.Count + " comma(s) at positions: " + string.Join(",", positions));

    // We can't easily go back and insert commas into already-typed text,
    // so build the corrected sentence and only display it.
    var sentence = new StringBuilder();
    for (int k = 0; k < words.Length; k++)
    {
        if (commaBefore.Contains(k)) sentence.Append(',');
        if (k > 0) sentence.Append(' ');
        sentence.Append(words[k]);
    }
    string corrected = sentence.ToString();
    Log(" Corrected: " + corrected);

    // Show tooltip with corrected sentence
    Point cp = GetCaretScreenPoint();
    if (TrayApp.Instance == null || TrayApp.Instance.IsDisposed) return;
    TrayApp.Instance.BeginInvoke(new Action(() =>
        ShowTooltipInternal("Пунктуация: " + corrected, cp)));
}

// ===== Full text correction: Ctrl+Shift+Space =====
// Reads text from active window, fixes ALL
// words + punctuation, pastes back
//
// Hotkey handler. Copies the current SELECTION of the active window through the clipboard
// (Ctrl+C), corrects it (SAGE when available, otherwise the heuristic pipeline), pastes the
// result back (Ctrl+V) and restores the clipboard text the user had before.
// All failures are logged; nothing is thrown to the caller.
public static void CorrectEntireText()
{
    try
    {
        Log("=== CorrectEntireText START ===");

        // Step 1: Release all modifiers and wait for user to physically release keys
        keybd_event(0xA0, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // LShift
        keybd_event(0xA1, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // RShift
        keybd_event(0xA2, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // LCtrl
        keybd_event(0xA3, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // RCtrl
        keybd_event(0x20, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero); // Space
        // Wait until user physically releases Ctrl+Shift
        for (int wait = 0; wait < 40; wait++) // max 2 seconds
        {
            System.Threading.Thread.Sleep(50);
            bool ctrlStill = (WinApi.GetAsyncKeyState(0xA2) & 0x8000) != 0 || (WinApi.GetAsyncKeyState(0xA3) & 0x8000) != 0;
            bool shiftStill = (WinApi.GetAsyncKeyState(0xA0) & 0x8000) != 0 || (WinApi.GetAsyncKeyState(0xA1) & 0x8000) != 0;
            if (!ctrlStill && !shiftStill) break;
        }
        System.Threading.Thread.Sleep(50);

        // Save original clipboard, then clear it to detect selection
        string origClipboard = null;
        RunOnUiThreadAndWait(delegate
        {
            if (Clipboard.ContainsText()) origClipboard = Clipboard.GetText();
            Clipboard.Clear(); // clear so we can detect if Ctrl+C copied anything
        }, 500);

        // Copy current SELECTION only (Ctrl+C, NO Ctrl+A!)
        // Re-release modifiers right before to be safe
        keybd_event(0xA0, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        keybd_event(0xA1, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        keybd_event(0xA2, 0, 0, UIntPtr.Zero);                    // Ctrl down
        keybd_event(0x43, 0, 0, UIntPtr.Zero);                    // 'C' down
        keybd_event(0x43, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        keybd_event(0xA2, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        System.Threading.Thread.Sleep(200);

        // Step 2: Read clipboard — if empty, nothing was selected
        string text = null;
        RunOnUiThreadAndWait(delegate
        {
            if (Clipboard.ContainsText()) text = Clipboard.GetText();
        }, 1000);

        if (string.IsNullOrEmpty(text))
        {
            Log(" No text selected — nothing to correct");
            RestoreClipboardText(origClipboard);
            return;
        }
        Log(" Original (" + text.Length + " chars): " + (text.Length > 6000 ? text.Substring(0, 6000) + "..." : text));

        // Step 3: Correct ALL text. Prefer the SAGE neural corrector (spelling+punctuation+
        // case, holistic & contextual) when its Python helper is installed; otherwise fall
        // back to the heuristic pipeline. SAGE runs on this background thread (~8s model load).
        string corrected = null;
        if (SageClient.IsAvailable)
        {
            corrected = SageClient.Correct(text);
            if (corrected != null) Log(" SAGE corrected (" + corrected.Length + " chars)");
        }
        if (corrected == null) corrected = CorrectFullText(text);
        Log(" Corrected: " + (corrected.Length > 6000 ? corrected.Substring(0, 6000) + "..." : corrected));

        if (corrected == text)
        {
            Log(" No changes needed");
            // The clipboard currently holds the copied selection; give the user back what
            // they had before the hotkey was pressed.
            RestoreClipboardText(origClipboard);
            return;
        }

        // Step 4: Put corrected text to clipboard and paste
        RunOnUiThreadAndWait(delegate { Clipboard.SetText(corrected); }, 500);
        System.Threading.Thread.Sleep(50);

        // Ctrl+V to paste (replaces current selection or all text)
        keybd_event(0xA2, 0, 0, UIntPtr.Zero);                    // Ctrl down
        keybd_event(0x56, 0, 0, UIntPtr.Zero);                    // 'V' down
        keybd_event(0x56, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        keybd_event(0xA2, 0, KEYEVENTF_KEYUP_FLAG, UIntPtr.Zero);
        System.Threading.Thread.Sleep(100);

        // Restore original clipboard if we had something different before
        if (origClipboard != corrected) RestoreClipboardText(origClipboard);

        Log("=== CorrectEntireText DONE ===");
    }
    catch (Exception ex) { Log("CorrectEntireText err: " + ex.Message); }
}

// Runs `work` on the TrayApp UI thread (the clipboard calls in this file are all marshalled
// there) and waits up to timeoutMs for it to finish. Returns true when the work completed in
// time, false on timeout or when no live TrayApp instance exists. Exceptions thrown by
// `work` are logged and swallowed: clipboard access is best-effort.
private static bool RunOnUiThreadAndWait(Action work, int timeoutMs)
{
    var ui = TrayApp.Instance;
    if (ui == null || ui.IsDisposed) return false;

    // Deliberately not disposed: after a timeout the queued callback may still call Set().
    var done = new System.Threading.ManualResetEvent(false);
    ui.BeginInvoke(new Action(delegate
    {
        try { work(); }
        catch (Exception ex) { Log("UI-thread action failed: " + ex.Message); }
        finally { done.Set(); }
    }));
    return done.WaitOne(timeoutMs);
}

// Puts `text` back on the clipboard; a null text (nothing was saved) is a no-op.
private static void RestoreClipboardText(string text)
{
    if (text == null) return;
    RunOnUiThreadAndWait(delegate { Clipboard.SetText(text); }, 500);
}

// Public entry point for testing
public static string TestCorrectText(string text) { return CorrectFullText(text); }

// Process entire text: fix spelling + add punctuation.
// Returns the corrected text, or the input unchanged if the pipeline throws.
private static string CorrectFullText(string text)
{
    try
    {
        string result = CorrectFullTextInner(text);
        // Cleanup double commas and comma-space-comma
        while (result.Contains(",,")) result = result.Replace(",,", ",");
        while (result.Contains(", ,")) result = result.Replace(", ,", ",");
        return result;
    }
    catch (Exception ex) { Log("CorrectFullText CRASH: " + ex.Message + "\n" + ex.StackTrace); return text; }
}

// Slang whitelist — never correct these words
private static readonly HashSet<string> slangWhitelist = new HashSet<string>(StringComparer.OrdinalIgnoreCase) {
    "стим","дота","кс","кд","гг","лол","рофл","имба","нерф","бафф","респ","фарм",
    "комп","инет","прога","дров","винда","линукс","юзер","логин","пасс",
    "ппц","пипец","капец","офигеть","нифига","пофиг","блин","хрен",
    "короче","чел","тип","типа","норм","ок","окей","лан","ладно","ваще","чёт",
    "провайдер","роутер","браузер","аккаунт","пост","репост","лайк","хайп",
    "контент","стрим","донат","подписка","трафик","сервер","хостинг",
    "нейросеть","нейронка","капслок","маркетплейс",
    "видос","видосик","фотка","селфи","мем","тролль",
    // IT/tech slang
    "рендер","рендера","рендеринг","рендерить","рендерил",
    "вайфай","вифи","wifi",
    "телега","телегу","телеграм","телеграмм",
    "ноут","ноутбук","здарова","здаров",
    "финалочка","финалка",
    "бэкап","бэкапы","бэкапить","бэкапил","бэкапнуть",
    "конфиг","конфига","конфиги","конфигурить",
    "дедлайн","дедлайна","дедлайны","дедлайну",
    "ребутнуть","ребутнул","ребутнулся","ребутнуться","ребут",
    "пофиксить","пофиксил","пофиксили","фиксить","фикс",
    "личка","личку","личке","личку",
    "логин","логины","логинить","логиниться",
    "мерж","мержить","мержил","коммит","коммиты","коммитить",
    "деплой","деплоить","деплоил","деплоится",
    "фронт","фронтенд","бэкенд","бэк","апи",
    "баг","баги","багфикс","дебаг","дебажить",
    "тикет","тикеты","таска","таски",
    "слака","слаке","слак",
    "маякни","маякните","маякнуть","маякнул",
    "ноуте","ноута","ноутом",
    "патовая","патовый","патовое",
    "коннект","коннекте","коннекта",
    "админ","админы","админа","админов",
    "lte","edge","3g","4g","5g","vpn","ssh","sql","api","Sql",
    "migrate","migration","script","backup","deploy","config",
    "щас","хз","эксель","экселе","эксельке",
    "плиз","плз","созвон","созвона","созвоне",
    "корпоратив","корпоратива","корпоративе",
    "кеш","кеша","кешировать",
    "рдп","rdp","серв","серва","серваки","сервак",
    "впн","vpn","чекнуть","чекнул","чекни","чекните",
    "кибана","кибане","кибаны","кибану",
    "разрабы","разрабам","разраба","разрабов",
    "бекап","бекапы","бекапить","бекапил",
    "репо","репозиторий",
    "миграция","миграции","мигрировать",
    "обнова","обнову","обновы",
    "легаси","апи","бэкенд","бэкэнд","фронтенд",
    "юзер","юзера","юзеров","юзеры",
    "замерджить","мерджить","мердж",
    "хендлим","хендлить","хендлер",
    "идемпотентность","идемпотентный",
    "микросервис","микросервисы","микросервиса",
    "паддинги","паддинг","маргин","маргины",
    "скролл","скроллить","скроллбар",
    "рассинхрон","рассинхронизация",
    "блокер","блокеры",
    "девопсы","девопс",
    "стейдж","стейджинг","прод","продакшн","продакшен",
    "пайплайн","пайплайны","пайплайна",
    "енв","env","docker","докер",
    "фигма","фигме","фигмы",
    "джира","джире","джиры",
    "реквест","реквеста","реквесты",
    "трейс","трейсы","трейсинг",
    "эластик","эластика",
    "эпик","эпики",
    "стендап","стендапе",
    "локалка","локалках","локалке",
    "нагрузочное","нагрузочный",
    "консоль","консоли",
    "скрипт","скрипты","скриптов",
};

// Words that should NEVER be merged as a prefix (conjunctions, particles, pronouns).
// Matched exactly: a substring test would also block e.g. the prefix "от" because of "вот".
private static readonly HashSet<string> mergeBlockedPrevWords = new HashSet<string>(
    "но,и,а,я,ты,он,она,мы,вы,они,да,не,ни,ну,же,бы,ли,как,что,так,то,вот,вон,уже,ещё,его,её,их,нет,там,тут,нас,вас,нам,вам".Split(','),
    StringComparer.Ordinal);

// Common standalone words that must not be swallowed as the second half of a merge (exact match).
private static readonly HashSet<string> mergeBlockedNextWords = new HashSet<string>(
    "и,а,в,с,к,о,у,я,на,но,за,по,от,из,до,не,ни,же,бы,ли,то,он,мы,ты,вы".Split(','),
    StringComparer.Ordinal);

// ONLY real prefixes — NOT prepositions/conjunctions/particles
// Excluded: "на","но","как","не" (almost always separate words)
private static readonly string[] mergePrefixes = {
    "по","за","вы","от","об","при","пере","под","над","про","до","раз","рас",
    "видео","аудио","авто","само","маркет","капс","кар","воз","нейро","неро"
};

// Skips spaces starting at `from` and returns the following run of letters/hyphens, or null
// when the next non-space character is neither. wordEnd receives the index just past that run.
private static string PeekNextWord(string text, int from, out int wordEnd)
{
    int start = from;
    while (start < text.Length && text[start] == ' ') start++;
    int end = start;
    while (end < text.Length && (char.IsLetter(text[end]) || text[end] == '-')) end++;
    wordEnd = end;
    return end > start ? text.Substring(start, end - start) : null;
}

// True when the tail of `sb` equals `tail` after lowercasing (tail is expected in lower case).
private static bool EndsWithLowercased(StringBuilder sb, string tail)
{
    if (tail.Length == 0 || tail.Length > sb.Length) return false;
    return sb.ToString(sb.Length - tail.Length, tail.Length).ToLower() == tail;
}

// Upper-cases the first letter of the text, of every line, and of every sentence
// (a letter following '.', '!' or '?', with or without whitespace in between).
private static string CapitalizeSentenceStarts(string text)
{
    var sb = new StringBuilder(text.Length);
    bool capNext = true; // first letter of text
    bool afterPunct = false;
    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];
        if (afterPunct && (c == ' ' || c == '\n' || c == '\r'))
        {
            capNext = true;
            afterPunct = false;
        }
        else if (afterPunct && char.IsLetter(c))
        {
            // No space after punctuation — still capitalize
            capNext = true;
            afterPunct = false;
        }

        if (capNext && char.IsLetter(c))
        {
            sb.Append(char.ToUpper(c));
            capNext = false;
        }
        else
        {
            sb.Append(c);
        }

        if (c == '.' || c == '!' || c == '?') afterPunct = true;
        else if (c == '\n') capNext = true;
        else if (!char.IsWhiteSpace(c)) afterPunct = false;
    }
    return sb.ToString();
}

// Heuristic full-text pipeline: PreProcess, then a single left-to-right pass that
//   * corrects each word (CorrectSingleWord),
//   * appends a comma after intro words,
//   * glues a word to the following fragment when the two form a known word ("smart merge"),
//   * inserts commas before conjunctions,
// then adds a final period if the text ends in a letter, runs PostMergePass and
// FixPostfixes, and capitalizes sentence starts.
// Returns the text unchanged (after PreProcess) when no dictionary engine is ready.
private static string CorrectFullTextInner(string text)
{
    // === PREPROCESS: cleanup before word-by-word correction ===
    text = PreProcess(text);

    BloomFilter bloom = bloomRu;
    SymSpell sym = symRu;
    if (compactRu == null || !compactRu.IsReady)
    {
        if (sym == null || !sym.IsReady) return text;
    }

    var result = new StringBuilder();
    var wordBuf = new StringBuilder();
    bool lastWasSentenceEnd = true; // start of text = sentence start
    string prevWord = "";           // last emitted word, lowercased (local; hides the static field)

    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];

        if (char.IsLetter(c) || c == '\'' || c == '-')
        {
            wordBuf.Append(c);
            continue;
        }

        // Non-letter: process accumulated word
        if (wordBuf.Length > 0)
        {
            string word = wordBuf.ToString();
            string fixedWord = CorrectSingleWord(word, bloom, sym, lastWasSentenceEnd, prevWord);
            result.Append(fixedWord);

            // Insert comma after intro words: "Короче ..." → "Короче, ..."
            // But NOT if the next char is already a comma
            if (ShouldInsertCommaAfter(fixedWord) && c == ' ' && (i + 1 >= text.Length || text[i + 1] != ','))
            {
                result.Append(',');
                Log(" IntroComma after \"" + fixedWord + "\"");
            }

            prevWord = fixedWord.ToLower();
            wordBuf.Clear();
            lastWasSentenceEnd = false;
        }

        // Smart merge (NEVER across sentence boundaries, NEVER for standalone words)
        bool afterSentEnd = result.Length > 0 && ".!?".IndexOf(result[result.Length - 1]) >= 0;
        bool prevIsStandalone = mergeBlockedPrevWords.Contains(prevWord);
        if (c == ' ' && result.Length > 0 && !afterSentEnd && !lastWasSentenceEnd && !prevIsStandalone
            && prevWord.Length >= 1 && prevWord.Length <= 8)
        {
            int nextEnd;
            string nextWordRaw = PeekNextWord(text, i + 1, out nextEnd);
            if (nextWordRaw != null)
            {
                string nextWord = nextWordRaw.ToLower();
                string merged = prevWord + nextWord;

                // Check forcedFix first
                string mergedFixed;
                bool hasForcedMerge = forcedFix.TryGetValue(merged, out mergedFixed);

                // Check if merged form exists in dict
                bool mergedInDict = compactRu != null && compactRu.ContainsExact(merged);

                // Also check if corrected merge is in dict (e.g. "постовить" → correct → "поставить")
                if (!mergedInDict && !hasForcedMerge && compactRu != null)
                {
                    var mergedCands = compactRu.Lookup(merged, 2);
                    if (mergedCands.Count > 0 && mergedCands[0].Distance <= 1)
                    {
                        mergedFixed = mergedCands[0].Word;
                        hasForcedMerge = true;
                    }
                }

                // Common prefixes that should always merge with verbs/nouns
                bool isPrefixMerge = (mergedInDict || hasForcedMerge) && Array.IndexOf(mergePrefixes, prevWord) >= 0;

                bool nextIsWord = compactRu != null && compactRu.ContainsTrusted(nextWord);
                bool nextInDict = compactRu != null && compactRu.ContainsExact(nextWord);
                // Also block merge if next word is a common standalone word
                bool nextIsStandalone = mergeBlockedNextWords.Contains(nextWord);

                // Merge when the next word is not standalone AND either
                //  - prefix merge, unless the next word exists in the dictionary on its own
                //    ("по" + "ней" → keep apart; "по" + "днмался" → "поднимался"), or
                //  - dict merge: the merged form exists and the next word is not a trusted word.
                bool doMerge = !nextIsStandalone
                    && ((isPrefixMerge && !nextInDict) || (mergedInDict && !nextIsWord));

                // Only merge when `result` really ends with prevWord. After an intro comma or
                // other punctuation it does not, and removing prevWord.Length chars would
                // delete the wrong text.
                if (doMerge && EndsWithLowercased(result, prevWord))
                {
                    string correctedMerge = hasForcedMerge
                        ? mergedFixed
                        : CorrectSingleWord(merged, bloom, sym, false, "");
                    Log(" Merge: \"" + prevWord + "\" + \"" + nextWord + "\" → \"" + correctedMerge + "\"");

                    int removeLen = prevWord.Length;
                    // prevWord is lowercased, so read the casing from what was actually emitted.
                    bool wasCapitalized = char.IsUpper(result[result.Length - removeLen]);
                    result.Remove(result.Length - removeLen, removeLen);

                    string display = correctedMerge;
                    if (wasCapitalized && display.Length > 0)
                        display = char.ToUpper(display[0]) + display.Substring(1);
                    result.Append(display);

                    prevWord = correctedMerge.ToLower();
                    i = nextEnd - 1; // resume right after the consumed fragment; the space is dropped
                    continue;
                }
            }
        }

        // Auto-punctuation: insert comma before conjunctions
        if (c == ' ')
        {
            int peekEnd;
            string upcoming = PeekNextWord(text, i + 1, out peekEnd);
            if (upcoming != null)
            {
                string upcomingLower = upcoming.ToLower();
                // Check forcedFix for corrected form (e.g. "што"→"что")
                string upcomingCorrected;
                if (!forcedFix.TryGetValue(upcomingLower, out upcomingCorrected))
                    upcomingCorrected = upcomingLower;
                if (ShouldInsertComma(upcomingCorrected, prevWord, result))
                    result.Append(',');
            }
        }

        result.Append(c);

        if (c == '.' || c == '!' || c == '?') lastWasSentenceEnd = true;
    }

    // Process last word
    if (wordBuf.Length > 0)
    {
        string word = wordBuf.ToString();
        result.Append(CorrectSingleWord(word, bloom, sym, lastWasSentenceEnd, prevWord));
    }

    // Add period at end if missing (only when the text ends in a letter)
    string trimmed = result.ToString().TrimEnd();
    if (trimmed.Length > 0 && char.IsLetter(trimmed[trimmed.Length - 1]))
    {
        result.Clear();
        result.Append(trimmed);
        result.Append('.');
    }

    // Post-process 1: try merging adjacent short words that form a real word
    string preResult = PostMergePass(result.ToString());

    // Post-process 2: fix -то/-либо/-нибудь postfixes
    preResult = FixPostfixes(preResult);

    // Final pass: capitalize at sentence start
    return CapitalizeSentenceStarts(preResult);
}

// Returns the RulesEngine correction for `lower` when the engine is ready and has one,
// otherwise the forcedFix entry, otherwise null.
private static string LookupRuleFix(string lower)
{
    string fix = RulesEngine.IsReady ? RulesEngine.Lookup(lower) : null;
    if (fix != null) return fix;
    string forced;
    return forcedFix.TryGetValue(lower, out forced) ? forced : null;
}

// Correct a single word using all available engines.
//   word          - the word as typed (may contain ' and -)
//   bloom         - passed through to CascadeRepair
//   sym           - SymSpell fallback, used when compactRu is not ready
//   sentenceStart - currently unused
//   prevWord      - previous word (lowercased by callers) for bigram/neural context; null = none
// Order of checks: user dictionary and slang whitelist (keep as-is), RulesEngine/forcedFix,
// hyphenated-word handling, dictionary candidates ranked by distance then context score,
// CascadeRepair, and finally splitting into two trusted words.
// Returns the corrected word with the input's casing applied via MatchCase, two words
// separated by a space when split, or the input unchanged.
private static string CorrectSingleWord(string word, BloomFilter bloom, SymSpell sym, bool sentenceStart, string prevWord)
{
    if (word == null || word.Length < 2) return word;
    if (prevWord == null) prevWord = "";

    string lower = word.ToLower();
    if (userDict.Contains(lower)) return word;
    // NOTE(review): the whitelist wins over forcedFix, so a forcedFix key that is also
    // whitelisted (e.g. "бэкэнд", "локалка") is never applied here — confirm this is intended.
    if (slangWhitelist.Contains(lower)) return word; // never correct slang

    // RulesEngine + forcedFix
    string rfix = LookupRuleFix(lower);
    if (rfix != null) return MatchCase(word, rfix);

    // Hyphenated: try dehyphenating and checking
    if (lower.Contains("-"))
    {
        string dehyphed = lower.Replace("-", "");
        if (dehyphed.Length == 0) return word; // hyphens only: nothing to look up

        string dehypFix = LookupRuleFix(dehyphed);
        if (dehypFix != null) return MatchCase(word, dehypFix);
        // Check if dehyphenated exists in dict
        if (compactRu != null && compactRu.ContainsExact(dehyphed)) return MatchCase(word, dehyphed);
        // Try dictionary lookup on dehyphenated
        if (compactRu != null && compactRu.IsReady)
        {
            var hCands = compactRu.Lookup(dehyphed, 3);
            if (hCands.Count > 0) return MatchCase(word, hCands[0].Word);
        }
        return word; // keep as-is if nothing found
    }

    string best = null;
    List<SymSpell.Candidate> cands = null;

    if (compactRu != null && compactRu.IsReady)
    {
        bool trusted = compactRu.ContainsTrusted(lower);
        bool inFull = !trusted && compactRu.ContainsExact(lower);
        Log(" CSW: \"" + lower + "\" trusted=" + trusted + " inFull=" + inFull);

        if (trusted) return word; // definitely correct (top 87k)

        // Word is in full dict but NOT trusted — might be obscure/error form
        // Check if there's a better trusted word at dist=1
        if (inFull)
        {
            cands = compactRu.Lookup(lower, 3);
            // Only replace if there's a trusted word at dist=1
            bool hasTrustedFix = false;
            for (int ci = 0; ci < cands.Count; ci++)
            {
                if (cands[ci].Distance == 1 && compactRu.ContainsTrusted(cands[ci].Word))
                { hasTrustedFix = true; break; }
            }
            if (!hasTrustedFix) return word; // no better trusted word → keep as-is
            Log(" CSW: untrusted, has trusted fix");
        }
        else
        {
            cands = compactRu.Lookup(lower, 5);
        }
        Log(" CSW: " + cands.Count + " candidates" + (cands.Count > 0 ? " best=\"" + cands[0].Word + "\" d=" + cands[0].Distance : ""));
    }
    else if (sym != null && sym.IsReady)
    {
        bool isC;
        cands = sym.LookupTop(lower, 5, out isC);
        if (isC) return word;
    }

    if (cands != null && cands.Count > 0)
    {
        // Score: bigram context + first/last char + neural nets
        string inputCons = GetConsonants(lower); // same for every candidate
        foreach (var cand in cands)
        {
            int score = 0;

            // BIGRAM CONTEXT — strongest signal! "на карте" >> "на катере"
            if (bigramLM != null && bigramLM.IsReady && prevWord.Length > 0)
                score += bigramLM.Score(prevWord, cand.Word) * 5; // up to +5000

            // First/last char match
            if (cand.Word[0] == lower[0]) score += 3000;
            if (cand.Word[cand.Word.Length - 1] == lower[lower.Length - 1]) score += 2000;

            // Same length bonus
            if (cand.Word.Length == lower.Length) score += 1000;

            // Consonant skeleton match — T9 style
            string candCons = GetConsonants(cand.Word);
            if (inputCons == candCons) score += 2500; // consonants match perfectly
            else if (inputCons.Length > 0 && candCons.Length > 0 && inputCons[0] == candCons[0]) score += 500;

            // Trusted
            if (compactRu != null && compactRu.ContainsTrusted(cand.Word)) score += 1500;

            // Neural nets
            if (CharNN.IsReady) score += (int)(CharNN.NeuralScore(lower, cand.Word, cand.FreqIdx, 80000, prevWord) * 800);
            if (embedNet != null && embedNet.IsReady)
                score += (int)(embedNet.Similarity(lower, cand.Word) * 1500);

            // PENALTY: if consonant skeleton completely differs → likely wrong word
            if (inputCons.Length >= 3 && candCons.Length >= 3)
            {
                int consMatch = 0;
                int minCons = Math.Min(inputCons.Length, candCons.Length);
                for (int k = 0; k < minCons; k++)
                    if (inputCons[k] == candCons[k]) consMatch++;
                float consRatio = (float)consMatch / Math.Max(inputCons.Length, candCons.Length);
                if (consRatio < 0.3f) score -= 5000; // massive penalty for totally different consonants
            }

            cand.ContextScore = score;
        }
        // Smallest edit distance first; ties broken by highest context score.
        cands.Sort(delegate(SymSpell.Candidate a, SymSpell.Candidate b)
        {
            int cmp = a.Distance.CompareTo(b.Distance);
            if (cmp != 0) return cmp;
            return b.ContextScore.CompareTo(a.ContextScore);
        });
        best = cands[0].Word;
    }

    // Seq2Spell and GRU DISABLED — they hallucinate (e.g. "сдачи"→"ино", "как"→"кок")
    // Only deterministic corrections allowed: forcedFix + SymSpell + CascadeRepair

    // Cascade repair for dist>2
    if (best == null)
        best = CascadeRepair(lower, bloom, compactRu != null && compactRu.IsReady ? null : sym);

    if (best != null)
        return MatchCase(word, best);

    // Word splitting: both parts must be TRUSTED words, both >= 3 chars.
    // The first (leftmost) valid split point is used.
    if (lower.Length >= 7 && compactRu != null && compactRu.IsReady)
    {
        int splitAt = -1;
        for (int sp = 3; sp <= lower.Length - 3; sp++)
        {
            if (compactRu.ContainsTrusted(lower.Substring(0, sp)) && compactRu.ContainsTrusted(lower.Substring(sp)))
            {
                splitAt = sp;
                break;
            }
        }
        if (splitAt >= 0)
        {
            string fixedLeft = CorrectSingleWord(lower.Substring(0, splitAt), bloom, sym, false, prevWord);
            string fixedRight = CorrectSingleWord(lower.Substring(splitAt), bloom, sym, false, fixedLeft);
            Log(" WordSplit: \"" + lower + "\" → \"" + fixedLeft + " " + fixedRight + "\"");
            return MatchCase(word.Substring(0, splitAt), fixedLeft) + " " + MatchCase(word.Substring(splitAt), fixedRight);
        }
    }

    return word;
}

// Should we insert a comma before this word in full-text mode?
// Decides whether a comma belongs in front of nextWord, given the previous word
// and the text emitted so far. Returns false while fewer than 3 characters have
// been written or when the output already ends with punctuation.
private static bool ShouldInsertComma(string nextWord, string prevWord, StringBuilder currentResult)
{
    int written = currentResult.Length;
    if (written < 3) return false;

    // Never stack a comma on top of existing punctuation.
    switch (currentResult[written - 1])
    {
        case ',':
        case '.':
        case '!':
        case '?':
        case ':':
        case ';':
        case '—':
            return false;
    }

    // "потому что": the comma goes before "потому", never before "что".
    bool tailOfPotomuChto = nextWord == "что" && prevWord == "потому";
    if (tailOfPotomuChto) return false;

    // Subordinating words, coordinating conjunctions, "а", and introductory
    // (parenthetical) words in mid-sentence all take a comma in front.
    return nextWord == "потому"
        || nextWord == "а"
        || commaBeforeWords.Contains(nextWord)
        || commaConjunctions.Contains(nextWord)
        || introWords.Contains(nextWord);
}

// True when a comma is needed AFTER the word, i.e. the lowercased word is one
// of the introductory words.
private static bool ShouldInsertCommaAfter(string word)
{
    string key = word.ToLower();
    return introWords.Contains(key);
}

// Heuristic for a dash between two nouns ("жизнь — боль"): both words must be
// at least 3 chars, must not end in a verb/adjective suffix, and must end in
// one of the listed nominative-looking endings.
private static bool ShouldInsertDash(string prevWord, string nextWord)
{
    if (prevWord.Length < 3 || nextWord.Length < 3) return false;

    foreach (string candidate in new[] { prevWord, nextWord })
    {
        // Verb/adjective suffix → not treated as a noun.
        if (EndsWithAny(candidate, "ть", "ет", "ит", "ал", "ла", "ый", "ий", "ая", "ое"))
            return false;
        // Must look nominative (common noun endings).
        if (!EndsWithAny(candidate, "ь", "а", "о", "е", "й", "к", "г", "н", "т", "д"))
            return false;
    }
    return true;
}

// === PREPROCESS: cleanup text before correction ===
// Cleans raw text before per-word correction. Passes run in this order:
//   1. punctuation noise   — collapse character runs, drop digits glued to "!?."
//   2. keyboard-layout fix — remap all-Latin words typed on the wrong layout
//   3. space fixer         — re-join / expand word fragments (needs compactRu)
//   4. whitespace          — collapse repeated spaces
// Returns the cleaned text; null or empty input is returned unchanged.
private static string PreProcess(string text)
{
    if (string.IsNullOrEmpty(text)) return text;

    var sb = new StringBuilder(text.Length);

    // Pass 1a: collapse runs of 3+ identical chars down to two
    // ("!!!" → "!!", "ппп" → "пп"). Digits are exempt: collapsing them
    // corrupts numbers ("1000" used to become "100").
    for (int i = 0; i < text.Length; i++)
    {
        char ch = text[i];
        bool thirdRepeat = i >= 2 && ch == text[i - 1] && ch == text[i - 2];
        if (thirdRepeat && !char.IsDigit(ch)) continue;
        sb.Append(ch);
    }
    text = sb.ToString();

    // Pass 1b: remove digits stuck to punctuation (typing noise: "!!1" → "!!",
    // "!!!11" → "!!"). The whole digit run is dropped, not just its first digit.
    // A dot between two digits is a decimal/version separator ("3.14", "10.30")
    // and is left alone.
    sb.Clear();
    bool inNoiseRun = false;
    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];
        if (char.IsDigit(c))
        {
            if (inNoiseRun) continue;
            if (i > 0 && "!?.".IndexOf(text[i - 1]) >= 0)
            {
                bool decimalPoint = text[i - 1] == '.' && i >= 2 && char.IsDigit(text[i - 2]);
                if (!decimalPoint)
                {
                    inNoiseRun = true;
                    continue;
                }
            }
        }
        else
        {
            inNoiseRun = false;
        }
        sb.Append(c);
    }
    text = sb.ToString();

    // Pass 2: Check each word — is it 100% Latin? → maybe wrong keyboard layout.
    // The loop runs one step past the end with a virtual ' ' so the last word is flushed.
    sb.Clear();
    var wordBuf = new StringBuilder();
    for (int i = 0; i <= text.Length; i++)
    {
        char c = i < text.Length ? text[i] : ' ';
        if (char.IsLetter(c))
        {
            wordBuf.Append(c);
        }
        else
        {
            if (wordBuf.Length >= 2)
            {
                string w = wordBuf.ToString();
                // All Latin and not a common English word? → try remap to Russian
                if (IsAllLatin(w) && !IsCommonEnglish(w.ToLower()))
                {
                    string remapped = RemapWord(w, true); // EN→RU
                    if (remapped != null && compactRu != null && compactRu.ContainsExact(remapped.ToLower()))
                    {
                        Log(" LayoutFix: \"" + w + "\" → \"" + remapped + "\"");
                        sb.Append(remapped);
                    }
                    else sb.Append(w);
                }
                else sb.Append(w);
            }
            else if (wordBuf.Length > 0) sb.Append(wordBuf);
            wordBuf.Clear();
            if (i < text.Length) sb.Append(c);
        }
    }
    text = sb.ToString();

    // Pass 3: Safe space fixer — merge ONLY if merged form is a real word
    if (compactRu != null && compactRu.IsReady)
    {
        // Short words that are valid on their own and must never be glued to a neighbour.
        var realShort = new HashSet<string> {
            "а","в","и","к","о","с","у","я","бы","вы","да","до",
            "её","за","из","их","ко","ли","мы","на","не","ни","но",
            "ну","об","он","от","по","со","та","те","то","ту","ты",
            "уж","же","вот","вон","как","что","так","это","кто","тут",
            "там","уже","ещё","его","нет","все","для","при","без",
        };

        sb.Clear();
        string[] words = text.Split(' ');

        // Known fragment expansions: single broken consonants → try expanding
        // "н" → "не","но","на"   "т" → "то","ты","те"   "кт" → "кто"   "ил" → "или"
        var fragmentExpand = new Dictionary<string, string[]>(StringComparer.Ordinal) {
            {"н", new[]{"не","но","на"}},
            {"т", new[]{"то","ты","те","ту","та"}},
            {"кт", new[]{"кто"}},
            {"ил", new[]{"или"}},
            {"чт", new[]{"что"}},
            {"эт", new[]{"это"}},
            {"оя", new[]{"я"}},   // "а т оя" → "а то я" (leftover fragment)
            {"ив", new[]{"в"}},   // "ил ив рис" → "или в рис" (leftover fragment)
            {"ог", new[]{"ого"}}, // broken "ничего","моего" etc
        };

        for (int wi = 0; wi < words.Length; wi++)
        {
            string w = words[wi];
            string wl = w.ToLower();
            bool wReal = realShort.Contains(wl) || compactRu.ContainsTrusted(wl);
            string ffv; if (forcedFix.TryGetValue(wl, out ffv)) wReal = true;

            // RULE 1: Short non-real fragment (≤2 chars) → merge with next
            // ONLY if merged form is EXACTLY in dict or forcedFix (no Lookup!)
            if (!wReal && wl.Length <= 2 && wi + 1 < words.Length)
            {
                string next = words[wi + 1];
                string merged = (w + next).ToLower();
                // Strip trailing punctuation for dict check
                string mergedClean = merged.TrimEnd('.', ',', '!', '?', ';', ':');
                string mff;
                bool ok = compactRu.ContainsExact(mergedClean) || forcedFix.TryGetValue(mergedClean, out mff);
                if (ok)
                {
                    Log(" SpaceFix→: \"" + w + "\"+\"" + next + "\"→\"" + merged + "\"");
                    if (sb.Length > 0) sb.Append(' ');
                    sb.Append(w + next);
                    wi++; // the next word has been consumed by the merge
                    continue;
                }
            }

            // RULE 0 (fallback): Known fragment expansion — only when Rule 1 failed
            // "н пожелаю" → "не пожелаю" (Rule 1 failed because "нпожелаю" not in dict)
            // "методом н, очевидно" → "методом но, очевидно"
            // "кт кто-нибудь" → "кто-нибудь" (duplicate removal)
            string[] expansions;
            if (!wReal && fragmentExpand.TryGetValue(wl.TrimEnd(',','.','!','?',';',':'), out expansions))
            {
                // Collect the fragment's trailing punctuation so it can be re-attached.
                string punct = "";
                for (int pi = wl.Length - 1; pi >= 0; pi--)
                {
                    if (".,!?;:".IndexOf(wl[pi]) >= 0) punct = wl[pi] + punct;
                    else break;
                }
                string bestExpansion = null;
                int bestScore = 0;
                foreach (string exp in expansions)
                {
                    int score = 0;
                    // "кт" → "кто", next="кто-нибудь" → duplicate, remove fragment
                    if (wi + 1 < words.Length)
                    {
                        string nextLower = words[wi + 1].ToLower();
                        // Duplicate check: expanded form is a prefix of the next word.
                        // Ordinal comparison, matching EndsWithAny elsewhere in this class.
                        if (nextLower.StartsWith(exp, StringComparison.Ordinal))
                        {
                            bestExpansion = ""; // empty = remove this fragment
                            bestScore = 9999;
                            break;
                        }
                        // Context check: "exp + next" makes sense?
                        if (bigramLM != null && bigramLM.IsReady)
                            score += bigramLM.Score(exp, nextLower.TrimEnd(',','.','!','?'));
                    }
                    // Check if prev word + exp makes sense
                    if (wi > 0 && bigramLM != null && bigramLM.IsReady)
                        score += bigramLM.Score(words[wi - 1].ToLower().TrimEnd(',','.','!','?'), exp);

                    // Context-aware "н" disambiguation: "но" before pronouns, "не" before verbs
                    if (exp == "но" || exp == "не")
                    {
                        // "н," with trailing comma/period → almost certainly "но," (conjunction at clause boundary)
                        if (punct.Length > 0 && (punct[0] == ',' || punct[0] == '.'))
                            score += (exp == "но") ? 800 : 0;
                        // Check next word: pronouns/standalone words follow "но", verbs follow "не"
                        if (wi + 1 < words.Length)
                        {
                            string nxClean = words[wi + 1].ToLower().TrimEnd(',','.','!','?',';',':');
                            // Words that commonly follow "но": pronouns, demonstratives
                            bool nextIsNoBuddy = nxClean == "я" || nxClean == "мне" || nxClean == "мы" ||
                                nxClean == "он" || nxClean == "она" || nxClean == "они" || nxClean == "его" ||
                                nxClean == "её" || nxClean == "их" || nxClean == "это" || nxClean == "всё" ||
                                nxClean == "ведь" || nxClean == "вот" || nxClean == "тут" || nxClean == "там";
                            if (nextIsNoBuddy) score += (exp == "но") ? 500 : 0;
                            // If next word is trusted and NOT a pronoun → likely verb/adj → "не"
                            bool nextTrusted = compactRu.ContainsTrusted(nxClean);
                            if (nextTrusted && !nextIsNoBuddy) score += (exp == "не") ? 400 : 0;
                        }
                    }
                    // Fixed per-expansion prior (more common words get a larger bonus)
                    if (exp == "не") score += 500;
                    else if (exp == "но") score += 300;
                    else if (exp == "на") score += 200;
                    else if (exp == "то") score += 400;
                    else if (exp == "это") score += 300;
                    else if (exp == "что") score += 300;
                    else if (exp == "или") score += 400;
                    else if (exp == "кто") score += 400;
                    else if (exp == "я") score += 500;
                    else if (exp == "в") score += 500;
                    else if (exp == "ого") score += 300;
                    // Single-option expansions get extra bonus (no ambiguity)
                    if (expansions.Length == 1) score += 300;

                    if (score > bestScore) { bestScore = score; bestExpansion = exp; }
                }
                if (bestExpansion != null && bestScore > 100)
                {
                    if (bestExpansion == "")
                    {
                        // Duplicate fragment — skip entirely
                        Log(" SpaceFix frag-dup: \"" + w + "\" removed (duplicate of next)");
                        continue;
                    }
                    Log(" SpaceFix frag: \"" + w + "\" → \"" + bestExpansion + punct + "\"");
                    if (sb.Length > 0) sb.Append(' ');
                    sb.Append(MatchCase(w.TrimEnd(',','.','!','?',';',':'), bestExpansion) + punct);
                    continue;
                }
            }

            // RULE 2: Next word ≤2 letters (ignoring punctuation) → merge
            if (wi + 1 < words.Length)
            {
                string next = words[wi + 1];
                string nl = next.ToLower();
                // Strip punctuation to get actual letter content
                string nlClean = nl.TrimEnd('.', ',', '!', '?', ';', ':', ')');
                nlClean = nlClean.TrimStart('(');
                if (nlClean.Length >= 1 && nlClean.Length <= 2)
                {
                    string merged = (w + next).ToLower();
                    string mergedClean2 = merged.TrimEnd('.', ',', '!', '?', ';', ':');
                    string mff2;
                    bool mergedExists = compactRu.ContainsExact(mergedClean2) || forcedFix.TryGetValue(mergedClean2, out mff2);
                    // Guard: NEVER merge if current word is a real standalone (а, и, о, у, к, с, в, я, он, на, но...)
                    // This prevents "а"+"он"→"аон", "и"+"он"→"ион"
                    bool wIsStandalone = realShort.Contains(wl);
                    // Merge when the merged form exists and the current word is not standalone:
                    // "ошибк"+"у" → "ошибку" exists, current not standalone → merge
                    // "а"+"он" → current IS standalone → skip
                    if (mergedExists && !wIsStandalone)
                    {
                        Log(" SpaceFix←: \"" + w + "\"+\"" + next + "\"→\"" + merged + "\"");
                        if (sb.Length > 0) sb.Append(' ');
                        sb.Append(w + next);
                        wi++; // the next word has been consumed by the merge
                        continue;
                    }
                }
            }

            // 3b: SpaceFix split removed — caused more harm than good.
            // Word splitting is handled later in CorrectSingleWord.

            if (sb.Length > 0) sb.Append(' ');
            sb.Append(w);
        }
        text = sb.ToString();
    }

    // Pass 4: Normalize spaces — collapse every run of spaces to a single space
    while (text.Contains("  ")) text = text.Replace("  ", " ");

    return text;
}

// True when every character of w is an ASCII letter a–z / A–Z (also true for an empty string).
private static bool IsAllLatin(string w)
{
    foreach (char c in w)
        if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) return false;
    return true;
}

// Lowercase English words that must not be treated as wrong-layout Russian.
private static readonly HashSet<string> commonEn = new HashSet<string> {
    "the","be","to","of","and","a","in","that","have","i","it","for","not","on","with",
    "he","as","you","do","at","this","but","his","by","from","they","we","her","she",
    "or","an","will","my","one","all","would","there","their","what","so","up","out",
    "if","about","who","get","which","go","me","when","make","can","like","time","no",
    "just","him","know","take","people","into","year","good","some","could","them",
    "ok","hi","hello","please","sorry","yes","thanks","thank","bye",
};

// Case-sensitive membership test against commonEn; callers pass a lowercased word.
private static bool IsCommonEnglish(string w) { return commonEn.Contains(w); }

// Post-merge: try combining adjacent words into one real word
// Works on space-separated tokens. For each token it (1) drops a short word that
// merely duplicates the start of the following hyphenated word or equals the next
// word, and (2) glues the token to the next one when the glued form is a forcedFix
// key or a trusted dictionary word. Returns text unchanged when it is null/empty
// or the dictionary is not ready.
private static string PostMergePass(string text)
{
    if (string.IsNullOrEmpty(text)) return text;
    if (compactRu == null || !compactRu.IsReady) return text;

    string[] parts = text.Split(' ');
    var sb = new System.Text.StringBuilder();

    for (int i = 0; i < parts.Length; i++)
    {
        // Dedup: "кто кто-нибудь" → "кто-нибудь" (short word is prefix of next hyphenated word)
        if (i + 1 < parts.Length)
        {
            string aClean = parts[i].ToLower().TrimEnd(',', '.', '!', '?', ';', ':');
            string bClean = parts[i + 1].ToLower().TrimEnd(',', '.', '!', '?', ';', ':');
            // Short word (2..4 chars) is exact prefix of next hyphenated word → skip it
            if (aClean.Length <= 4 && aClean.Length >= 2 && bClean.Contains("-")
                && bClean.StartsWith(aClean, StringComparison.Ordinal))
            {
                Log(" PostMerge dedup: \"" + parts[i] + "\" + \"" + parts[i + 1] + "\" → \"" + parts[i + 1] + "\"");
                continue; // skip the short prefix, next iteration will add the full word
            }
            // Exact duplicate adjacent words → keep only one
            if (aClean == bClean && aClean.Length >= 2)
            {
                Log(" PostMerge dedup: \"" + parts[i] + "\" duplicate → skipped");
                continue;
            }
        }

        // Try merging current + next
        if (i + 1 < parts.Length)
        {
            string a = parts[i].ToLower().TrimEnd(',', '.', '!', '?', ';', ':');
            string b = parts[i + 1].ToLower().TrimStart(',', '.', '!', '?', ';', ':');

            // Empty or punctuation-only tokens (leading/double spaces, a lone ",") are not
            // merge candidates: "merging" with them used to lowercase the neighbour, drop
            // punctuation, or throw on parts[i][0].
            if (a.Length > 0 && b.Length > 0)
            {
                string merged = a + b;

                // Check if merged form is a real word OR has a forcedFix
                string forcedM;
                bool mergedOk = false;
                if (forcedFix.TryGetValue(merged, out forcedM)) mergedOk = true;
                else if (compactRu.ContainsTrusted(merged)) mergedOk = true;

                // "Short" = at most 3 chars and not itself a trusted standalone word
                bool aShort = a.Length <= 3 && !compactRu.ContainsTrusted(a);
                bool bShort = b.Length <= 3 && !compactRu.ContainsTrusted(b);

                // ForcedFix merge = always, dict merge = only if one part is short
                bool isForcedMerge = forcedM != null;
                if (mergedOk && (isForcedMerge || aShort || bShort))
                {
                    string corrected = forcedM ?? merged;
                    Log(" PostMerge: \"" + parts[i] + "\" + \"" + parts[i + 1] + "\" → \"" + corrected + "\"");
                    if (sb.Length > 0) sb.Append(' ');
                    // Preserve a leading capital
                    if (corrected.Length > 0 && char.IsUpper(parts[i][0]))
                        corrected = char.ToUpper(corrected[0]) + corrected.Substring(1);
                    sb.Append(corrected);
                    i++; // skip next
                    continue;
                }
            }
        }
        if (sb.Length > 0) sb.Append(' ');
        sb.Append(parts[i]);
    }
    return sb.ToString();
}

// Fix words like "какойто"→"какой-то", "чтонибудь"→"что-нибудь".
// Handles both the glued ("ктонибудь") and the spaced ("кто нибудь") spelling,
// case-insensitively, only on whole-word matches; the original casing is kept.
private static string FixPostfixes(string text)
{
    if (string.IsNullOrEmpty(text)) return text;

    string[] postfixes = { "то", "либо", "нибудь" };
    string[] pronouns = { "как", "какой", "какая", "какое", "какие", "что", "кто", "кот", "где", "куда", "когда", "откуда", "почему", "зачем", "сколько" };

    foreach (string pfx in postfixes)
    {
        foreach (string pro in pronouns)
        {
            // Fix merged: "ктонибудь" → "кто-нибудь"
            text = HyphenateWholeWord(text, pro + pfx, pro.Length, 0);
            // Fix separated: "кто нибудь" → "кто-нибудь"
            text = HyphenateWholeWord(text, pro + " " + pfx, pro.Length, 1);
        }
    }
    // Fix "кот-нибудь" → "кто-нибудь" (typo кот↔кто in hyphenated words)
    // NOTE(review): plain Replace has no word-boundary check, so e.g. "скот-то" is rewritten too.
    text = text.Replace("кот-нибудь", "кто-нибудь").Replace("Кот-нибудь", "Кто-нибудь");
    text = text.Replace("кот-то", "кто-то").Replace("Кот-то", "Кто-то");
    text = text.Replace("кот-либо", "кто-либо").Replace("Кот-либо", "Кто-либо");
    return text;
}

// Replaces each whole-word, case-insensitive occurrence of pattern in text with
// "<first stemLength chars>-<rest after skipping gap chars>", keeping the casing
// found in text. The ordinal search guarantees a match is exactly pattern.Length long.
private static string HyphenateWholeWord(string text, string pattern, int stemLength, int gap)
{
    int idx = 0;
    while ((idx = text.IndexOf(pattern, idx, StringComparison.OrdinalIgnoreCase)) >= 0)
    {
        int end = idx + pattern.Length;
        bool leftOk = idx == 0 || !char.IsLetter(text[idx - 1]);
        bool rightOk = end >= text.Length || !char.IsLetter(text[end]);
        if (leftOk && rightOk)
        {
            string repl = text.Substring(idx, stemLength) + "-"
                + text.Substring(idx + stemLength + gap, pattern.Length - stemLength - gap);
            text = text.Substring(0, idx) + repl + text.Substring(end);
            idx += repl.Length; // continue after the replacement
        }
        else idx++;
    }
    return text;
}

// Extract consonant skeleton (T9 style) — "привет" → "првт".
// Keeps every letter that is not in the lowercase Russian vowel list, so 'ъ'/'ь'
// are kept and uppercase vowels are not filtered; callers in view pass lowercase.
private static string GetConsonants(string word)
{
    var sb = new System.Text.StringBuilder();
    string vowels = "аеёиоуыэюя";
    foreach (char c in word)
        if (char.IsLetter(c) && vowels.IndexOf(c) < 0) sb.Append(c);
    return sb.ToString();
}

// Re-types word as if it had been entered on the other keyboard layout
// (EN→RU when currentIsEn is true, RU→EN otherwise).
// Returns null when word is null or contains a character with no mapping.
private static string RemapWord(string word, bool currentIsEn)
{
    if (word == null) return null;

    var map = currentIsEn ? enToRu : ruToEn;
    var sb = new StringBuilder(word.Length);
    foreach (char c in word)
    {
        char mapped;
        // Exact lookup first: the maps hold shifted keys too, so "Ж" → ":" and "Б" → "<"
        // keep their shift state (lowercasing first turned them into ";" and ",").
        if (map.TryGetValue(c, out mapped))
        {
            sb.Append(mapped);
            continue;
        }
        // Fallback: map the lowercase key and restore the case.
        if (map.TryGetValue(char.ToLower(c), out mapped))
            sb.Append(char.IsUpper(c) ? char.ToUpper(mapped) : mapped);
        else return null;
    }
    return sb.ToString();
}

/// Is the current word at the start of a sentence?
private static bool IsSentenceStart()
{
    return isFirstWordAfterBoundary;
}

/// Smart case: preserves user's casing, but also auto-capitalizes at sentence start
private static string SmartCase(string original, string corrected)
{
    if (original.Length == 0 || corrected.Length == 0) return corrected;

    // All uppercase (2+ letters) → keep all uppercase
    if (IsAllCapsWord(original)) return corrected.ToUpper();

    // User typed first letter uppercase → keep it
    if (char.IsUpper(original[0]))
        return char.ToUpper(corrected[0]) + corrected.Substring(1);

    // User typed lowercase but it's sentence start → capitalize
    if (IsSentenceStart())
        return char.ToUpper(corrected[0]) + corrected.Substring(1);

    return corrected;
}

// True when s has at least two letters and every letter is uppercase.
// A single capital ("Н") or a string with no letters is NOT all-caps; otherwise
// a one-letter fragment at sentence start would expand to "НЕ" instead of "Не".
private static bool IsAllCapsWord(string s)
{
    int letters = 0;
    foreach (char c in s)
    {
        if (!char.IsLetter(c)) continue;
        if (!char.IsUpper(c)) return false;
        letters++;
    }
    return letters >= 2;
}

// ===== Cascade repair: for words with dist > 2, multi-step fixes =====
// Tries, in order: dropping doubled letters, swapping neighbours, deleting one
// char (each followed by one more simple fix), then a SymSpell lookup on every
// single-deletion variant. Returns the first accepted candidate, or null.
// Returns null at once when bloom or word is null or word is shorter than 4 chars.
private static string CascadeRepair(string word, BloomFilter bloom, SymSpell sym)
{
    if (bloom == null || word == null || word.Length < 4) return null;

    // Step 1: Remove duplicate adjacent chars + try fix (up to 3 doubled pairs)
    string w = word;
    for (int pass = 0; pass < 3; pass++)
    {
        bool changed = false;
        for (int i = 0; i < w.Length - 1; i++)
        {
            if (w[i] == w[i + 1])
            {
                string d = w.Remove(i, 1);
                string r = ValidateCandidate(d, word, bloom);
                if (r != null) return r;
                r = TrySimpleFix(d, word, bloom);
                if (r != null) return r;
                w = d; changed = true; break; // keep the shortened form for the next pass
            }
        }
        if (!changed) break;
    }

    // Step 2: Swap + fix (starts again from the untouched word)
    w = word;
    for (int i = 0; i < w.Length - 1; i++)
    {
        char[] arr = w.ToCharArray();
        char t = arr[i]; arr[i] = arr[i + 1]; arr[i + 1] = t;
        string s = new string(arr);
        string r = ValidateCandidate(s, word, bloom);
        if (r != null) return r;
        r = TrySimpleFix(s, word, bloom);
        if (r != null) return r;
    }

    // Step 3: Remove char + fix
    for (int i = 0; i < w.Length; i++)
    {
        string rem = w.Remove(i, 1);
        if (rem.Length < 3) continue;
        string r = ValidateCandidate(rem, word, bloom);
        if (r != null) return r;
        r = TrySimpleFix(rem, word, bloom);
        if (r != null) return r;
    }

    // Step 4: SymSpell on partial fixes
    if (sym != null && sym.IsReady)
    {
        for (int i = 0; i < word.Length; i++)
        {
            string partial = word.Remove(i, 1);
            if (partial.Length < 3) continue;
            bool isC;
            var cands = sym.LookupTop(partial, 1, out isC);
            if (isC && IsSimilar(partial, word)) return partial;
            // Same null guard that CorrectSingleWord applies to LookupTop results.
            if (cands != null && cands.Count > 0 && cands[0].Distance <= 1 && IsSimilar(cands[0].Word, word))
                return cands[0].Word;
        }
    }

    return null;
}

// Validate: candidate must be 3+ chars, pass Bloom AND be similar to original
// (see IsSimilar). Returns the candidate, or null when rejected.
private static string ValidateCandidate(string candidate, string original, BloomFilter bloom)
{
    if (candidate.Length < 3) return null;
    if (!bloom.MayContain(candidate)) return null;
    if (!IsSimilar(candidate, original)) return null;
    return candidate;
}

// Check character overlap — filters Bloom false positives.
// Requires the same first char and shared-char count / longer length >= 0.5.
private static bool IsSimilar(string a, string b)
{
    if (a.Length == 0 || b.Length == 0) return false;
    // First char should match
    if (a[0] != b[0]) return false;
    // Count shared chars (order-independent); each char of b is matched at most once
    int shared = 0;
    var used = new bool[b.Length];
    for (int i = 0; i < a.Length; i++)
    {
        for (int j = 0; j < b.Length; j++)
        {
            if (!used[j] && a[i] == b[j]) { shared++; used[j] = true; break; }
        }
    }
    float ratio = (float)shared / Math.Max(a.Length, b.Length);
    return ratio >= 0.5f;
}

// Single fix: swap or remove → check Bloom + validate.
// Returns the first accepted variant of w, or null.
private static string TrySimpleFix(string w, string original, BloomFilter bloom)
{
    if (w.Length < 3) return null;
    for (int i = 0; i < w.Length - 1; i++)
    {
        char[] arr = w.ToCharArray();
        char t = arr[i]; arr[i] = arr[i + 1]; arr[i + 1] = t;
        string s = ValidateCandidate(new string(arr), original, bloom);
        if (s != null) return s;
    }
    for (int i = 0; i < w.Length; i++)
    {
        string s = ValidateCandidate(w.Remove(i, 1), original, bloom);
        if (s != null) return s;
    }
    return null;
}

// ===== Auto-punctuation: Russian comma rules =====
// Subordinating / relative words that take a comma in front.
private static readonly HashSet<string> commaBeforeWords = new HashSet<string> {
    "что", "чтобы", "который", "которая", "которое", "которые", "которого", "которой", "которому",
    "которым", "которых",
    "когда", "где", "куда", "откуда",
    "если", "хотя", "пока", "чем", "как",
    "потому", "поскольку", "ибо",
};
// Coordinating conjunctions that take a comma in front.
private static readonly HashSet<string> commaConjunctions = new HashSet<string> {
    "но", "однако", "зато", "тем"
};
private static readonly HashSet<string> introWords = new HashSet<string> {
    // Introductory (parenthetical) words — a comma goes AFTER them
    "короче", "впрочем", "кстати", "например", "конечно", "видимо", "наверное",
    "вероятно", "безусловно", "разумеется", "кажется", "пожалуй",
    "действительно", "естественно", "очевидно", "несомненно",
    "значит", "следовательно", "итак", "правда", "честно",
    "собственно", "допустим", "предположим",
};

// Returns "," when a comma should precede currentWord given the words already
// in history (the last entry is treated as the previous word), otherwise null.
private static string NeedsCommaBefore(string currentWord, List<string> history)
{
    if (history.Count == 0) return null;
    string cur = currentWord.ToLower();
    string prev = history[history.Count - 1].ToLower();

    // Don't add comma if previous word already ends with comma
    // (we can't check this directly, but if prev is a conjunction, skip)
    if (commaBeforeWords.Contains(prev) || commaConjunctions.Contains(prev))
        return null;

    // Rule 1: Comma before subordinating conjunctions (что, который, когда, если, etc.)
    // This also covers "чтобы" and "потому", which are members of commaBeforeWords.
    if (commaBeforeWords.Contains(cur))
    {
        // Exception: "потому что" — comma before "потому", not before "что"
        if (cur == "что" && prev == "потому") return null;
        // Exception: at most one word of history — no comma
        if (history.Count <= 1) return null;
        return ",";
    }

    // Rule 2: Comma before "но", "однако", "зато" (coordinating)
    if (commaConjunctions.Contains(cur))
    {
        if (history.Count <= 1) return null;
        return ",";
    }

    // Rule 3: Comma after introductory word (check if PREVIOUS word was introductory)
    // This is handled differently — we'd need to insert comma after the intro word
    // For now skip — too complex for real-time

    // Rule 4: Comma before "а" when it means "but" (not as particle)
    if (cur == "а" && history.Count >= 2)
        return ",";

    // Rule 6: Comma before relative/interrogative words used as conjunctions
    if ((cur == "почему" || cur == "зачем" || cur == "сколько") && history.Count >= 2)
    {
        // Only if mid-sentence (after a verb-like word)
        if (EndsWithAny(prev, "ть", "ет", "ит", "ал", "ла", "ем", "ют", "ат"))
            return ",";
    }

    return null;
}

// ===== Context scoring: morphological hints from previous word =====
private static readonly HashSet<string> prepositions = new HashSet<string> {
    "в","на","по","к","с","у","за","от","из","до","для","без","при","через",
    "между","под","над","перед","про","обо","ко","со","во"
};

// Returns a small additive score for candidate based on suffix heuristics tied
// to the previous word; 0 when prev is null/empty or candidate has fewer than 3 chars.
private static int ScoreContext(string candidate, string prev)
{
    if (string.IsNullOrEmpty(prev) || candidate.Length < 3) return 0;
    string prevLow = prev.ToLower();
    int score = 0;

    // After preposition → prefer nouns (oblique cases: -ом,-ем,-ой,-ам,-ях,-ую,-ым,-ей,-ов)
    if (prepositions.Contains(prevLow))
    {
        if (EndsWithAny(candidate, "ом","ем","ой","ам","ях","ую","ым","ей","ов","ах","ий","ие","ию"))
            score += 3;
        // Penalize infinitives after prepositions (rare: "для делать" → wrong)
        if (EndsWithAny(candidate, "ть","ться"))
            score -= 2;
    }

    // After "не" / "ни" → prefer verbs
    if (prevLow == "не" || prevLow == "ни")
    {
        if (EndsWithAny(candidate, "ть","ет","ит","ал","ла","ся","ли","ём","ёт","ют","ат","ят","ешь","ишь"))
            score += 3;
    }

    // After adjective → prefer noun
    if (EndsWithAny(prevLow, "ый","ий","ой","ая","яя","ое","ее","ые","ие"))
    {
        // Nouns typically don't end in verb suffixes
        if (!EndsWithAny(candidate, "ть","ет","ит","ал","ла","ся","ли","ют","ат"))
            score += 1;
    }

    // After "это","вот","мой","твой","наш","ваш","его","её","их" → prefer noun
    if (prevLow == "это" || prevLow == "вот" || prevLow == "мой" || prevLow == "твой" ||
        prevLow == "наш" || prevLow == "ваш" || prevLow == "его" || prevLow == "её" || prevLow == "их")
    {
        if (!EndsWithAny(candidate, "ть","ся"))
            score += 2;
    }

    // Prefer words of a common length (4-10 chars)
    if (candidate.Length >= 4 && candidate.Length <= 10) score += 1;

    return score;
}

// True when word ends (ordinal comparison) with any of the given suffixes.
private static bool EndsWithAny(string word, params string[] suffixes)
{
    foreach (var s in suffixes)
        if (word.Length >= s.Length && word.EndsWith(s, StringComparison.Ordinal))
            return true;
    return false;
}

// Applies the casing of orig to corr: ALL CAPS (2+ letters) → upper-case corr,
// leading capital → capitalize corr, otherwise corr unchanged.
private static string MatchCase(string orig, string corr)
{
    if (orig.Length == 0 || corr.Length == 0) return corr;
    if (IsAllCapsWord(orig)) return corr.ToUpper();
    if (char.IsUpper(orig[0])) return char.ToUpper(corr[0]) + corr.Substring(1);
    return corr;
}

// Adds the trimmed, lowercased word to the in-memory user dictionary and appends
// it to the file at userDictPath (when set). Null, blank and already-known words
// are ignored. A persistence failure is logged; the word stays in memory.
public static void AddToUserDictionary(string word)
{
    if (word == null) return;
    string lower = word.Trim().ToLower();
    if (lower.Length == 0 || !userDict.Add(lower)) return;
    if (userDictPath == null) return;

    try
    {
        string dir = Path.GetDirectoryName(userDictPath);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir)) Directory.CreateDirectory(dir);
        File.AppendAllText(userDictPath, lower + Environment.NewLine);
    }
    catch (Exception ex)
    {
        // Best-effort persistence: report instead of silently swallowing.
        Log("AddToUserDictionary: failed to persist \"" + lower + "\": " + ex.Message);
    }
}

// Screen position of the text caret in the foreground window; falls back to the
// mouse cursor position when no caret window is reported or a call throws.
private static Point GetCaretScreenPoint()
{
    try
    {
        IntPtr fg = WinApi.GetForegroundWindow();
        if (fg != IntPtr.Zero)
        {
            uint pid; uint tid = WinApi.GetWindowThreadProcessId(fg, out pid);
            var info = new WinApi.GUITHREADINFO();
            info.cbSize = Marshal.SizeOf(typeof(WinApi.GUITHREADINFO));
            if (WinApi.GetGUIThreadInfo(tid, ref info) && info.hwndCaret != IntPtr.Zero)
            {
                // rcCaret is in client coordinates of hwndCaret
                var pt = new WinApi.POINT(info.rcCaret.Left, info.rcCaret.Top);
                WinApi.ClientToScreen(info.hwndCaret, ref pt);
                return new Point(pt.X, pt.Y);
            }
        }
    }
    catch { } // deliberate: any failure falls through to the mouse position
    WinApi.POINT mp; WinApi.GetCursorPos(out mp);
    return new Point(mp.X, mp.Y);
}

// Public entry: cursor moved (mouse click) — flush word then reset
public static void OnCursorMoved()
{
    if (wordBuffer.Length > 0)
    {
        lastBoundaryChar = ' ';
        FlushWord();
    }
    wordBuffer.Clear();
    pendingMerge = null;
    sentenceRaw.Length = 0;
    lastContextWords = null; // invalidate cached context
}

// Analyze words around cursor when user moves cursor (arrow keys / mouse click)
private static long lastAnalyzeTime = 0;
private static void AnalyzeAtCursor()
{
    try
    {
        // Throttle: max once per 500ms. Environment.TickCount wraps to negative
        // values after ~24.9 days of uptime, so the elapsed time is computed with
        // unsigned wrap-around arithmetic instead of a signed difference.
        int now = Environment.TickCount;
        uint elapsed = unchecked((uint)(now - (int)lastAnalyzeTime));
        if (elapsed < 500) return;
        lastAnalyzeTime = now;

        if (!Settings.AutoT9) return;
        if (compactRu == null || !compactRu.IsReady) return;

        // Already on STA thread — call UIA directly
        UpdateWindowContextUIA(5);
        string[] ctx = lastContextWords;
        if (ctx != null && ctx.Length > 0)
        {
            // Adopt the last context word as prevWord only if it contains Cyrillic
            string lastW = ctx[ctx.Length - 1];
            bool hasCyrillic = false;
            foreach (char ch in lastW)
                if (ch >= '\u0400' && ch <= '\u04FF') { hasCyrillic = true; break; }
            if (hasCyrillic)
            {
                prevWord = lastW;
                Log(" CursorCtx: prevWord=\"" + prevWord + "\"");
            }
        }
    }
    catch (Exception ex) { Log(" AnalyzeAtCursor err: " + ex.Message); }
}

// Read context from active window via UI Automation TextPattern
// Non-invasive: doesn't touch clipboard, doesn't move cursor
// Works in: Notepad, Chrome, VS Code, Electron, WPF, WinForms
private static long lastContextTime = 0;
private static string[] lastContextWords = null;
private static volatile bool contextUpdateRunning = false;

// Called from hook context — returns cached result, schedules background refresh
private static string[] GetWindowContext(int maxWords = 5)
{
    // Refresh when there is no cache or it is older than 300 ms. The age uses
    // unsigned wrap-around arithmetic so it survives Environment.TickCount rollover.
    int now = Environment.TickCount;
    bool stale = lastContextWords == null
        || unchecked((uint)(now - (int)lastContextTime)) > 300;

    // Schedule background update if not already running
    if (!contextUpdateRunning && stale)
    {
        contextUpdateRunning = true;
        try
        {
            // UIA requires STA thread — use dedicated thread, not ThreadPool (MTA)
            var t = new System.Threading.Thread(() =>
            {
                try { UpdateWindowContextUIA(maxWords); }
                catch { }
                finally { contextUpdateRunning = false; }
            });
            t.SetApartmentState(System.Threading.ApartmentState.STA);
            t.IsBackground = true;
            t.Start();
        }
        catch
        {
            // The worker never started: clear the flag so a later call can retry.
            contextUpdateRunning = false;
            throw;
        }
    }
    return lastContextWords; // return cached (may be from previous call)
}

// Runs on ThreadPool — safe for COM/UIA calls
private static void UpdateWindowContextUIA(int maxWords)
{
    try
    {
        lastContextTime = Environment.TickCount;

string contextText = null; 3328 3329 // Method 1: UI Automation TextPattern (Chrome, WPF, UWP, etc.) 3330 try 3331 { 3332 var focused = System.Windows.Automation.AutomationElement.FocusedElement; 3333 if (focused != null) 3334 { 3335 object patternObj; 3336 if (focused.TryGetCurrentPattern(System.Windows.Automation.TextPattern.Pattern, out patternObj)) 3337 { 3338 var textPattern = patternObj as System.Windows.Automation.TextPattern; 3339 if (textPattern != null) 3340 { 3341 var selections = textPattern.GetSelection(); 3342 if (selections != null && selections.Length > 0) 3343 { 3344 var contextRange = selections[0].Clone(); 3345 contextRange.MoveEndpointByUnit( 3346 System.Windows.Automation.Text.TextPatternRangeEndpoint.Start, 3347 System.Windows.Automation.Text.TextUnit.Word, 3348 -maxWords); 3349 contextText = contextRange.GetText(500); 3350 } 3351 } 3352 } 3353 } 3354 } 3355 catch { } 3356 3357 // Method 2: WM_GETTEXT + EM_GETSEL fallback (Notepad, Scintilla, Win32 edit) 3358 if (string.IsNullOrEmpty(contextText)) 3359 { 3360 try 3361 { 3362 IntPtr fg = WinApi.GetForegroundWindow(); 3363 if (fg != IntPtr.Zero) 3364 { 3365 uint pid; uint tid = WinApi.GetWindowThreadProcessId(fg, out pid); 3366 var info = new WinApi.GUITHREADINFO(); 3367 info.cbSize = System.Runtime.InteropServices.Marshal.SizeOf(typeof(WinApi.GUITHREADINFO)); 3368 if (WinApi.GetGUIThreadInfo(tid, ref info)) 3369 { 3370 IntPtr hwnd = info.hwndFocus != IntPtr.Zero ? 
info.hwndFocus : fg; 3371 int textLen = (int)WinApi.SendMessage(hwnd, WinApi.WM_GETTEXTLENGTH, IntPtr.Zero, IntPtr.Zero); 3372 if (textLen > 0 && textLen < 50000) 3373 { 3374 var sb2 = new System.Text.StringBuilder(textLen + 1); 3375 WinApi.SendMessage(hwnd, WinApi.WM_GETTEXT, (IntPtr)(textLen + 1), sb2); 3376 string fullText = sb2.ToString(); 3377 IntPtr selResult = WinApi.SendMessage(hwnd, WinApi.EM_GETSEL, IntPtr.Zero, IntPtr.Zero); 3378 int caretPos = (int)(selResult.ToInt64() & 0xFFFF); 3379 if (caretPos > 0 && caretPos <= fullText.Length) 3380 contextText = fullText.Substring(0, caretPos); 3381 else 3382 contextText = fullText; // no caret info — use full text 3383 } 3384 } 3385 } 3386 } 3387 catch { } 3388 } 3389 3390 if (string.IsNullOrEmpty(contextText)) return; 3391 3392 var words = new System.Collections.Generic.List<string>(); 3393 var wb = new System.Text.StringBuilder(); 3394 foreach (char c in contextText) 3395 { 3396 if (char.IsLetter(c) || c == '-' || c == '\'') 3397 wb.Append(c); 3398 else if (wb.Length > 0) 3399 { 3400 words.Add(wb.ToString().ToLower()); 3401 wb.Clear(); 3402 } 3403 } 3404 if (wb.Length > 0) words.Add(wb.ToString().ToLower()); 3405 if (words.Count == 0) return; 3406 3407 int start = words.Count > maxWords ? 
words.Count - maxWords : 0; 3408 var result = new string[words.Count - start]; 3409 for (int i = 0; i < result.Length; i++) 3410 result[i] = words[start + i]; 3411 3412 lastContextWords = result; 3413 } 3414 catch (Exception ex) { Log(" UIA err: " + ex.GetType().Name + ": " + ex.Message); } 3415 } 3416 3417 private static void ShowTooltipInternal(string text, Point caretPos) 3418 { CloseTooltip(); activeTooltip = new CorrectionTooltip(text, false, caretPos); activeTooltip.ShowNoActivate(); } 3419 3420 private static void ShowAddToDictTooltip(string word) 3421 { 3422 waitingForDictConfirm = true; 3423 pendingDictWord = word; 3424 Point pos = GetCaretScreenPoint(); 3425 if (TrayApp.Instance == null || TrayApp.Instance.IsDisposed) return; 3426 TrayApp.Instance.BeginInvoke(new Action(() => 3427 { CloseTooltip(); activeTooltip = new CorrectionTooltip("Add \"" + word + "\"? [Enter]", true, pos); activeTooltip.WordToAdd = word; activeTooltip.ShowNoActivate(); })); 3428 } 3429 3430 private static void CloseTooltip() 3431 { if (activeTooltip != null && !activeTooltip.IsDisposed) { try { activeTooltip.Close(); } catch { } activeTooltip = null; } } 3432 3433 private class CorrectionTooltip : Form 3434 { 3435 private string text; private bool isAddPrompt; public string WordToAdd; 3436 private Timer autoClose; private float fade = 1f; private Timer fadeTimer; private Font bebasFont; 3437 [DllImport("user32.dll")] private static extern bool ShowWindow(IntPtr h, int c); 3438 [DllImport("user32.dll")] private static extern bool SetWindowPos(IntPtr h, IntPtr a, int X, int Y, int cx, int cy, uint f); 3439 private static readonly IntPtr HWND_TOPMOST = (IntPtr)(-1); 3440 3441 public CorrectionTooltip(string text, bool isAddPrompt, Point caretPos) 3442 { 3443 this.text = text; this.isAddPrompt = isAddPrompt; 3444 FormBorderStyle = FormBorderStyle.None; ShowInTaskbar = false; TopMost = true; 3445 StartPosition = FormStartPosition.Manual; 3446 BackColor = 
Color.FromArgb(Settings.BlurTintColor.R, Settings.BlurTintColor.G, Settings.BlurTintColor.B); 3447 DoubleBuffered = true; 3448 SetStyle(ControlStyles.OptimizedDoubleBuffer | ControlStyles.AllPaintingInWmPaint | ControlStyles.UserPaint, true); 3449 bebasFont = FontHelper.CreateFont(14f); 3450 Size tsz; using (var bmp = new Bitmap(1, 1)) using (var g = Graphics.FromImage(bmp)) tsz = Size.Ceiling(g.MeasureString(text, bebasFont)); 3451 Size = new Size(tsz.Width + 24, tsz.Height + 12); 3452 var scr = Screen.FromPoint(caretPos).WorkingArea; 3453 int x = caretPos.X, y = caretPos.Y - Height - 4; 3454 if (x + Width > scr.Right) x = scr.Right - Width; if (y < scr.Top) y = caretPos.Y + 24; if (x < scr.Left) x = scr.Left; 3455 Location = new Point(x, y); 3456 Load += (s, e) => { BlurHelper.Apply(Handle); WinApi.TryEnableRoundedCorners(Handle); }; 3457 autoClose = new Timer(); autoClose.Interval = isAddPrompt ? 5000 : 2500; 3458 autoClose.Tick += delegate { StartFadeOut(); }; autoClose.Start(); 3459 } 3460 public void ShowNoActivate() { Visible = true; ShowWindow(Handle, 4); SetWindowPos(Handle, HWND_TOPMOST, 0, 0, 0, 0, 0x13); } 3461 private void StartFadeOut() { if (autoClose != null) autoClose.Stop(); if (fadeTimer != null) return; fadeTimer = new Timer(); fadeTimer.Interval = 16; fadeTimer.Tick += delegate { fade -= 0.1f; if (fade <= 0) { fadeTimer.Stop(); Close(); return; } Opacity = fade; }; fadeTimer.Start(); } 3462 protected override void WndProc(ref Message m) { if (m.Msg == 0x0021) { m.Result = (IntPtr)4; return; } base.WndProc(ref m); } 3463 protected override void OnPaintBackground(PaintEventArgs e) { } 3464 protected override void OnPaint(PaintEventArgs e) 3465 { 3466 var g = e.Graphics; g.SmoothingMode = SmoothingMode.HighSpeed; g.TextRenderingHint = TextRenderingHint.AntiAliasGridFit; 3467 g.Clear(Color.FromArgb(Settings.BlurTintAlpha, Settings.BlurTintColor)); 3468 using (var p = new Pen(Color.FromArgb(55, 60, 72))) g.DrawRectangle(p, 0, 0, Width - 1, Height - 
1); 3469 using (var b = new SolidBrush(Color.FromArgb(210, 218, 235))) g.DrawString(text, bebasFont, b, 12, 6); 3470 } 3471 protected override CreateParams CreateParams { get { var cp = base.CreateParams; cp.ExStyle |= 0x80 | 0x8 | 0x08000000; return cp; } } 3472 protected override void Dispose(bool d) { if (d) { if (autoClose != null) autoClose.Dispose(); if (fadeTimer != null) fadeTimer.Dispose(); if (bebasFont != null) bebasFont.Dispose(); } base.Dispose(d); } 3473 } 3474 } 3475}