using System;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Threading;

namespace WindowCapture.Helpers
{
    /// <summary>
    /// Per-word CONTEXT rescorer — "stage 2" of the autocorrect pipeline (see
    /// TSF_AND_TINY_NN_DESIGN.md). A tiny masked-LM (default cointegrated/rubert-tiny2, ~29M params,
    /// the same size class as Apple's on-device autocorrect transformer) scores, for a small set of
    /// DICTIONARY candidates produced by the noisy-channel SpellScore, the length-normalized
    /// pseudo-log-likelihood P(word | left/right context). Because it only RESCORES caller-supplied
    /// candidates, it can re-rank but never hallucinate a word.
    ///
    /// Talks to the warm server's /rescore endpoint over localhost (Spell/wc_spell_server.py),
    /// reusing SageClient's single python process (the model loads lazily on first call). Returns
    /// null on any failure so the caller keeps the noisy-channel/bigram ranking. Never throws.
    /// </summary>
    public static class RescoreClient
    {
        /// <summary>True when the shared spell server is usable; simply forwards
        /// <c>SageClient.IsAvailable</c>.</summary>
        public static bool IsAvailable { get { return SageClient.IsAvailable; } }

        /// <summary>
        /// Length-normalized log P(word|context) per candidate (higher = better fit), or null on
        /// any failure. Right context is usually empty during real-time typing (the user hasn't
        /// typed ahead yet) — left context alone still rescores well.
        /// </summary>
        /// <remarks>
        /// Wire format: the POST body is newline-separated UTF-8 text — line 1 is the left context,
        /// line 2 the right context, then one candidate per line. The reply is expected to be a
        /// comma-separated list of invariant-culture doubles, one per candidate, in the same order.
        /// </remarks>
        /// <param name="left">Text before the word being corrected; null is sent as an empty line.</param>
        /// <param name="right">Text after the word being corrected; null is sent as an empty line.</param>
        /// <param name="cands">Candidate words to score. Null or empty yields null.</param>
        /// <param name="timeoutMs">Timeout in milliseconds, applied to connecting/awaiting the
        /// response and to each read/write on the request and response streams.</param>
        /// <returns>One score per candidate (index-aligned with <paramref name="cands"/>), or null
        /// if the server is unavailable, the request fails or times out, or the reply is malformed.</returns>
        public static double[] Rescore(string left, string right, string[] cands, int timeoutMs = 4000)
        {
            if (cands == null || cands.Length == 0) return null;
            try
            {
                // The availability/port lookups live inside the try so that a failure in
                // SageClient cannot escape: this method's contract is "never throws".
                if (!SageClient.IsAvailable) return null;
                int port = SageClient.EnsurePort();
                if (port < 0) return null;

                var sb = new StringBuilder();
                sb.Append(Clean(left)).Append('\n').Append(Clean(right));
                foreach (var c in cands) sb.Append('\n').Append(Clean(c));

                var req = (HttpWebRequest)WebRequest.Create("http://127.0.0.1:" + port + "/rescore");
                req.Method = "POST";
                req.Timeout = timeoutMs;
                // Timeout only bounds GetRequestStream/GetResponse. Stream reads and writes are
                // bounded by ReadWriteTimeout, whose default is 5 minutes, so a server that stalls
                // mid-reply would otherwise block far beyond timeoutMs. ReadWriteTimeout rejects 0,
                // hence the guard (Timeout itself accepts 0).
                if (timeoutMs > 0 || timeoutMs == Timeout.Infinite) req.ReadWriteTimeout = timeoutMs;
                req.ContentType = "text/plain; charset=utf-8";
                byte[] body = Encoding.UTF8.GetBytes(sb.ToString());
                req.ContentLength = body.Length;
                using (var s = req.GetRequestStream()) s.Write(body, 0, body.Length);

                string resp;
                using (var r = (HttpWebResponse)req.GetResponse())
                using (var rs = r.GetResponseStream())
                using (var sr = new StreamReader(rs, Encoding.UTF8))
                    resp = sr.ReadToEnd();

                if (string.IsNullOrEmpty(resp)) return null;
                string[] parts = resp.Split(',');
                // A count mismatch means scores can no longer be aligned with candidates.
                if (parts.Length != cands.Length) return null;

                var outp = new double[parts.Length];
                for (int i = 0; i < parts.Length; i++)
                {
                    // Same number styles double.Parse(string, IFormatProvider) uses, but without
                    // raising an exception for an expected failure (malformed score).
                    double value;
                    if (!double.TryParse(parts[i], NumberStyles.Float | NumberStyles.AllowThousands,
                                         CultureInfo.InvariantCulture, out value))
                        return null;
                    outp[i] = value;
                }
                return outp;
            }
            catch
            {
                // Deliberate best-effort: any failure falls back to the caller's existing ranking.
                return null;
            }
        }

        /// <summary>Makes a string safe to send as a single line of the newline-delimited request:
        /// null/empty becomes "", CR and LF become spaces, and the result is trimmed.</summary>
        private static string Clean(string s)
        {
            if (string.IsNullOrEmpty(s)) return "";
            return s.Replace('\n', ' ').Replace('\r', ' ').Trim();
        }

        /// <summary>Preload the context model in the background (long timeout — the first call may
        /// download/load the model) so the first real rescore during typing is fast.</summary>
        public static void WarmUp()
        {
            if (!IsAvailable) return;
            ThreadPool.QueueUserWorkItem(delegate
            {
                // The result is discarded; the call exists only to trigger the lazy model load.
                try { Rescore("привет", "", new[] { "мир", "мор" }, 90000); } catch { }
            });
        }
    }
}