windowcapture
исходный код / Helpers/RescoreClient.cs

RescoreClient.cs

83 строк · 3,828 байт · модуль Helpers
 1using System;
 2using System.Globalization;
 3using System.IO;
 4using System.Net;
 5using System.Text;
 6using System.Threading;
 7
 8namespace WindowCapture.Helpers
 9{
10    /// <summary>
11    /// Per-word CONTEXT rescorer — "stage 2" of the autocorrect pipeline (see
12    /// TSF_AND_TINY_NN_DESIGN.md). A tiny masked-LM (default cointegrated/rubert-tiny2, ~29M params,
13    /// the same size class as Apple's on-device autocorrect transformer) scores, for a small set of
14    /// DICTIONARY candidates produced by the noisy-channel SpellScore, the length-normalized
15    /// pseudo-log-likelihood P(word | left/right context). Because it only RESCORES caller-supplied
16    /// candidates, it can re-rank but never hallucinate a word.
17    ///
18    /// Talks to the warm server's /rescore endpoint over localhost (Spell/wc_spell_server.py),
19    /// reusing SageClient's single python process (the model loads lazily on first call). Returns
20    /// null on any failure so the caller keeps the noisy-channel/bigram ranking. Never throws.
21    /// </summary>
22    public static class RescoreClient
23    {
24        public static bool IsAvailable { get { return SageClient.IsAvailable; } }
25
26        /// <summary>Length-normalized log P(word|context) per candidate (higher = better fit),
27        /// or null on any failure. Right context is usually empty during real-time typing
28        /// (the user hasn't typed ahead yet) — left context alone still rescores well.</summary>
29        public static double[] Rescore(string left, string right, string[] cands, int timeoutMs = 4000)
30        {
31            if (cands == null || cands.Length == 0) return null;
32            if (!SageClient.IsAvailable) return null;
33            int port = SageClient.EnsurePort();
34            if (port < 0) return null;
35            try
36            {
37                var sb = new StringBuilder();
38                sb.Append(Clean(left)).Append('\n').Append(Clean(right));
39                foreach (var c in cands) sb.Append('\n').Append(Clean(c));
40
41                var req = (HttpWebRequest)WebRequest.Create("http://127.0.0.1:" + port + "/rescore");
42                req.Method = "POST";
43                req.Timeout = timeoutMs;
44                req.ContentType = "text/plain; charset=utf-8";
45                byte[] body = Encoding.UTF8.GetBytes(sb.ToString());
46                req.ContentLength = body.Length;
47                using (var s = req.GetRequestStream()) s.Write(body, 0, body.Length);
48
49                string resp;
50                using (var r = (HttpWebResponse)req.GetResponse())
51                using (var rs = r.GetResponseStream())
52                using (var sr = new StreamReader(rs, Encoding.UTF8))
53                    resp = sr.ReadToEnd();
54
55                if (string.IsNullOrEmpty(resp)) return null;
56                string[] parts = resp.Split(',');
57                if (parts.Length != cands.Length) return null;
58                var outp = new double[parts.Length];
59                for (int i = 0; i < parts.Length; i++)
60                    outp[i] = double.Parse(parts[i], CultureInfo.InvariantCulture);
61                return outp;
62            }
63            catch { return null; }
64        }
65
66        private static string Clean(string s)
67        {
68            if (string.IsNullOrEmpty(s)) return "";
69            return s.Replace('\n', ' ').Replace('\r', ' ').Trim();
70        }
71
72        /// <summary>Preload the context model in the background (long timeout — the first call may
73        /// download/load the model) so the first real rescore during typing is fast.</summary>
74        public static void WarmUp()
75        {
76            if (!IsAvailable) return;
77            ThreadPool.QueueUserWorkItem(delegate
78            {
79                try { Rescore("привет", "", new[] { "мир", "мор" }, 90000); } catch { }
80            });
81        }
82    }
83}