// WCTip.cpp — WindowCapture TSF Text Input Processor (TIP).
//
// Milestone 1: proves UNIVERSAL IN-PLACE correction the way the system autocorrect / IMEs do it —
// via the Text Services Framework, NOT keystroke injection or the clipboard. The TIP is an in-proc
// COM DLL that ctfmon loads into TSF-aware apps. On a word boundary (space / Enter) it reads the
// just-typed word straight from the document buffer (ITfRange) and, if it is a known typo, replaces
// exactly that range in place inside an edit session — no clipboard, no focus change, correct undo.
//
// The correction "brain" here is a tiny built-in typo table on purpose: M1 isolates and proves the
// hardest, novel part (the TSF read+replace plumbing) independently. M2 swaps CorrectWord() for a
// localhost call to the real brain (C# noisy-channel SpellScore + rubert-tiny2 rescorer).
//
// Per-user registration (HKCU) so regsvr32 needs no admin. See Tip/README_TSF.md to build/register/test.
//
// Build: powershell -File build_tip.ps1 -> bin\WCTip.dll
// Reg: regsvr32 bin\WCTip.dll (unreg: regsvr32 /u bin\WCTip.dll)

#include <windows.h>
#include <msctf.h>
#include <ctffunc.h>
#include <winhttp.h>
#include <new>
#include <string>

// Our own GUIDs are defined locally; TSF's GUIDs come from uuid.lib (msctf.h declares them extern).
#include <initguid.h>
// {6B2D4F8A-1C3E-4A7B-9F21-3D5E8C7A4B12}
DEFINE_GUID(CLSID_WCTextService, 0x6b2d4f8a, 0x1c3e, 0x4a7b, 0x9f, 0x21, 0x3d, 0x5e, 0x8c, 0x7a, 0x4b, 0x12);
// {A1B2C3D4-5E6F-4071-8293-0A1B2C3D4E5F}
DEFINE_GUID(GUID_WCProfile, 0xa1b2c3d4, 0x5e6f, 0x4071, 0x82, 0x93, 0x0a, 0x1b, 0x2c, 0x3d, 0x4e, 0x5f);

static HINSTANCE g_hInst = NULL;
static LONG g_cDllRef = 0; // outstanding COM objects -> keeps the DLL loaded

// ---------------------------------------------------------------------------------------------------
// The "brain" (M1: built-in typo table) — in a shared header so the headless self-test exercises it.
36// M2 swaps wctip::CorrectWord for a localhost call to the C# brain (SpellScore + rubert-tiny2). 37// --------------------------------------------------------------------------------------------------- 38#include "Correct.h" 39using namespace wctip; 40 41// --------------------------------------------------------------------------------------------------- 42// M2: the REAL brain. POST "word\nleftContext" to the C# bridge (Helpers/TipBridge.cs) on 43// 127.0.0.1:BRAIN_PORT and use the corrected word it returns (noisy-channel SpellScore + rubert-tiny2 44// rescore + precision gate). Tight timeouts so a missing bridge never janks typing; returns L"" on 45// any failure, and the caller falls back to the built-in wctip::CorrectWord typo table. 46// --------------------------------------------------------------------------------------------------- 47static const INTERNET_PORT BRAIN_PORT = 8766; // MUST match TipBridge.Port 48 49static std::string Utf8FromUtf16(const std::wstring& w) 50{ 51 if (w.empty()) return std::string(); 52 int n = WideCharToMultiByte(CP_UTF8, 0, w.c_str(), (int)w.size(), NULL, 0, NULL, NULL); 53 std::string s((size_t)(n > 0 ? n : 0), '\0'); 54 if (n > 0) WideCharToMultiByte(CP_UTF8, 0, w.c_str(), (int)w.size(), &s[0], n, NULL, NULL); 55 return s; 56} 57 58static std::wstring Utf16FromUtf8(const char* p, int len) 59{ 60 if (len <= 0) return std::wstring(); 61 int n = MultiByteToWideChar(CP_UTF8, 0, p, len, NULL, 0); 62 std::wstring w((size_t)(n > 0 ? 
n : 0), L'\0'); 63 if (n > 0) MultiByteToWideChar(CP_UTF8, 0, p, len, &w[0], n); 64 return w; 65} 66 67static std::wstring BrainCorrect(const std::wstring& word, const std::wstring& leftCtx) 68{ 69 std::string body = Utf8FromUtf16(word) + "\n" + Utf8FromUtf16(leftCtx); 70 std::wstring result; 71 BOOL ok = FALSE; 72 73 HINTERNET hS = WinHttpOpen(L"WCTip/1.0", WINHTTP_ACCESS_TYPE_NO_PROXY, 74 WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); 75 if (!hS) return L""; 76 WinHttpSetTimeouts(hS, 300, 300, 800, 800); 77 HINTERNET hC = WinHttpConnect(hS, L"127.0.0.1", BRAIN_PORT, 0); 78 HINTERNET hR = NULL; 79 if (hC) 80 hR = WinHttpOpenRequest(hC, L"POST", L"/correctword", NULL, 81 WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, 0); 82 if (hR) 83 { 84 ok = WinHttpSendRequest(hR, WINHTTP_NO_ADDITIONAL_HEADERS, 0, 85 (LPVOID)body.data(), (DWORD)body.size(), (DWORD)body.size(), 0); 86 if (ok) ok = WinHttpReceiveResponse(hR, NULL); 87 if (ok) 88 { 89 std::string resp; 90 for (;;) 91 { 92 DWORD avail = 0; 93 if (!WinHttpQueryDataAvailable(hR, &avail) || avail == 0) break; 94 std::string chunk((size_t)avail, '\0'); 95 DWORD read = 0; 96 if (!WinHttpReadData(hR, &chunk[0], avail, &read) || read == 0) break; 97 resp.append(chunk.data(), read); 98 } 99 result = Utf16FromUtf8(resp.data(), (int)resp.size()); 100 } 101 } 102 if (hR) WinHttpCloseHandle(hR); 103 if (hC) WinHttpCloseHandle(hC); 104 if (hS) WinHttpCloseHandle(hS); 105 return ok ? result : L""; 106} 107 108// --------------------------------------------------------------------------------------------------- 109// Edit session: runs inside a document lock; reads the word before the caret and replaces it in place. 
110// --------------------------------------------------------------------------------------------------- 111class CEditSession : public ITfEditSession 112{ 113public: 114 CEditSession(ITfContext* pContext) : _cRef(1), _pContext(pContext) 115 { 116 _pContext->AddRef(); 117 InterlockedIncrement(&g_cDllRef); 118 } 119 120 // IUnknown 121 STDMETHODIMP QueryInterface(REFIID riid, void** ppv) 122 { 123 if (!ppv) return E_INVALIDARG; 124 if (IsEqualIID(riid, IID_IUnknown) || IsEqualIID(riid, IID_ITfEditSession)) 125 *ppv = (ITfEditSession*)this; 126 else { *ppv = NULL; return E_NOINTERFACE; } 127 AddRef(); 128 return S_OK; 129 } 130 STDMETHODIMP_(ULONG) AddRef() { return InterlockedIncrement(&_cRef); } 131 STDMETHODIMP_(ULONG) Release() 132 { 133 LONG c = InterlockedDecrement(&_cRef); 134 if (c == 0) delete this; 135 return c; 136 } 137 138 // ITfEditSession 139 STDMETHODIMP DoEditSession(TfEditCookie ec) 140 { 141 TF_SELECTION sel; 142 ULONG nFetched = 0; 143 if (FAILED(_pContext->GetSelection(ec, TF_DEFAULT_SELECTION, 1, &sel, &nFetched)) || nFetched == 0) 144 return S_OK; 145 146 ITfRange* pRange = NULL; 147 HRESULT hr = sel.range->Clone(&pRange); 148 sel.range->Release(); 149 if (FAILED(hr) || !pRange) return S_OK; 150 151 const LONG MAXREAD = 64; 152 LONG shifted = 0; 153 pRange->ShiftStart(ec, -MAXREAD, &shifted, NULL); // span = [caret-MAXREAD .. caret] 154 155 WCHAR buf[MAXREAD + 1]; 156 ULONG cch = 0; 157 hr = pRange->GetText(ec, 0, buf, MAXREAD, &cch); 158 if (FAILED(hr)) { pRange->Release(); return S_OK; } 159 buf[cch] = 0; 160 161 // Locate the last word: skip trailing separators (the space/Enter just typed), then walk back. 
162 LONG end = (LONG)cch; 163 LONG wordEnd = end; 164 while (wordEnd > 0 && IsSep(buf[wordEnd - 1])) wordEnd--; 165 LONG wordStart = wordEnd; 166 while (wordStart > 0 && !IsSep(buf[wordStart - 1])) wordStart--; 167 LONG wordLen = wordEnd - wordStart; 168 if (wordLen <= 0) { pRange->Release(); return S_OK; } 169 170 std::wstring word(buf + wordStart, buf + wordEnd); 171 std::wstring leftCtx(buf, buf + wordStart); // text before the word = left context 172 std::wstring fixed = BrainCorrect(word, leftCtx); // real brain via the C# bridge 173 if (fixed.empty()) fixed = CorrectWord(word); // bridge down -> built-in typo table 174 if (fixed.empty() || fixed == word) { pRange->Release(); return S_OK; } 175 176 // Build a range covering exactly [wordStart, wordEnd) within the read span and replace it. 177 ITfRange* pWord = NULL; 178 if (SUCCEEDED(pRange->Clone(&pWord)) && pWord) 179 { 180 LONG s1 = 0, s2 = 0; 181 pWord->ShiftStart(ec, wordStart, &s1, NULL); // move start forward to word 182 pWord->ShiftEnd(ec, -(LONG)(cch - wordEnd), &s2, NULL); // move end back to word end 183 pWord->SetText(ec, 0, fixed.c_str(), (LONG)fixed.length()); 184 pWord->Release(); 185 } 186 pRange->Release(); 187 return S_OK; 188 } 189 190private: 191 ~CEditSession() 192 { 193 _pContext->Release(); 194 InterlockedDecrement(&g_cDllRef); 195 } 196 LONG _cRef; 197 ITfContext* _pContext; 198}; 199 200// --------------------------------------------------------------------------------------------------- 201// The text service: ITfTextInputProcessorEx + ITfKeyEventSink. 
202// --------------------------------------------------------------------------------------------------- 203class CWCTextService : public ITfTextInputProcessorEx, 204 public ITfKeyEventSink 205{ 206public: 207 CWCTextService() : _cRef(1), _pThreadMgr(NULL), _clientId(TF_CLIENTID_NULL) 208 { 209 InterlockedIncrement(&g_cDllRef); 210 } 211 212 // IUnknown 213 STDMETHODIMP QueryInterface(REFIID riid, void** ppv) 214 { 215 if (!ppv) return E_INVALIDARG; 216 if (IsEqualIID(riid, IID_IUnknown) || 217 IsEqualIID(riid, IID_ITfTextInputProcessor) || 218 IsEqualIID(riid, IID_ITfTextInputProcessorEx)) 219 *ppv = (ITfTextInputProcessorEx*)this; 220 else if (IsEqualIID(riid, IID_ITfKeyEventSink)) 221 *ppv = (ITfKeyEventSink*)this; 222 else { *ppv = NULL; return E_NOINTERFACE; } 223 AddRef(); 224 return S_OK; 225 } 226 STDMETHODIMP_(ULONG) AddRef() { return InterlockedIncrement(&_cRef); } 227 STDMETHODIMP_(ULONG) Release() 228 { 229 LONG c = InterlockedDecrement(&_cRef); 230 if (c == 0) delete this; 231 return c; 232 } 233 234 // ITfTextInputProcessor 235 STDMETHODIMP Activate(ITfThreadMgr* ptim, TfClientId tid) { return ActivateEx(ptim, tid, 0); } 236 STDMETHODIMP Deactivate() 237 { 238 if (_pThreadMgr) 239 { 240 ITfKeystrokeMgr* pksm = NULL; 241 if (SUCCEEDED(_pThreadMgr->QueryInterface(IID_ITfKeystrokeMgr, (void**)&pksm)) && pksm) 242 { 243 pksm->UnadviseKeyEventSink(_clientId); 244 pksm->Release(); 245 } 246 _pThreadMgr->Release(); 247 _pThreadMgr = NULL; 248 } 249 _clientId = TF_CLIENTID_NULL; 250 return S_OK; 251 } 252 253 // ITfTextInputProcessorEx 254 STDMETHODIMP ActivateEx(ITfThreadMgr* ptim, TfClientId tid, DWORD /*dwFlags*/) 255 { 256 _pThreadMgr = ptim; 257 _pThreadMgr->AddRef(); 258 _clientId = tid; 259 260 ITfKeystrokeMgr* pksm = NULL; 261 if (SUCCEEDED(_pThreadMgr->QueryInterface(IID_ITfKeystrokeMgr, (void**)&pksm)) && pksm) 262 { 263 pksm->AdviseKeyEventSink(_clientId, (ITfKeyEventSink*)this, TRUE); 264 pksm->Release(); 265 } 266 return S_OK; 267 } 268 269 // 
ITfKeyEventSink — we never EAT keys; we let them through and correct the finished word. 270 STDMETHODIMP OnSetFocus(BOOL /*fForeground*/) { return S_OK; } 271 STDMETHODIMP OnTestKeyDown(ITfContext* /*pic*/, WPARAM /*w*/, LPARAM /*l*/, BOOL* pfEaten) { *pfEaten = FALSE; return S_OK; } 272 STDMETHODIMP OnTestKeyUp(ITfContext* /*pic*/, WPARAM /*w*/, LPARAM /*l*/, BOOL* pfEaten) { *pfEaten = FALSE; return S_OK; } 273 STDMETHODIMP OnKeyUp(ITfContext* /*pic*/, WPARAM /*w*/, LPARAM /*l*/, BOOL* pfEaten) { *pfEaten = FALSE; return S_OK; } 274 STDMETHODIMP OnPreservedKey(ITfContext* /*pic*/, REFGUID /*rguid*/, BOOL* pfEaten) { *pfEaten = FALSE; return S_OK; } 275 276 STDMETHODIMP OnKeyDown(ITfContext* pic, WPARAM wParam, LPARAM /*lParam*/, BOOL* pfEaten) 277 { 278 *pfEaten = FALSE; // do not consume the key — the boundary char is still typed normally 279 if (_pThreadMgr && pic && (wParam == VK_SPACE || wParam == VK_RETURN)) 280 { 281 CEditSession* pes = new CEditSession(pic); 282 HRESULT hrSession; 283 // Async R/W session: runs after the boundary char is committed, then fixes the word. 284 pic->RequestEditSession(_clientId, pes, TF_ES_ASYNCDONTCARE | TF_ES_READWRITE, &hrSession); 285 pes->Release(); 286 } 287 return S_OK; 288 } 289 290private: 291 ~CWCTextService() 292 { 293 if (_pThreadMgr) _pThreadMgr->Release(); 294 InterlockedDecrement(&g_cDllRef); 295 } 296 LONG _cRef; 297 ITfThreadMgr* _pThreadMgr; 298 TfClientId _clientId; 299}; 300 301// --------------------------------------------------------------------------------------------------- 302// Class factory. 
303// --------------------------------------------------------------------------------------------------- 304class CClassFactory : public IClassFactory 305{ 306public: 307 CClassFactory() : _cRef(1) { InterlockedIncrement(&g_cDllRef); } 308 309 STDMETHODIMP QueryInterface(REFIID riid, void** ppv) 310 { 311 if (!ppv) return E_INVALIDARG; 312 if (IsEqualIID(riid, IID_IUnknown) || IsEqualIID(riid, IID_IClassFactory)) 313 *ppv = (IClassFactory*)this; 314 else { *ppv = NULL; return E_NOINTERFACE; } 315 AddRef(); 316 return S_OK; 317 } 318 STDMETHODIMP_(ULONG) AddRef() { return InterlockedIncrement(&_cRef); } 319 STDMETHODIMP_(ULONG) Release() 320 { 321 LONG c = InterlockedDecrement(&_cRef); 322 if (c == 0) { InterlockedDecrement(&g_cDllRef); delete this; } 323 return c; 324 } 325 326 STDMETHODIMP CreateInstance(IUnknown* pUnkOuter, REFIID riid, void** ppv) 327 { 328 if (ppv) *ppv = NULL; 329 if (pUnkOuter) return CLASS_E_NOAGGREGATION; 330 CWCTextService* p = new CWCTextService(); 331 if (!p) return E_OUTOFMEMORY; 332 HRESULT hr = p->QueryInterface(riid, ppv); 333 p->Release(); 334 return hr; 335 } 336 STDMETHODIMP LockServer(BOOL fLock) 337 { 338 if (fLock) InterlockedIncrement(&g_cDllRef); 339 else InterlockedDecrement(&g_cDllRef); 340 return S_OK; 341 } 342 343private: 344 ~CClassFactory() {} 345 LONG _cRef; 346}; 347 348// --------------------------------------------------------------------------------------------------- 349// Registration (per-user, HKCU — no admin needed). 
350// --------------------------------------------------------------------------------------------------- 351static std::wstring ClsidString() 352{ 353 WCHAR s[64] = {0}; 354 StringFromGUID2(CLSID_WCTextService, s, 64); 355 return std::wstring(s); 356} 357 358static HRESULT RegisterInprocServer() 359{ 360 WCHAR dll[MAX_PATH] = {0}; 361 GetModuleFileNameW(g_hInst, dll, MAX_PATH); 362 363 std::wstring base = L"Software\\Classes\\CLSID\\" + ClsidString(); 364 std::wstring inproc = base + L"\\InprocServer32"; 365 366 HKEY hk = NULL; 367 if (RegCreateKeyExW(HKEY_CURRENT_USER, inproc.c_str(), 0, NULL, 0, KEY_WRITE, NULL, &hk, NULL) != ERROR_SUCCESS) 368 return E_FAIL; 369 RegSetValueExW(hk, NULL, 0, REG_SZ, (const BYTE*)dll, (DWORD)((wcslen(dll) + 1) * sizeof(WCHAR))); 370 const WCHAR* model = L"Apartment"; 371 RegSetValueExW(hk, L"ThreadingModel", 0, REG_SZ, (const BYTE*)model, (DWORD)((wcslen(model) + 1) * sizeof(WCHAR))); 372 RegCloseKey(hk); 373 374 // Friendly name on the CLSID key (optional). 
375 HKEY hkBase = NULL; 376 if (RegCreateKeyExW(HKEY_CURRENT_USER, base.c_str(), 0, NULL, 0, KEY_WRITE, NULL, &hkBase, NULL) == ERROR_SUCCESS) 377 { 378 const WCHAR* name = L"WindowCapture Autocorrect"; 379 RegSetValueExW(hkBase, NULL, 0, REG_SZ, (const BYTE*)name, (DWORD)((wcslen(name) + 1) * sizeof(WCHAR))); 380 RegCloseKey(hkBase); 381 } 382 return S_OK; 383} 384 385static void UnregisterInprocServer() 386{ 387 std::wstring base = L"Software\\Classes\\CLSID\\" + ClsidString(); 388 RegDeleteTreeW(HKEY_CURRENT_USER, base.c_str()); 389} 390 391STDAPI DllRegisterServer() 392{ 393 HRESULT hr = RegisterInprocServer(); 394 if (FAILED(hr)) return hr; 395 396 BOOL didInit = SUCCEEDED(CoInitializeEx(NULL, COINIT_APARTMENTTHREADED)); 397 398 ITfInputProcessorProfiles* pProfiles = NULL; 399 if (SUCCEEDED(CoCreateInstance(CLSID_TF_InputProcessorProfiles, NULL, CLSCTX_INPROC_SERVER, 400 IID_ITfInputProcessorProfiles, (void**)&pProfiles)) && pProfiles) 401 { 402 pProfiles->Register(CLSID_WCTextService); 403 const WCHAR* desc = L"WindowCapture Autocorrect"; 404 pProfiles->AddLanguageProfile(CLSID_WCTextService, 405 MAKELANGID(LANG_RUSSIAN, SUBLANG_RUSSIAN_RUSSIA), 406 GUID_WCProfile, desc, (ULONG)wcslen(desc), NULL, 0, 0); 407 pProfiles->Release(); 408 } 409 410 ITfCategoryMgr* pCat = NULL; 411 if (SUCCEEDED(CoCreateInstance(CLSID_TF_CategoryMgr, NULL, CLSCTX_INPROC_SERVER, 412 IID_ITfCategoryMgr, (void**)&pCat)) && pCat) 413 { 414 pCat->RegisterCategory(CLSID_WCTextService, GUID_TFCAT_TIP_KEYBOARD, CLSID_WCTextService); 415 pCat->Release(); 416 } 417 418 if (didInit) CoUninitialize(); 419 return S_OK; 420} 421 422STDAPI DllUnregisterServer() 423{ 424 BOOL didInit = SUCCEEDED(CoInitializeEx(NULL, COINIT_APARTMENTTHREADED)); 425 426 ITfCategoryMgr* pCat = NULL; 427 if (SUCCEEDED(CoCreateInstance(CLSID_TF_CategoryMgr, NULL, CLSCTX_INPROC_SERVER, 428 IID_ITfCategoryMgr, (void**)&pCat)) && pCat) 429 { 430 pCat->UnregisterCategory(CLSID_WCTextService, GUID_TFCAT_TIP_KEYBOARD, 
CLSID_WCTextService); 431 pCat->Release(); 432 } 433 434 ITfInputProcessorProfiles* pProfiles = NULL; 435 if (SUCCEEDED(CoCreateInstance(CLSID_TF_InputProcessorProfiles, NULL, CLSCTX_INPROC_SERVER, 436 IID_ITfInputProcessorProfiles, (void**)&pProfiles)) && pProfiles) 437 { 438 pProfiles->Unregister(CLSID_WCTextService); 439 pProfiles->Release(); 440 } 441 442 if (didInit) CoUninitialize(); 443 UnregisterInprocServer(); 444 return S_OK; 445} 446 447// --------------------------------------------------------------------------------------------------- 448// Standard in-proc COM exports. 449// --------------------------------------------------------------------------------------------------- 450STDAPI DllGetClassObject(REFCLSID rclsid, REFIID riid, void** ppv) 451{ 452 if (ppv) *ppv = NULL; 453 if (!IsEqualCLSID(rclsid, CLSID_WCTextService)) return CLASS_E_CLASSNOTAVAILABLE; 454 CClassFactory* p = new CClassFactory(); 455 if (!p) return E_OUTOFMEMORY; 456 HRESULT hr = p->QueryInterface(riid, ppv); 457 p->Release(); 458 return hr; 459} 460 461STDAPI DllCanUnloadNow() 462{ 463 return (g_cDllRef <= 0) ? S_OK : S_FALSE; 464} 465 466BOOL WINAPI DllMain(HINSTANCE hInst, DWORD reason, LPVOID /*reserved*/) 467{ 468 if (reason == DLL_PROCESS_ATTACH) 469 { 470 g_hInst = hInst; 471 DisableThreadLibraryCalls(hInst); 472 } 473 return TRUE; 474}