windowcapture
исходный код / Tip/Correct.h

Correct.h

51 строк · 1,981 байт · модуль Tip
 1// Correct.h — the TIP's correction "brain", isolated so the headless self-test can exercise it.
 2// M1: a tiny built-in typo table (proves the TSF in-place mechanism end to end).
 3// M2: replace wctip::CorrectWord with a localhost call to the real brain
 4//     (C# noisy-channel SpellScore + rubert-tiny2 context rescorer over the warm server).
 5#pragma once
 6#include <string>
 7
 8namespace wctip {
 9
// Produces a lower-cased copy of `s`.
// Only three groups of characters are folded; everything else is copied as-is:
//   - ASCII Latin capitals A..Z,
//   - Cyrillic capitals U+0410..U+042F (А..Я), which sit exactly 0x20 below their lower-case forms,
//   - U+0401 (Ё), which lies outside that contiguous range and maps to U+0451 (ё).
inline std::wstring ToLowerRu(const std::wstring& s)
{
    std::wstring lowered(s);
    for (wchar_t& ch : lowered)
    {
        if (ch >= L'A' && ch <= L'Z')
        {
            ch = static_cast<wchar_t>(ch - L'A' + L'a');
        }
        else if (ch >= 0x0410 && ch <= 0x042F)
        {
            ch = static_cast<wchar_t>(ch + 0x20);   // А..Я -> а..я
        }
        else if (ch == 0x0401)
        {
            ch = 0x0451;                            // Ё -> ё
        }
    }
    return lowered;
}
22
// Tells whether `c` is one of the separator characters recognised here:
// whitespace (space, tab, LF, CR) or the punctuation marks . , ! ? ; :
// Any other character yields false.
inline bool IsSep(wchar_t c)
{
    switch (c)
    {
    // whitespace
    case L' ':
    case L'\t':
    case L'\n':
    case L'\r':
    // punctuation
    case L'.':
    case L',':
    case L'!':
    case L'?':
    case L';':
    case L':':
        return true;
    default:
        return false;
    }
}
28
29// Returns the correction, or L"" if the word should be left unchanged.
30inline std::wstring CorrectWord(const std::wstring& word)
31{
32    struct Pair { const wchar_t* bad; const wchar_t* good; };
33    static const Pair TABLE[] = {
34        { L"превет",      L"привет" },
35        { L"пливет",      L"привет" },
36        { L"здраствуй",   L"здравствуй" },
37        { L"здраствуйте", L"здравствуйте" },
38        { L"карова",      L"корова" },
39        { L"сабака",      L"собака" },
40        { L"малако",      L"молоко" },
41        { L"симпотичный", L"симпатичный" },
42        { L"тестовае",    L"тестовое" },
43    };
44    std::wstring low = ToLowerRu(word);
45    for (size_t i = 0; i < sizeof(TABLE) / sizeof(TABLE[0]); ++i)
46        if (low == TABLE[i].bad)
47            return std::wstring(TABLE[i].good);
48    return L"";
49}
50
51} // namespace wctip