|
1 | 1 | #!/usr/bin/env python3 |
2 | 2 |
|
| 3 | +import json |
| 4 | +import os |
| 5 | +import re |
| 6 | +import tempfile |
| 7 | + |
# Upper bound on the number of stored corrections; once exceeded, the entries
# at the front of the mapping (the oldest insertions) are evicted.
MAX_ENTRIES = 200


class CorrectionStore:
    """Persist "wrong -> right" text corrections as JSON and re-apply them.

    Keys are the lower-cased "wrong" phrases and values are the replacement
    text. The mapping is read from ``path`` on first use and then cached on
    the instance.
    """

    def __init__(self, path: str):
        """Remember the JSON file location; nothing is read until first use."""
        self.path = path
        self._cache = None  # loaded lazily

    def _get(self):
        """Return the cached mapping, loading it from disk on first access."""
        if self._cache is None:
            self._cache = self.load()
        return self._cache

    def load(self):
        """Read the mapping from ``self.path``.

        Returns:
            The decoded dict, or ``{}`` when the file is missing, unreadable,
            not valid JSON, or its top-level value is not an object.
        """
        # EAFP: just try to open the file instead of checking existence first.
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        try:
            with open(self.path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            return {}
        return data if isinstance(data, dict) else {}

    def save(self, wrong: str, right: str):
        """Record that ``wrong`` should be replaced by ``right`` and persist it.

        The pair is ignored when either side is blank, when the two differ
        only by case, or when ``wrong`` is a single word shorter than six
        characters (too likely to cause false replacements).
        """
        wrong = (wrong or "").strip()
        right = (right or "").strip()
        if not wrong or not right:
            return
        # Only store if the strings differ beyond case.
        if wrong.lower() == right.lower():
            return
        # Skip very short strings that are too likely to cause false replacements.
        if len(wrong.split()) < 2 and len(wrong) < 6:
            return

        key = wrong.lower()
        store = self._get()
        # If already stored with the same mapping, there is nothing to do.
        if store.get(key) == right:
            return

        # Remove any previous mapping first so the key is re-inserted at the
        # end of the dict; otherwise a just-updated correction keeps its old
        # position and could be evicted as "oldest" below.
        store.pop(key, None)
        store[key] = right

        # Evict the oldest entries beyond the limit (dicts keep insertion order).
        excess = len(store) - MAX_ENTRIES
        if excess > 0:
            for old_key in list(store)[:excess]:
                del store[old_key]
        self._write(store)

    def apply(self, text: str) -> str:
        """Return ``text`` with every stored correction applied.

        Matching is case-insensitive and restricted to whole words/phrases.
        Longer keys are applied before shorter ones.
        """
        if not text:
            return text
        store = self._get()
        if not store:
            return text
        # Sort by key length descending so longer phrases match first.
        for key in sorted(store, key=len, reverse=True):
            replacement = store[key]
            # A hand-edited file may hold an empty key (which would match at
            # every non-word position) or a non-string value; skip those.
            if not key or not isinstance(replacement, str):
                continue
            pattern = re.compile(
                r"(?<!\w)" + re.escape(key) + r"(?!\w)", re.IGNORECASE
            )
            # Use a function replacement so the stored text is inserted
            # literally; a plain string would be treated as a template and
            # have backslashes / group references such as "\n" or "\1"
            # interpreted.
            text = pattern.sub(lambda _m, repl=replacement: repl, text)
        return text

    def _write(self, store):
        """Atomically write ``store`` to ``self.path`` (best effort).

        The data goes to a temporary file in the same directory which is then
        moved over the target with ``os.replace``. Failures are swallowed on
        purpose: losing persistence must not break the caller.
        """
        dir_path = os.path.dirname(self.path) or "."
        tmp = None
        try:
            fd, tmp = tempfile.mkstemp(dir=dir_path, suffix=".tmp")
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(store, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self.path)
            tmp = None  # the temp file has become the real file
        except (OSError, TypeError, ValueError):
            pass
        finally:
            # Do not leave a stray temp file behind when writing or the
            # final rename failed.
            if tmp is not None:
                try:
                    os.unlink(tmp)
                except OSError:
                    pass
0 commit comments