Skip to main content

lean_ctx/core/terse/
dictionaries.rs

1//! Domain-specific abbreviation dictionaries for terse compression.
2//!
3//! Each dictionary provides whole-word-matching abbreviations for a specific
4//! domain (git, cargo, npm, general). Unlike the legacy ABBREVIATIONS list
5//! (18 blind substring replacements), these use word-boundary-aware matching.
6
/// A single abbreviation rule: replaces `long` with `short` at word boundaries.
///
/// Both fields are `&'static str` so that rules can be stored in `const`
/// dictionary tables. The type is a plain pair of string slices, so it derives
/// the usual value-type traits (cheap to copy, printable, comparable).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Abbreviation {
    /// Text to search for; compared case-sensitively.
    pub long: &'static str,
    /// Compact text substituted for `long`.
    pub short: &'static str,
}
12
13pub const GENERAL: &[Abbreviation] = &[
14    Abbreviation {
15        long: "function",
16        short: "fn",
17    },
18    Abbreviation {
19        long: "configuration",
20        short: "cfg",
21    },
22    Abbreviation {
23        long: "implementation",
24        short: "impl",
25    },
26    Abbreviation {
27        long: "dependencies",
28        short: "deps",
29    },
30    Abbreviation {
31        long: "dependency",
32        short: "dep",
33    },
34    Abbreviation {
35        long: "request",
36        short: "req",
37    },
38    Abbreviation {
39        long: "response",
40        short: "res",
41    },
42    Abbreviation {
43        long: "context",
44        short: "ctx",
45    },
46    Abbreviation {
47        long: "error",
48        short: "err",
49    },
50    Abbreviation {
51        long: "return",
52        short: "ret",
53    },
54    Abbreviation {
55        long: "argument",
56        short: "arg",
57    },
58    Abbreviation {
59        long: "value",
60        short: "val",
61    },
62    Abbreviation {
63        long: "module",
64        short: "mod",
65    },
66    Abbreviation {
67        long: "package",
68        short: "pkg",
69    },
70    Abbreviation {
71        long: "directory",
72        short: "dir",
73    },
74    Abbreviation {
75        long: "parameter",
76        short: "param",
77    },
78    Abbreviation {
79        long: "variable",
80        short: "var",
81    },
82    Abbreviation {
83        long: "information",
84        short: "info",
85    },
86    Abbreviation {
87        long: "application",
88        short: "app",
89    },
90    Abbreviation {
91        long: "environment",
92        short: "env",
93    },
94    Abbreviation {
95        long: "repository",
96        short: "repo",
97    },
98    Abbreviation {
99        long: "authentication",
100        short: "auth",
101    },
102    Abbreviation {
103        long: "authorization",
104        short: "authz",
105    },
106    Abbreviation {
107        long: "description",
108        short: "desc",
109    },
110    Abbreviation {
111        long: "development",
112        short: "dev",
113    },
114    Abbreviation {
115        long: "production",
116        short: "prod",
117    },
118    Abbreviation {
119        long: "connection",
120        short: "conn",
121    },
122    Abbreviation {
123        long: "database",
124        short: "db",
125    },
126    Abbreviation {
127        long: "temporary",
128        short: "tmp",
129    },
130    Abbreviation {
131        long: "document",
132        short: "doc",
133    },
134    Abbreviation {
135        long: "maximum",
136        short: "max",
137    },
138    Abbreviation {
139        long: "minimum",
140        short: "min",
141    },
142    Abbreviation {
143        long: "number",
144        short: "num",
145    },
146    Abbreviation {
147        long: "reference",
148        short: "ref",
149    },
150    Abbreviation {
151        long: "string",
152        short: "str",
153    },
154    Abbreviation {
155        long: "message",
156        short: "msg",
157    },
158    Abbreviation {
159        long: "command",
160        short: "cmd",
161    },
162    Abbreviation {
163        long: "expression",
164        short: "expr",
165    },
166    Abbreviation {
167        long: "iteration",
168        short: "iter",
169    },
170    Abbreviation {
171        long: "previous",
172        short: "prev",
173    },
174    Abbreviation {
175        long: "current",
176        short: "cur",
177    },
178    Abbreviation {
179        long: "original",
180        short: "orig",
181    },
182    Abbreviation {
183        long: "destination",
184        short: "dst",
185    },
186    Abbreviation {
187        long: "source",
188        short: "src",
189    },
190    Abbreviation {
191        long: "attribute",
192        short: "attr",
193    },
194    Abbreviation {
195        long: "allocation",
196        short: "alloc",
197    },
198    Abbreviation {
199        long: "generation",
200        short: "gen",
201    },
202    Abbreviation {
203        long: "specification",
204        short: "spec",
205    },
206    Abbreviation {
207        long: "initialization",
208        short: "init",
209    },
210    Abbreviation {
211        long: "operation",
212        short: "op",
213    },
214    Abbreviation {
215        long: "optional",
216        short: "opt",
217    },
218    Abbreviation {
219        long: "utility",
220        short: "util",
221    },
222    Abbreviation {
223        long: "execution",
224        short: "exec",
225    },
226    Abbreviation {
227        long: "property",
228        short: "prop",
229    },
230    Abbreviation {
231        long: "statistics",
232        short: "stats",
233    },
234    Abbreviation {
235        long: "accumulator",
236        short: "acc",
237    },
238    Abbreviation {
239        long: "synchronize",
240        short: "sync",
241    },
242    Abbreviation {
243        long: "asynchronous",
244        short: "async",
245    },
246    Abbreviation {
247        long: "certificate",
248        short: "cert",
249    },
250    Abbreviation {
251        long: "identifier",
252        short: "id",
253    },
254];
255
256pub const GIT: &[Abbreviation] = &[
257    Abbreviation {
258        long: "modified",
259        short: "M",
260    },
261    Abbreviation {
262        long: "deleted",
263        short: "D",
264    },
265    Abbreviation {
266        long: "untracked",
267        short: "?",
268    },
269    Abbreviation {
270        long: "renamed",
271        short: "R",
272    },
273    Abbreviation {
274        long: "copied",
275        short: "C",
276    },
277    Abbreviation {
278        long: "insertion",
279        short: "+",
280    },
281    Abbreviation {
282        long: "deletion",
283        short: "-",
284    },
285    Abbreviation {
286        long: "commit",
287        short: "cmt",
288    },
289    Abbreviation {
290        long: "branch",
291        short: "br",
292    },
293    Abbreviation {
294        long: "rebase",
295        short: "rb",
296    },
297    Abbreviation {
298        long: "merge",
299        short: "mrg",
300    },
301    Abbreviation {
302        long: "checkout",
303        short: "co",
304    },
305    Abbreviation {
306        long: "stash",
307        short: "st",
308    },
309    Abbreviation {
310        long: "upstream",
311        short: "u/",
312    },
313    Abbreviation {
314        long: "origin",
315        short: "o/",
316    },
317    Abbreviation {
318        long: "detached",
319        short: "det",
320    },
321    Abbreviation {
322        long: "conflict",
323        short: "!!",
324    },
325    Abbreviation {
326        long: "changes not staged for commit",
327        short: "unstaged",
328    },
329    Abbreviation {
330        long: "Changes to be committed",
331        short: "staged",
332    },
333    Abbreviation {
334        long: "nothing to commit, working tree clean",
335        short: "clean",
336    },
337];
338
339pub const CARGO: &[Abbreviation] = &[
340    Abbreviation {
341        long: "Compiling",
342        short: "CC",
343    },
344    Abbreviation {
345        long: "Downloading",
346        short: "DL",
347    },
348    Abbreviation {
349        long: "Downloaded",
350        short: "DL'd",
351    },
352    Abbreviation {
353        long: "Finished",
354        short: "OK",
355    },
356    Abbreviation {
357        long: "warning",
358        short: "W",
359    },
360    Abbreviation {
361        long: "test result: ok",
362        short: "PASS",
363    },
364    Abbreviation {
365        long: "test result: FAILED",
366        short: "FAIL",
367    },
368    Abbreviation {
369        long: "running",
370        short: "run",
371    },
372    Abbreviation {
373        long: "Blocking waiting for file lock on package cache",
374        short: "LOCK",
375    },
376    Abbreviation {
377        long: "Updating crates.io index",
378        short: "IDX",
379    },
380    Abbreviation {
381        long: "target/debug",
382        short: "t/d",
383    },
384    Abbreviation {
385        long: "target/release",
386        short: "t/r",
387    },
388];
389
390pub const NPM: &[Abbreviation] = &[
391    Abbreviation {
392        long: "added",
393        short: "+",
394    },
395    Abbreviation {
396        long: "removed",
397        short: "-",
398    },
399    Abbreviation {
400        long: "packages",
401        short: "pkgs",
402    },
403    Abbreviation {
404        long: "vulnerabilities",
405        short: "vulns",
406    },
407    Abbreviation {
408        long: "deprecated",
409        short: "depr",
410    },
411    Abbreviation {
412        long: "node_modules",
413        short: "n_m",
414    },
415    Abbreviation {
416        long: "devDependencies",
417        short: "devDeps",
418    },
419    Abbreviation {
420        long: "peerDependencies",
421        short: "peerDeps",
422    },
423    Abbreviation {
424        long: "optionalDependencies",
425        short: "optDeps",
426    },
427    Abbreviation {
428        long: "npm warn",
429        short: "W",
430    },
431    Abbreviation {
432        long: "npm error",
433        short: "E",
434    },
435];
436
437/// Applies whole-word abbreviations from the given dictionaries to the text.
438/// Only replaces at word boundaries to avoid corrupting identifiers.
439pub fn apply_dictionaries(text: &str, level: DictLevel) -> String {
440    let dicts: Vec<&[Abbreviation]> = match level {
441        DictLevel::General => vec![GENERAL],
442        DictLevel::Full => vec![GENERAL, GIT, CARGO, NPM],
443    };
444
445    let mut result = text.to_string();
446    for dict in dicts {
447        for abbr in dict {
448            result = replace_whole_word(&result, abbr.long, abbr.short);
449        }
450    }
451    result
452}
453
/// Selects which abbreviation dictionaries [`apply_dictionaries`] uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DictLevel {
    /// Only the general-purpose dictionary ([`GENERAL`]).
    General,
    /// The general dictionary plus the domain dictionaries
    /// ([`GIT`], [`CARGO`], [`NPM`]).
    Full,
}
459
/// Replaces every whole-word occurrence of `pattern` in `text` with `replacement`.
///
/// An occurrence counts as a whole word when the character immediately before
/// it and the character immediately after it (if any) are not word characters.
/// A word character is any Unicode alphanumeric character or `_`, so a pattern
/// that is part of an identifier such as `error_code` or of a longer word such
/// as `dysfunction` is left untouched.
///
/// Matching is case-sensitive. An empty `pattern` matches nothing and `text` is
/// returned unchanged. The result is always a newly allocated `String`.
fn replace_whole_word(text: &str, pattern: &str, replacement: &str) -> String {
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    // Byte length of the pattern's first character; also rejects empty patterns.
    let first_len = match pattern.chars().next() {
        Some(c) => c.len_utf8(),
        None => return text.to_string(),
    };

    let mut result = String::with_capacity(text.len());
    let mut start = 0;

    while let Some(pos) = text[start..].find(pattern) {
        let abs_pos = start + pos;
        let end_pos = abs_pos + pattern.len();

        // Look at whole characters (not bytes) on either side so that
        // non-ASCII letters are recognised as part of a word.
        let before_ok = !text[..abs_pos]
            .chars()
            .next_back()
            .map_or(false, is_word_char);
        let after_ok = !text[end_pos..].chars().next().map_or(false, is_word_char);

        result.push_str(&text[start..abs_pos]);

        if before_ok && after_ok {
            result.push_str(replacement);
            start = end_pos;
        } else {
            // Not a whole word: keep only the first character of this
            // occurrence and resume right after it, so an occurrence that
            // overlaps the rejected one can still be found.
            let resume = abs_pos + first_len;
            result.push_str(&text[abs_pos..resume]);
            start = resume;
        }
    }
    result.push_str(&text[start..]);
    result
}
487
/// Unit tests for whole-word replacement and dictionary application.
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the `function` -> `fn` rule over `input`.
    fn abbreviate_function(input: &str) -> String {
        replace_whole_word(input, "function", "fn")
    }

    #[test]
    fn whole_word_replaces_standalone() {
        assert_eq!(abbreviate_function("the function works"), "the fn works");
    }

    #[test]
    fn whole_word_skips_substring() {
        // The pattern is embedded in a longer word, so nothing changes.
        assert_eq!(abbreviate_function("dysfunction"), "dysfunction");
    }

    #[test]
    fn whole_word_at_start() {
        assert_eq!(abbreviate_function("function call"), "fn call");
    }

    #[test]
    fn whole_word_at_end() {
        assert_eq!(abbreviate_function("call function"), "call fn");
    }

    #[test]
    fn whole_word_with_punctuation() {
        // `(` is not a word character, so it acts as a boundary.
        assert_eq!(abbreviate_function("function(arg)"), "fn(arg)");
    }

    #[test]
    fn general_dict_applies() {
        let compressed = apply_dictionaries("the configuration directory", DictLevel::General);
        assert!(compressed.contains("cfg"));
        assert!(compressed.contains("dir"));
    }

    #[test]
    fn full_dict_includes_domain() {
        let r = apply_dictionaries("Compiling lean-ctx", DictLevel::Full);
        let has_cargo_abbreviation = r.contains("CC");
        assert!(has_cargo_abbreviation, "cargo abbreviation should apply: {r}");
    }

    #[test]
    fn dict_count_general() {
        let count = GENERAL.len();
        assert!(
            count >= 60,
            "should have 60+ general abbreviations, got {}",
            count
        );
    }

    #[test]
    fn dict_count_git() {
        let count = GIT.len();
        assert!(
            count >= 15,
            "should have 15+ git abbreviations, got {}",
            count
        );
    }
}