Skip to main content

cersei_compression/
toml_rules.rs

1//! Declarative command-output filter engine.
2//!
3//! Each filter is a pipeline of stages (strip_ansi → replace → match_output
4//! → strip/keep_lines → truncate_lines_at → head/tail_lines → max_lines →
5//! on_empty). Filters are keyed by a regex over the invoking command (the
6//! first word(s) of a Bash command — e.g. `^git\s+log`).
7//!
8//! Credits: adapted from rtk (Rust Token Killer) — `rtk/src/core/toml_filter.rs`.
9//! Trust-gating, disk lookup, and per-filter inline tests from rtk have been
10//! removed — this build only consumes compile-time-embedded rules.
11//! MIT © Patrick Szymkowiak. See LICENSE.
12
13use once_cell::sync::Lazy;
14use regex::{Regex, RegexSet};
15use serde::Deserialize;
16use std::collections::BTreeMap;
17
18use crate::ansi;
19
20// ─── Embedded rules ────────────────────────────────────────────────────────
21
/// Built-in rule files, embedded into the binary at compile time.
///
/// Each entry is `(label, toml_source)`. The label is only used to identify
/// the file in warnings and error messages; the TOML text comes from
/// `include_str!`. Every `src/rules/*.toml` file is parsed independently.
///
/// Order here is authoritative: `REGISTRY` loads the files in this order and
/// `find_matching` returns the first hit, so earlier entries (`git`,
/// `cargo`, …) win over the `generic` catch-all at lookup time.
///
/// Within a single file the filters live in a `BTreeMap`, so they are
/// registered in filter-name order rather than in the order they appear in
/// the TOML text.
const BUILTIN_RULE_FILES: &[(&str, &str)] = &[
    ("git", include_str!("rules/git.toml")),
    ("cargo", include_str!("rules/cargo.toml")),
    ("npm", include_str!("rules/npm.toml")),
    ("pnpm", include_str!("rules/pnpm.toml")),
    ("pytest", include_str!("rules/pytest.toml")),
    ("docker", include_str!("rules/docker.toml")),
    ("generic", include_str!("rules/generic.toml")),
];
34
35// ─── TOML schema ────────────────────────────────────────────────────────────
36
/// One `match_output` entry exactly as written in a rule file.
///
/// At apply time the rules are tried in order against the whole output
/// (lines re-joined with `\n`, after the `strip_ansi` and `replace` stages).
/// The first rule whose `pattern` matches and whose `unless` does not match
/// replaces the entire output with `message`.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct MatchOutputRule {
    /// Regex source that must match somewhere in the output.
    pattern: String,
    /// Text returned in place of the output when the rule fires.
    message: String,
    /// Optional veto regex: if it also matches the output, the rule is skipped.
    #[serde(default)]
    unless: Option<String>,
}
45
/// One `replace` entry exactly as written in a rule file.
///
/// Applied to each output line independently via `Regex::replace_all`.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct ReplaceRule {
    /// Regex source selecting the text to rewrite.
    pattern: String,
    /// Replacement text, handed unchanged to `Regex::replace_all`.
    replacement: String,
}
52
/// Top-level shape of a rule file.
///
/// Unlike the nested rule structs, this one does not set
/// `deny_unknown_fields`.
#[derive(Deserialize)]
struct RuleFile {
    /// Format version; `parse_and_compile` rejects anything other than 1.
    schema_version: u32,
    /// Filters keyed by name. A `BTreeMap` iterates in key order, so filters
    /// from one file are compiled and registered sorted by name.
    #[serde(default)]
    filters: BTreeMap<String, FilterDef>,
}
59
/// One `[filters.<name>]` table exactly as written in a rule file.
///
/// All patterns are still plain strings here; `compile` turns them into
/// regexes and produces the `CompiledFilter` that `apply` runs.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct FilterDef {
    /// Free-form note for rule authors; accepted but never read by the engine.
    #[allow(dead_code)]
    description: Option<String>,
    /// Regex tested against the invoking command to select this filter.
    match_command: String,
    /// When true, every line is passed through `ansi::strip_ansi` first.
    #[serde(default)]
    strip_ansi: bool,
    /// Per-line regex substitutions, applied in the listed order.
    #[serde(default)]
    replace: Vec<ReplaceRule>,
    /// Whole-output rules that can short-circuit the pipeline with a fixed message.
    #[serde(default)]
    match_output: Vec<MatchOutputRule>,
    /// Drop every line matching any of these regexes.
    /// Mutually exclusive with `keep_lines_matching`.
    #[serde(default)]
    strip_lines_matching: Vec<String>,
    /// Keep only lines matching at least one of these regexes.
    /// Mutually exclusive with `strip_lines_matching`.
    #[serde(default)]
    keep_lines_matching: Vec<String>,
    /// Per-line limit handed to `ansi::truncate` (its exact unit is defined there).
    truncate_lines_at: Option<usize>,
    /// Number of leading lines to keep; the rest collapse into an "omitted" marker.
    head_lines: Option<usize>,
    /// Number of trailing lines to keep; the rest collapse into an "omitted" marker.
    tail_lines: Option<usize>,
    /// Hard cap applied after head/tail; excess lines are replaced by one
    /// "truncated" marker line.
    max_lines: Option<usize>,
    /// Text returned when the final output is empty or whitespace-only.
    on_empty: Option<String>,
}
82
83// ─── Compiled form ──────────────────────────────────────────────────────────
84
/// A `MatchOutputRule` with its regexes compiled.
#[derive(Debug)]
struct CompiledMatchOutput {
    /// Must match the joined output for the rule to fire.
    pattern: Regex,
    /// Returned as the entire result when the rule fires.
    message: String,
    /// If present and matching, suppresses the rule.
    unless: Option<Regex>,
}
91
/// A `ReplaceRule` with its regex compiled.
#[derive(Debug)]
struct CompiledReplace {
    /// Selects the text to rewrite on each line.
    pattern: Regex,
    /// Replacement text passed to `Regex::replace_all`.
    replacement: String,
}
97
/// Line-level filtering mode of a compiled filter.
///
/// `compile` rejects definitions that set both strip and keep patterns, so a
/// filter is always in exactly one of these modes.
#[derive(Debug)]
enum LineFilter {
    /// No line filtering; every line passes through.
    None,
    /// Drop lines that match any regex in the set.
    Strip(RegexSet),
    /// Keep only lines that match at least one regex in the set.
    Keep(RegexSet),
}
104
/// A filter ready to run: every pattern from its `FilterDef` is pre-compiled.
///
/// Instances are produced by `compile`, looked up with `find_matching`, and
/// executed with `apply`. Apart from `name`, the fields are private to this
/// module.
#[derive(Debug)]
pub struct CompiledFilter {
    /// Key of the filter's table in its rule file.
    #[allow(dead_code)]
    pub name: String,
    /// Tested against the invoking command by `find_matching`.
    match_regex: Regex,
    /// Stage 1: strip ANSI escapes from every line.
    strip_ansi: bool,
    /// Stage 2: per-line substitutions, applied in order.
    replace: Vec<CompiledReplace>,
    /// Stage 3: whole-output short-circuit rules.
    match_output: Vec<CompiledMatchOutput>,
    /// Stage 4: strip or keep lines by regex set.
    line_filter: LineFilter,
    /// Stage 5: per-line limit handed to `ansi::truncate`.
    truncate_lines_at: Option<usize>,
    /// Stage 6: number of leading lines to keep.
    head_lines: Option<usize>,
    /// Stage 6: number of trailing lines to keep.
    tail_lines: Option<usize>,
    /// Stage 7: cap on the number of lines (a marker line is added on top).
    max_lines: Option<usize>,
    /// Stage 8: replacement for an empty or whitespace-only result.
    on_empty: Option<String>,
}
120
121// ─── Loader ─────────────────────────────────────────────────────────────────
122
123fn compile(name: String, def: FilterDef) -> Result<CompiledFilter, String> {
124    if !def.strip_lines_matching.is_empty() && !def.keep_lines_matching.is_empty() {
125        return Err("strip_lines_matching and keep_lines_matching are mutually exclusive".into());
126    }
127    let match_regex =
128        Regex::new(&def.match_command).map_err(|e| format!("invalid match_command regex: {e}"))?;
129
130    let replace = def
131        .replace
132        .into_iter()
133        .map(|r| {
134            let pat = r.pattern.clone();
135            Regex::new(&r.pattern)
136                .map(|pattern| CompiledReplace {
137                    pattern,
138                    replacement: r.replacement,
139                })
140                .map_err(|e| format!("invalid replace '{pat}': {e}"))
141        })
142        .collect::<Result<Vec<_>, _>>()?;
143
144    let match_output = def
145        .match_output
146        .into_iter()
147        .map(|r| -> Result<CompiledMatchOutput, String> {
148            let pat = r.pattern.clone();
149            let pattern =
150                Regex::new(&r.pattern).map_err(|e| format!("invalid match_output '{pat}': {e}"))?;
151            let unless = r
152                .unless
153                .as_deref()
154                .map(|u| {
155                    Regex::new(u).map_err(|e| format!("invalid match_output unless '{u}': {e}"))
156                })
157                .transpose()?;
158            Ok(CompiledMatchOutput {
159                pattern,
160                message: r.message,
161                unless,
162            })
163        })
164        .collect::<Result<Vec<_>, _>>()?;
165
166    let line_filter = if !def.strip_lines_matching.is_empty() {
167        let set = RegexSet::new(&def.strip_lines_matching)
168            .map_err(|e| format!("invalid strip_lines_matching: {e}"))?;
169        LineFilter::Strip(set)
170    } else if !def.keep_lines_matching.is_empty() {
171        let set = RegexSet::new(&def.keep_lines_matching)
172            .map_err(|e| format!("invalid keep_lines_matching: {e}"))?;
173        LineFilter::Keep(set)
174    } else {
175        LineFilter::None
176    };
177
178    Ok(CompiledFilter {
179        name,
180        match_regex,
181        strip_ansi: def.strip_ansi,
182        replace,
183        match_output,
184        line_filter,
185        truncate_lines_at: def.truncate_lines_at,
186        head_lines: def.head_lines,
187        tail_lines: def.tail_lines,
188        max_lines: def.max_lines,
189        on_empty: def.on_empty,
190    })
191}
192
193fn parse_and_compile(content: &str, source: &str) -> Result<Vec<CompiledFilter>, String> {
194    let file: RuleFile =
195        toml::from_str(content).map_err(|e| format!("TOML parse error in {source}: {e}"))?;
196    if file.schema_version != 1 {
197        return Err(format!(
198            "unsupported schema_version {} in {source} (expected 1)",
199            file.schema_version
200        ));
201    }
202    let mut out = Vec::new();
203    for (name, def) in file.filters {
204        match compile(name.clone(), def) {
205            Ok(f) => out.push(f),
206            Err(e) => tracing::warn!("compression: filter '{name}' in {source}: {e}"),
207        }
208    }
209    Ok(out)
210}
211
212static REGISTRY: Lazy<Vec<CompiledFilter>> = Lazy::new(|| {
213    let mut out = Vec::new();
214    for (source, content) in BUILTIN_RULE_FILES {
215        match parse_and_compile(content, source) {
216            Ok(f) => out.extend(f),
217            Err(e) => tracing::warn!("compression: builtin rules '{source}' failed: {e}"),
218        }
219    }
220    out
221});
222
223/// Look up the first filter matching `command`. O(N) over a small list.
224pub fn find_matching(command: &str) -> Option<&'static CompiledFilter> {
225    REGISTRY.iter().find(|f| f.match_regex.is_match(command))
226}
227
228// ─── Pipeline ───────────────────────────────────────────────────────────────
229
230pub fn apply(filter: &CompiledFilter, stdout: &str) -> String {
231    let mut lines: Vec<String> = stdout.lines().map(String::from).collect();
232
233    // 1. strip_ansi
234    if filter.strip_ansi {
235        lines = lines.into_iter().map(|l| ansi::strip_ansi(&l)).collect();
236    }
237
238    // 2. replace (line-by-line, chainable)
239    if !filter.replace.is_empty() {
240        lines = lines
241            .into_iter()
242            .map(|mut line| {
243                for rule in &filter.replace {
244                    line = rule
245                        .pattern
246                        .replace_all(&line, rule.replacement.as_str())
247                        .into_owned();
248                }
249                line
250            })
251            .collect();
252    }
253
254    // 3. match_output (short-circuit)
255    if !filter.match_output.is_empty() {
256        let blob = lines.join("\n");
257        for rule in &filter.match_output {
258            if rule.pattern.is_match(&blob) {
259                if let Some(ref u) = rule.unless {
260                    if u.is_match(&blob) {
261                        continue;
262                    }
263                }
264                return rule.message.clone();
265            }
266        }
267    }
268
269    // 4. strip / keep
270    match &filter.line_filter {
271        LineFilter::Strip(set) => lines.retain(|l| !set.is_match(l)),
272        LineFilter::Keep(set) => lines.retain(|l| set.is_match(l)),
273        LineFilter::None => {}
274    }
275
276    // 5. truncate_lines_at
277    if let Some(n) = filter.truncate_lines_at {
278        lines = lines.into_iter().map(|l| ansi::truncate(&l, n)).collect();
279    }
280
281    // 6. head / tail
282    let total = lines.len();
283    match (filter.head_lines, filter.tail_lines) {
284        (Some(h), Some(t)) if total > h + t => {
285            let mut r = lines[..h].to_vec();
286            r.push(format!("... ({} lines omitted)", total - h - t));
287            r.extend_from_slice(&lines[total - t..]);
288            lines = r;
289        }
290        (Some(h), None) if total > h => {
291            lines.truncate(h);
292            lines.push(format!("... ({} lines omitted)", total - h));
293        }
294        (None, Some(t)) if total > t => {
295            let omitted = total - t;
296            lines = lines[omitted..].to_vec();
297            lines.insert(0, format!("... ({omitted} lines omitted)"));
298        }
299        _ => {}
300    }
301
302    // 7. max_lines
303    if let Some(max) = filter.max_lines {
304        if lines.len() > max {
305            let truncated = lines.len() - max;
306            lines.truncate(max);
307            lines.push(format!("... ({truncated} lines truncated)"));
308        }
309    }
310
311    // 8. on_empty
312    let result = lines.join("\n");
313    if result.trim().is_empty() {
314        if let Some(ref msg) = filter.on_empty {
315            return msg.clone();
316        }
317    }
318    result
319}
320
// Unit tests for rule parsing, the built-in rule set, and individual
// pipeline stages of `apply`.
#[cfg(test)]
mod tests {
    use super::*;

    // Parse and compile an inline TOML rule file, panicking if the file
    // itself is rejected.
    fn mk(toml: &str) -> Vec<CompiledFilter> {
        parse_and_compile(toml, "test").expect("valid test toml")
    }

    // Every embedded rule file must parse and yield at least one filter.
    #[test]
    fn builtin_rules_compile() {
        let mut total = 0;
        for (source, content) in BUILTIN_RULE_FILES {
            let out = parse_and_compile(content, source)
                .unwrap_or_else(|e| panic!("{source} failed: {e}"));
            assert!(!out.is_empty(), "{source} had zero filters");
            total += out.len();
        }
        // Seven rule files, each checked non-empty above.
        assert!(total >= 7);
    }

    // A matching `match_output` rule replaces the whole output with its message.
    #[test]
    fn short_circuit_match_output() {
        let f = mk(r#"
schema_version = 1
[filters.f]
match_command = "^x"
match_output = [ { pattern = "Already on", message = "ok" } ]
"#);
        assert_eq!(apply(&f[0], "Already on 'main'"), "ok");
    }

    // ANSI escapes are removed before line stripping; lines matching the
    // strip pattern are dropped.
    #[test]
    fn strip_lines_and_ansi() {
        let f = mk(r#"
schema_version = 1
[filters.f]
match_command = "^x"
strip_ansi = true
strip_lines_matching = ["^noise"]
"#);
        let out = apply(&f[0], "\x1b[31mkeep\x1b[0m\nnoise line\nalso keep");
        assert_eq!(out, "keep\nalso keep");
    }

    // With both head and tail set, the middle of the output collapses into
    // a single "omitted" marker line.
    #[test]
    fn head_tail_collapses_middle() {
        let f = mk(r#"
schema_version = 1
[filters.f]
match_command = "^x"
head_lines = 2
tail_lines = 2
"#);
        // Six lines with head = 2 and tail = 2 leaves two lines omitted.
        let src = "a\nb\nc\nd\ne\nf";
        let out = apply(&f[0], src);
        assert!(out.starts_with("a\nb\n"));
        assert!(out.contains("2 lines omitted"));
        assert!(out.ends_with("e\nf"));
    }

    // Lookup through the global registry: some built-in filter matches `git log`.
    #[test]
    fn builtin_git_log_matches() {
        let hit = find_matching("git log --oneline -20");
        assert!(hit.is_some(), "git rule should match `git log`");
    }

    // Lookup through the global registry: some built-in filter matches `cargo build`.
    #[test]
    fn builtin_cargo_build_matches() {
        let hit = find_matching("cargo build --release");
        assert!(hit.is_some(), "cargo rule should match `cargo build`");
    }
}