Skip to main content

aft/compress/
toml_filter.rs

1//! Declarative TOML output filters for hoisted bash compression.
2//!
3//! TOML filters are a complement to the Rust `Compressor` modules. They cover
4//! the long tail of CLI tools whose output is amenable to simple
5//! strip + truncate + cap + shortcircuit pipelines without requiring stateful
6//! parsing or invocation rewrite.
7//!
8//! ## Pipeline
9//!
//! For a matched filter, output flows through:
//! 0. `[ansi]` — strip ANSI escape sequences first, unless `strip = false`
//! 1. `[strip]` — drop lines matching any regex (compiled with multiline mode)
//! 2. `[shortcircuit]` — if remaining content matches `when`, replace with `replacement`
//!    (compiled without multiline mode; use `(?m)` explicitly for line anchors,
//!    and use `\A...\z` for full-body anchors such as empty output)
//! 3. `[truncate]` — middle-truncate lines longer than `line_max`
//! 4. `[cap]` — keep at most `max_lines` lines (head/tail/middle); when
//!    `[class_cap]` is present it replaces `[cap]` and the two never stack
17//!
18//! ## Sources
19//!
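//! Filters come from three sources, layered project > user > builtin. A later
//! source wins for every `matches` keyword it declares (the filename stem is
//! the default keyword when `matches` is omitted):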
21//! - **builtin**: shipped via `include_str!()` from `compress/builtin_filters/`
22//! - **user**: `~/.config/aft/filters/*.toml` (or `$XDG_CONFIG_HOME`-aware path)
23//! - **project**: `<project>/.aft/filters/*.toml` — trust-gated, see [`crate::compress::trust`]
24//!
25//! Bad filters are skipped with a warning, never panic.
26
27use std::collections::HashMap;
28use std::fs;
29use std::path::{Path, PathBuf};
30
31use regex::{Regex, RegexBuilder};
32use serde::Deserialize;
33
34use crate::compress::caps::{cap_classified_blocks_with, ClassifiedBlock, DropClass};
35use crate::compress::CompressionResult;
36
/// Approximate per-regex byte budget. Matches the budget RTK uses for its
/// declarative filters; far more than any realistic compress regex needs.
///
/// Passed to `RegexBuilder::size_limit` by `build_regex` for every pattern.
const REGEX_SIZE_LIMIT: usize = 2 * 1024 * 1024;

/// Hard ceiling on the number of patterns in one pattern list of a filter.
/// Prevents pathologically large filter files from inflating startup cost or
/// memory.
///
/// `parse_filter` checks `[strip].patterns` and `[class_cap].patterns`
/// against this limit separately, not their combined length.
const MAX_PATTERNS_PER_FILTER: usize = 256;

/// Default per-line truncation when `[truncate]` is omitted entirely. Matches
/// existing AFT generic compressor behavior of "tolerate long lines unless told
/// otherwise".
///
/// `usize::MAX` is the sentinel `apply_filter` checks to skip truncation.
const DEFAULT_LINE_MAX: usize = usize::MAX;

/// Default line cap when `[cap]` is omitted. Matches the inline-cap budget.
///
/// `usize::MAX` makes `cap_lines` return every line unchanged.
const DEFAULT_MAX_LINES: usize = usize::MAX;
52
/// One TOML filter, parsed and ready to apply.
///
/// Built by [`parse_filter`]; all regexes are already compiled and all
/// defaults already resolved, so [`apply_filter`] never has to validate.
#[derive(Debug, Clone)]
pub struct TomlFilter {
    /// Filter name as passed to `parse_filter` (the file stem for filters
    /// loaded from a directory).
    pub name: String,
    /// Which source this filter was loaded from.
    pub source: FilterSource,
    /// Program-name keywords this filter applies to. Never empty: defaults to
    /// `[name]` when `[filter].matches` is omitted.
    pub matches: Vec<String>,
    /// Optional free-form text from `[filter].description`.
    pub description: Option<String>,
    /// Compiled `[strip].patterns`; a line matching any of them is dropped.
    pub strip: Vec<Regex>,
    /// Maximum characters per line; `usize::MAX` disables truncation.
    pub line_max: usize,
    /// Maximum number of lines kept by the plain cap; `usize::MAX` disables it.
    pub max_lines: usize,
    /// Which part of the output survives the plain cap.
    pub keep: KeepMode,
    /// Optional class-based cap; when present it is used instead of the plain cap.
    pub class_cap: Option<TomlClassCap>,
    /// Compiled `[shortcircuit].when`. `parse_filter` sets this and
    /// `shortcircuit_replacement` together or leaves both `None`.
    pub shortcircuit_when: Option<Regex>,
    /// Text returned in place of the output when `shortcircuit_when` matches.
    pub shortcircuit_replacement: Option<String>,
    /// Whether ANSI escapes are stripped before any other step (default `true`).
    pub strip_ansi: bool,
}
69
/// Where a filter came from. Drives priority and trust handling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterSource {
    /// Supplied through `build_registry`'s `builtin_inputs`.
    Builtin,
    /// Loaded from the user filter directory; `path` is the filter file.
    User { path: PathBuf },
    /// Loaded from the project filter directory; `path` is the filter file.
    Project { path: PathBuf },
}
77
/// Which lines survive when the plain `[cap]` has to drop output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum KeepMode {
    /// Keep the first `max_lines` lines.
    Head,
    /// Keep the last `max_lines` lines. Used when `cap.keep` is omitted.
    #[default]
    Tail,
    /// Keep the first `max_lines / 2` lines plus the last lines that fill the
    /// remaining budget, dropping the middle of the output.
    Middle,
}
85
/// Parsed `[class_cap]` table: caps lines of one drop class instead of
/// applying the plain line cap.
#[derive(Debug, Clone)]
pub struct TomlClassCap {
    /// Drop class assigned to matching lines (`"list"` when `class` is omitted).
    pub class: DropClass,
    /// Cap used for `class`; defaults to `class.default_cap()` when `max` is omitted.
    pub max: usize,
    /// Lines matching any of these belong to `class`. An empty list means
    /// every line belongs to it.
    pub patterns: Vec<Regex>,
}
92
/// Aggregate registry of all loaded filters across all sources.
///
/// Lookup is by command program name (first non-env, non-path token of the
/// command). Project filters override user filters override builtin filters
/// when their `matches[]` overlap.
#[derive(Debug, Default, Clone)]
pub struct FilterRegistry {
    /// Map from program name → resolved filter (already merged across sources).
    /// The most recently inserted filter claiming a keyword wins.
    by_match: HashMap<String, TomlFilter>,
    /// All filters in insertion order, for tooling/listing. Sources are loaded
    /// builtin → user → project, so lower-priority entries appear first. At
    /// most one entry exists per `(name, source)` pair.
    all: Vec<TomlFilter>,
    /// Non-fatal load warnings the agent or doctor command should surface.
    warnings: Vec<String>,
}
108
109impl FilterRegistry {
110    /// Look up a filter for a command. Returns the highest-priority filter
111    /// whose `matches[]` contains the command's program name.
112    pub fn lookup(&self, command: &str) -> Option<&TomlFilter> {
113        let program = program_name(command)?;
114        self.by_match.get(program)
115    }
116
117    /// All filters loaded into this registry, in builtin → user → project order.
118    pub fn all(&self) -> &[TomlFilter] {
119        &self.all
120    }
121
122    /// Non-fatal warnings emitted during load. Use these for doctor / configure
123    /// warning surfacing.
124    pub fn warnings(&self) -> &[String] {
125        &self.warnings
126    }
127}
128
129/// Build a registry from sources in priority order.
130///
131/// `builtin_inputs` is supplied by the caller (shipped via `include_str!`)
132/// because constants live in `crate::compress::mod`.
133pub fn build_registry(
134    builtin_inputs: &[(&'static str, &'static str)],
135    user_dir: Option<&Path>,
136    project_dir: Option<&Path>,
137) -> FilterRegistry {
138    let mut registry = FilterRegistry::default();
139
140    // Builtin: always loaded.
141    for (name, content) in builtin_inputs {
142        match parse_filter(name, content, FilterSource::Builtin) {
143            Ok(filter) => insert_filter(&mut registry, filter),
144            Err(e) => registry
145                .warnings
146                .push(format!("builtin filter {name}: {e}")),
147        }
148    }
149
150    // User: loaded if dir exists.
151    if let Some(dir) = user_dir {
152        load_dir(dir, &mut registry, |path| FilterSource::User {
153            path: path.to_path_buf(),
154        });
155    }
156
157    // Project: loaded if dir exists. Caller is responsible for trust gating
158    // *before* calling this — pass `None` for `project_dir` if the project
159    // is untrusted.
160    if let Some(dir) = project_dir {
161        load_dir(dir, &mut registry, |path| FilterSource::Project {
162            path: path.to_path_buf(),
163        });
164    }
165
166    registry
167}
168
169fn load_dir<F>(dir: &Path, registry: &mut FilterRegistry, source_for: F)
170where
171    F: Fn(&Path) -> FilterSource,
172{
173    let entries = match fs::read_dir(dir) {
174        Ok(entries) => entries,
175        Err(e) => {
176            // Missing dir is normal; only warn on real IO errors.
177            if e.kind() != std::io::ErrorKind::NotFound {
178                registry
179                    .warnings
180                    .push(format!("filter dir {}: {e}", dir.display()));
181            }
182            return;
183        }
184    };
185
186    let mut paths: Vec<PathBuf> = entries
187        .filter_map(|res| res.ok())
188        .map(|entry| entry.path())
189        .filter(|path| path.extension().and_then(|s| s.to_str()) == Some("toml"))
190        .collect();
191    paths.sort();
192
193    for path in paths {
194        let content = match fs::read_to_string(&path) {
195            Ok(s) => s,
196            Err(e) => {
197                registry
198                    .warnings
199                    .push(format!("filter {}: read failed: {e}", path.display()));
200                continue;
201            }
202        };
203        let name = path
204            .file_stem()
205            .and_then(|s| s.to_str())
206            .unwrap_or("<unknown>")
207            .to_string();
208        let source = source_for(&path);
209        match parse_filter(&name, &content, source) {
210            Ok(filter) => insert_filter(registry, filter),
211            Err(e) => registry
212                .warnings
213                .push(format!("filter {}: {e}", path.display())),
214        }
215    }
216}
217
218fn insert_filter(registry: &mut FilterRegistry, filter: TomlFilter) {
219    // Higher-priority sources (project > user > builtin) overwrite earlier
220    // entries with the same `match` keyword. Filename-keyed override is also
221    // implicit because higher-priority filters arrive later in build order.
222    for keyword in &filter.matches {
223        registry.by_match.insert(keyword.clone(), filter.clone());
224    }
225    // Replace any existing entry in `all` for the same logical name+source so
226    // re-loads don't duplicate (mainly relevant in tests).
227    registry
228        .all
229        .retain(|existing| !(existing.name == filter.name && existing.source == filter.source));
230    registry.all.push(filter);
231}
232
/// Serde mirror of a filter file's top-level TOML tables, before validation.
///
/// Every table is optional; `parse_filter` resolves defaults and compiles the
/// regexes when turning this into a [`TomlFilter`].
#[derive(Debug, Deserialize)]
struct RawFilter {
    /// `[filter]` — match keywords and description.
    #[serde(default)]
    filter: RawFilterMeta,
    /// `[strip]` — line-dropping patterns.
    #[serde(default)]
    strip: Option<RawStrip>,
    /// `[truncate]` — per-line length limit.
    #[serde(default)]
    truncate: Option<RawTruncate>,
    /// `[cap]` — plain line cap.
    #[serde(default)]
    cap: Option<RawCap>,
    /// `[class_cap]` — class-based cap.
    #[serde(default)]
    class_cap: Option<RawClassCap>,
    /// `[shortcircuit]` — whole-output replacement rule.
    #[serde(default)]
    shortcircuit: Option<RawShortcircuit>,
    /// `[ansi]` — ANSI stripping toggle.
    #[serde(default)]
    ansi: Option<RawAnsi>,
}
250
/// Raw `[filter]` table.
#[derive(Debug, Deserialize, Default)]
struct RawFilterMeta {
    /// Program-name keywords; when empty, `parse_filter` uses the filter name.
    #[serde(default)]
    matches: Vec<String>,
    /// Optional free-form description.
    #[serde(default)]
    description: Option<String>,
}
258
/// Raw `[strip]` table.
#[derive(Debug, Deserialize, Default)]
struct RawStrip {
    /// Uncompiled regex sources; `parse_filter` compiles them in multiline mode.
    #[serde(default)]
    patterns: Vec<String>,
}
264
/// Raw `[truncate]` table.
#[derive(Debug, Deserialize, Default)]
struct RawTruncate {
    /// Per-line character limit; `DEFAULT_LINE_MAX` is used when omitted.
    #[serde(default)]
    line_max: Option<usize>,
}
270
/// Raw `[cap]` table.
#[derive(Debug, Deserialize, Default)]
struct RawCap {
    /// Line cap; `DEFAULT_MAX_LINES` is used when omitted.
    #[serde(default)]
    max_lines: Option<usize>,
    /// One of `"head"`, `"tail"`, `"middle"`; any other value is rejected by
    /// `parse_filter`. Omitted means `KeepMode::default()`.
    #[serde(default)]
    keep: Option<String>,
}
278
/// Raw `[class_cap]` table.
#[derive(Debug, Deserialize, Default)]
struct RawClassCap {
    /// Drop-class name understood by `parse_drop_class`; `"list"` when omitted.
    #[serde(default)]
    class: Option<String>,
    /// Cap for that class; the class's `default_cap()` when omitted.
    #[serde(default)]
    max: Option<usize>,
    /// Uncompiled regex sources selecting the lines that belong to the class.
    #[serde(default)]
    patterns: Vec<String>,
}
288
/// Raw `[shortcircuit]` table. `parse_filter` requires `when` and
/// `replacement` to be given together or not at all.
#[derive(Debug, Deserialize, Default)]
struct RawShortcircuit {
    /// Uncompiled regex source, compiled without multiline mode.
    #[serde(default)]
    when: Option<String>,
    /// Text that replaces the whole output when `when` matches.
    #[serde(default)]
    replacement: Option<String>,
}
296
/// Raw `[ansi]` table.
#[derive(Debug, Deserialize, Default)]
struct RawAnsi {
    /// Whether to strip ANSI escapes; `parse_filter` treats omitted as `true`.
    #[serde(default)]
    strip: Option<bool>,
}
302
303/// Parse one filter from TOML text. Returns a load-time error string suitable
304/// for surfacing in warnings; never panics.
305pub fn parse_filter(name: &str, content: &str, source: FilterSource) -> Result<TomlFilter, String> {
306    let raw: RawFilter = toml::from_str(content).map_err(|e| format!("invalid TOML: {e}"))?;
307
308    let mut matches = raw.filter.matches;
309    if matches.is_empty() {
310        // Default to filename-as-program when [filter].matches is omitted.
311        matches.push(name.to_string());
312    }
313    for keyword in &matches {
314        if keyword.is_empty() || keyword.contains(char::is_whitespace) {
315            return Err(format!("invalid match keyword {keyword:?}"));
316        }
317    }
318
319    let strip_patterns = raw.strip.unwrap_or_default().patterns;
320    if strip_patterns.len() > MAX_PATTERNS_PER_FILTER {
321        return Err(format!(
322            "too many strip patterns ({} > {MAX_PATTERNS_PER_FILTER})",
323            strip_patterns.len()
324        ));
325    }
326    let mut strip = Vec::with_capacity(strip_patterns.len());
327    for pattern in strip_patterns {
328        let regex =
329            build_regex(&pattern, true).map_err(|e| format!("strip pattern {pattern:?}: {e}"))?;
330        strip.push(regex);
331    }
332
333    let line_max = raw
334        .truncate
335        .as_ref()
336        .and_then(|t| t.line_max)
337        .unwrap_or(DEFAULT_LINE_MAX);
338
339    let cap = raw.cap.unwrap_or_default();
340    let max_lines = cap.max_lines.unwrap_or(DEFAULT_MAX_LINES);
341    let keep = match cap.keep.as_deref() {
342        None => KeepMode::default(),
343        Some("head") => KeepMode::Head,
344        Some("tail") => KeepMode::Tail,
345        Some("middle") => KeepMode::Middle,
346        Some(other) => return Err(format!("invalid cap.keep {other:?}")),
347    };
348
349    let class_cap = match raw.class_cap {
350        Some(raw_class_cap) => {
351            if raw_class_cap.patterns.len() > MAX_PATTERNS_PER_FILTER {
352                return Err(format!(
353                    "too many class_cap patterns ({} > {MAX_PATTERNS_PER_FILTER})",
354                    raw_class_cap.patterns.len()
355                ));
356            }
357            let class = parse_drop_class(raw_class_cap.class.as_deref().unwrap_or("list"))?;
358            let mut patterns = Vec::with_capacity(raw_class_cap.patterns.len());
359            for pattern in raw_class_cap.patterns {
360                let regex = build_regex(&pattern, true)
361                    .map_err(|e| format!("class_cap pattern {pattern:?}: {e}"))?;
362                patterns.push(regex);
363            }
364            Some(TomlClassCap {
365                class,
366                max: raw_class_cap.max.unwrap_or_else(|| class.default_cap()),
367                patterns,
368            })
369        }
370        None => None,
371    };
372
373    let shortcircuit = raw.shortcircuit.unwrap_or_default();
374    let (shortcircuit_when, shortcircuit_replacement) =
375        match (shortcircuit.when, shortcircuit.replacement) {
376            (Some(when), Some(replacement)) => {
377                let regex = build_regex(&when, false)
378                    .map_err(|e| format!("shortcircuit.when {when:?}: {e}"))?;
379                (Some(regex), Some(replacement))
380            }
381            (Some(_), None) => return Err("shortcircuit.when set but replacement missing".into()),
382            (None, Some(_)) => return Err("shortcircuit.replacement set but when missing".into()),
383            (None, None) => (None, None),
384        };
385
386    let strip_ansi = raw.ansi.and_then(|a| a.strip).unwrap_or(true);
387
388    Ok(TomlFilter {
389        name: name.to_string(),
390        source,
391        matches,
392        description: raw.filter.description,
393        strip,
394        line_max,
395        max_lines,
396        keep,
397        class_cap,
398        shortcircuit_when,
399        shortcircuit_replacement,
400        strip_ansi,
401    })
402}
403
404fn build_regex(pattern: &str, multiline: bool) -> Result<Regex, String> {
405    RegexBuilder::new(pattern)
406        .size_limit(REGEX_SIZE_LIMIT)
407        .multi_line(multiline)
408        .build()
409        .map_err(|e| e.to_string())
410}
411
412/// Run the filter pipeline on `output`. Returns compressed text.
413///
414/// Pipeline (in order):
415/// 1. ANSI strip (if `filter.strip_ansi`)
416/// 2. `[strip]` — drop matching lines
417/// 3. `[shortcircuit]` — if remainder matches `when`, return `replacement`
418/// 4. `[truncate]` — middle-truncate per line at `line_max`
419/// 5. `[cap]` — apply `max_lines` with `keep` mode
420pub fn apply_filter(filter: &TomlFilter, output: &str) -> CompressionResult {
421    let stripped_ansi = if filter.strip_ansi {
422        crate::compress::generic::strip_ansi(output)
423    } else {
424        output.to_string()
425    };
426
427    // Phase 1: line strip
428    let original_line_count = stripped_ansi.lines().count();
429    let kept: Vec<&str> = stripped_ansi
430        .lines()
431        .filter(|line| !filter.strip.iter().any(|re| re.is_match(line)))
432        .collect();
433    let strip_removed_lines = kept.len() < original_line_count;
434    let after_strip = kept.join("\n");
435
436    // Phase 2: shortcircuit (against the after-strip body)
437    if let (Some(when), Some(replacement)) =
438        (&filter.shortcircuit_when, &filter.shortcircuit_replacement)
439    {
440        if when.is_match(&after_strip) {
441            return CompressionResult::new(replacement.clone());
442        }
443    }
444
445    // Phase 3: per-line truncation
446    let truncated: Vec<String> = if filter.line_max == usize::MAX {
447        kept.iter().map(|s| (*s).to_string()).collect()
448    } else {
449        kept.iter()
450            .map(|line| truncate_line(line, filter.line_max))
451            .collect()
452    };
453
454    // Phase 4: class cap replaces plain [cap] when present; the two never stack.
455    if let Some(class_cap) = &filter.class_cap {
456        return cap_class_lines(&truncated, class_cap);
457    }
458
459    // Phase 5: plain line cap
460    cap_lines(
461        &truncated,
462        filter.max_lines,
463        filter.keep,
464        strip_removed_lines,
465    )
466}
467
/// Middle-truncate `line` so it fits the `line_max` character budget.
///
/// Lines of at most `line_max` characters are returned unchanged. Longer
/// lines keep an equal number of characters from each end, joined by a single
/// `…`. The per-side count is `(line_max - 3) / 2` (saturating): three
/// characters of the budget are set aside for the marker even though the
/// marker itself is one character. For `line_max < 5` the result is just `…`.
fn truncate_line(line: &str, line_max: usize) -> String {
    let total_chars = line.chars().count();
    if total_chars <= line_max {
        return line.to_owned();
    }

    let per_side = line_max.saturating_sub(3) / 2;
    // Translate character positions into byte offsets so we can slice on
    // valid UTF-8 boundaries.
    let byte_offset = |char_index: usize| {
        line.char_indices()
            .nth(char_index)
            .map_or(line.len(), |(offset, _)| offset)
    };
    let head_end = byte_offset(per_side);
    let tail_start = byte_offset(total_chars - per_side);

    let mut shortened = String::with_capacity(head_end + '…'.len_utf8() + line.len() - tail_start);
    shortened.push_str(&line[..head_end]);
    shortened.push('…');
    shortened.push_str(&line[tail_start..]);
    shortened
}
485
486fn cap_class_lines(lines: &[String], class_cap: &TomlClassCap) -> CompressionResult {
487    let blocks = lines
488        .iter()
489        .map(|line| {
490            if class_cap.patterns.is_empty()
491                || class_cap
492                    .patterns
493                    .iter()
494                    .any(|pattern| pattern.is_match(line))
495            {
496                ClassifiedBlock::new(class_cap.class, line.clone())
497            } else {
498                ClassifiedBlock::unclassified(line.clone())
499            }
500        })
501        .collect();
502    let capped = cap_classified_blocks_with(blocks, |class| {
503        if class == class_cap.class {
504            class_cap.max
505        } else {
506            class.default_cap()
507        }
508    });
509    CompressionResult::with_class_drops(capped.text, capped.dropped_by_class)
510}
511
512fn cap_lines(
513    lines: &[String],
514    max_lines: usize,
515    keep: KeepMode,
516    had_prior_line_drop: bool,
517) -> CompressionResult {
518    if lines.len() <= max_lines || max_lines == usize::MAX {
519        return CompressionResult::new(lines.join("\n"));
520    }
521
522    if max_lines == 0 {
523        return CompressionResult::with_inner_drop(String::new(), false);
524    }
525
526    let kept = match keep {
527        KeepMode::Head => lines.iter().take(max_lines).cloned().collect::<Vec<_>>(),
528        KeepMode::Tail => lines
529            .iter()
530            .skip(lines.len().saturating_sub(max_lines))
531            .cloned()
532            .collect::<Vec<_>>(),
533        KeepMode::Middle => {
534            let head_count = max_lines / 2;
535            let tail_count = max_lines - head_count;
536            let mut kept: Vec<String> = lines.iter().take(head_count).cloned().collect();
537            kept.extend(lines.iter().skip(lines.len() - tail_count).cloned());
538            kept
539        }
540    };
541    if matches!(keep, KeepMode::Tail) && !had_prior_line_drop {
542        let dropped_prefix_lines = lines.len().saturating_sub(max_lines);
543        CompressionResult::with_prefix_drop(kept.join("\n"), dropped_prefix_lines + 1)
544    } else {
545        CompressionResult::with_inner_drop(kept.join("\n"), false)
546    }
547}
548
549fn parse_drop_class(value: &str) -> Result<DropClass, String> {
550    match value {
551        "error" | "errors" => Ok(DropClass::Error),
552        "warning" | "warnings" => Ok(DropClass::Warning),
553        "failure" | "failures" => Ok(DropClass::Failure),
554        "issue" | "issues" => Ok(DropClass::Issue),
555        "list" | "list_item" | "list-items" | "list items" => Ok(DropClass::List),
556        "inventory" | "inventory_item" | "inventory-items" | "inventory items" => {
557            Ok(DropClass::Inventory)
558        }
559        "timing" | "timing_line" | "timing-lines" | "timing lines" => Ok(DropClass::Timing),
560        other => Err(format!("invalid class_cap.class {other:?}")),
561    }
562}
563
564/// Extract the program name from a command line, stripping leading env-var
565/// assignments (`FOO=bar `) and absolute or relative paths (`/usr/bin/make`,
566/// `./node_modules/.bin/eslint`).
567///
568/// Examples:
569/// - `"make build"` → `Some("make")`
570/// - `"FOO=1 BAR=2 make"` → `Some("make")`
571/// - `"/usr/bin/cargo build"` → `Some("cargo")`
572/// - `""` → `None`
573pub fn program_name(command: &str) -> Option<&str> {
574    for token in command.split_whitespace() {
575        // Skip leading env-var assignments (key=value with no shell metachars).
576        if is_env_assignment(token) {
577            continue;
578        }
579        // Strip path prefix.
580        return Some(basename(token));
581    }
582    None
583}
584
/// Report whether `token` looks like a shell `NAME=value` assignment.
///
/// The part before the first `=` must be a valid shell variable name: it
/// starts with an ASCII letter or `_` and continues with ASCII letters,
/// digits, or `_`. Tokens without `=`, with an empty key (`=oops`), with a
/// key starting with a digit (`1=2`), or with any other character in the key
/// (`--flag=value`) are not assignments — the shell would treat them as the
/// command word.
fn is_env_assignment(token: &str) -> bool {
    let Some((key, _value)) = token.split_once('=') else {
        return false;
    };
    let mut key_chars = key.chars();
    match key_chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            key_chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty key or a key that starts with a digit/symbol.
        _ => false,
    }
}
592
/// Return the part of `token` after its last path separator.
///
/// Both `/` and `\` count as separators. A token without any separator is
/// returned whole; a token ending in a separator yields the empty string.
fn basename(token: &str) -> &str {
    // `rsplit` always yields at least one piece, so the fallback is never
    // actually taken; it only avoids an unwrap.
    token
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(token)
}
605
/// Unit tests for filter parsing, the apply pipeline, program-name
/// extraction, and registry layering.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `content` as a builtin filter named `test`, panicking on error.
    fn parse(content: &str) -> TomlFilter {
        parse_filter("test", content, FilterSource::Builtin).expect("parse")
    }

    // A filter with only `[filter]` gets every default.
    #[test]
    fn parses_minimal_filter() {
        let filter = parse(
            r#"
[filter]
matches = ["make"]
"#,
        );
        assert_eq!(filter.matches, vec!["make"]);
        assert_eq!(filter.line_max, usize::MAX);
        assert_eq!(filter.max_lines, usize::MAX);
        assert!(filter.strip.is_empty());
        assert!(filter.shortcircuit_when.is_none());
        assert!(filter.strip_ansi);
    }

    #[test]
    fn filename_default_match() {
        // Empty matches array → filter name is used as the program keyword.
        let filter = parse_filter("ls", "", FilterSource::Builtin).expect("parse");
        assert_eq!(filter.matches, vec!["ls"]);
    }

    // Keywords containing whitespace can never equal a program token.
    #[test]
    fn rejects_invalid_match_keyword() {
        let err = parse_filter(
            "bad",
            r#"[filter]
matches = ["has whitespace"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("invalid match keyword"), "got: {err}");
    }

    // An uncompilable strip regex rejects the whole filter.
    #[test]
    fn rejects_bad_strip_regex() {
        let err = parse_filter(
            "bad",
            r#"
[filter]
matches = ["bad"]

[strip]
patterns = ["[unclosed"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("strip pattern"), "got: {err}");
    }

    #[test]
    fn strip_drops_matching_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^Entering directory', '^Leaving directory']
"#,
        );
        let input = "Entering directory `/tmp`\ngcc -c foo.c\nLeaving directory `/tmp`";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "gcc -c foo.c");
    }

    // `\A\z` matches only a completely empty after-strip body.
    #[test]
    fn shortcircuit_replaces_empty_after_strip() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^make\[\d+\]:.*']

[shortcircuit]
when = '\A\z'
replacement = "make: ok"
"#,
        );
        let input = "make[1]: Entering directory `/tmp`\nmake[1]: Leaving directory `/tmp`";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "make: ok");
    }

    // Without multiline mode, `^`/`$` anchor to the whole body, not each line.
    #[test]
    fn shortcircuit_line_anchors_do_not_match_inner_blank_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[shortcircuit]
when = '^\s*$'
replacement = "ok"
"#,
        );
        let out = apply_filter(&filter, "error\n\nhint").text;
        assert_eq!(out, "error\n\nhint");
    }

    #[test]
    fn cap_tail_keeps_last_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 3
keep = "tail"
"#,
        );
        let input = "1\n2\n3\n4\n5";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "3\n4\n5");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 3);
    }

    // Once strip removed lines, a tail cap is no longer a pure prefix drop.
    #[test]
    fn cap_tail_after_strip_disables_offset_hint() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ["^strip-me"]

[cap]
max_lines = 2
keep = "tail"
"#,
        );
        let out = apply_filter(
            &filter,
            "strip-me
1
2
3
4",
        );

        assert_eq!(
            out.text,
            "3
4"
        );
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.offset_start_line, None);
    }

    #[test]
    fn cap_head_keeps_first_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "head"
"#,
        );
        let input = "1\n2\n3\n4";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "1\n2");
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 2);
    }

    #[test]
    fn cap_middle_keeps_head_and_tail() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 4
keep = "middle"
"#,
        );
        let input = "1\n2\n3\n4\n5\n6\n7\n8";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "1\n2\n7\n8");
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 4);
    }

    #[test]
    fn cap_zero_keeps_no_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 0
keep = "head"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out.text, "");
        assert!(out.had_inner_drop);
    }

    #[test]
    fn cap_one_keeps_one_tail_line_without_marker() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 1
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out.text, "3");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 1);
    }

    #[test]
    fn cap_two_keeps_two_tail_lines_without_marker() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3\n4");
        assert_eq!(out.text, "3\n4");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 2);
    }

    // When `[class_cap]` is present the plain `[cap]` is ignored entirely.
    #[test]
    fn class_cap_replaces_plain_cap_without_stacking() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[class_cap]
class = "warning"
max = 2
patterns = ["^warning"]

[cap]
max_lines = 1
keep = "head"
"#,
        );
        let out = apply_filter(&filter, "warning 1\nkeep me\nwarning 2\nwarning 3");

        assert!(out.text.contains("warning 1"));
        assert!(out.text.contains("keep me"));
        assert!(out.text.contains("warning 2"));
        assert!(!out.text.contains("warning 3"));
        assert_eq!(out.dropped_by_class.get(&DropClass::Warning), Some(&1));
        assert!(out.text.lines().count() > 1, "plain [cap] must not stack");
    }

    #[test]
    fn truncate_per_line() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[truncate]
line_max = 10
"#,
        );
        let input = "shortline\nthis is a very long line indeed";
        let out = apply_filter(&filter, input).text;
        assert!(out.contains("shortline"));
        assert!(out.contains("…"));
        // Every output line must respect the budget; `any` would pass on the
        // untouched short line alone and never check the truncated one.
        assert!(out.lines().all(|l| l.chars().count() <= 10));
    }

    #[test]
    fn ansi_strip_default_true() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "red text");
    }

    #[test]
    fn ansi_strip_can_be_disabled() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[ansi]
strip = false
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, input);
    }

    #[test]
    fn shortcircuit_runs_on_after_strip_body() {
        // After stripping all lines we have empty string; shortcircuit `^$` matches.
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^.*$']

[shortcircuit]
when = '^$'
replacement = "ok"
"#,
        );
        assert_eq!(apply_filter(&filter, "anything\nat all").text, "ok");
    }

    #[test]
    fn program_name_handles_env_and_paths() {
        assert_eq!(program_name("make build"), Some("make"));
        assert_eq!(program_name("FOO=1 BAR=2 make build"), Some("make"));
        assert_eq!(program_name("/usr/bin/cargo build"), Some("cargo"));
        assert_eq!(program_name("./node_modules/.bin/eslint ."), Some("eslint"));
        // Path is the program; subsequent tokens are arguments.
        assert_eq!(program_name("FOO=bar /opt/x/y subcmd"), Some("y"));
        assert_eq!(program_name(""), None);
        assert_eq!(program_name("   "), None);
    }

    #[test]
    fn program_name_unquoted_windows_path() {
        // Unquoted Windows paths with spaces won't round-trip cleanly because
        // split_whitespace breaks on the embedded space. This is acceptable —
        // bash would fail to execute these without quoting too, and AFT's
        // shell handlers run the literal command. Document the behavior.
        // basename strips through the last backslash even on the broken-by-whitespace
        // first token, leaving "Program".
        assert_eq!(
            program_name(r"C:\Program Files\Git\bin\git.exe status"),
            Some("Program")
        );
    }

    #[test]
    fn program_name_does_not_skip_non_assignment_token_with_equals() {
        // `=value` (no key) is not an env assignment.
        assert_eq!(program_name("=oops echo hi"), Some("=oops"));
    }

    #[test]
    fn registry_lookup_by_program_name() {
        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]

[strip]
patterns = ['^Entering']
"#,
            )],
            None,
            None,
        );
        let f = registry.lookup("make build foo").unwrap();
        assert_eq!(f.matches, vec!["make"]);
        assert!(matches!(f.source, FilterSource::Builtin));
    }

    // A user filter claiming the same keyword wins over the builtin one.
    #[test]
    fn registry_user_overrides_builtin() {
        let tmp = tempfile::tempdir().unwrap();
        let user_path = tmp.path().join("make.toml");
        fs::write(
            &user_path,
            r#"[filter]
matches = ["make"]
description = "user override"
"#,
        )
        .unwrap();

        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]
description = "builtin"
"#,
            )],
            Some(tmp.path()),
            None,
        );
        let f = registry.lookup("make build").unwrap();
        assert_eq!(f.description.as_deref(), Some("user override"));
        assert!(matches!(f.source, FilterSource::User { .. }));
    }

    // A project filter claiming the same keyword wins over the user one.
    #[test]
    fn registry_project_overrides_user() {
        let user_dir = tempfile::tempdir().unwrap();
        let project_dir = tempfile::tempdir().unwrap();
        fs::write(
            user_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "user"
"#,
        )
        .unwrap();
        fs::write(
            project_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "project"
"#,
        )
        .unwrap();

        let registry = build_registry(&[], Some(user_dir.path()), Some(project_dir.path()));
        let f = registry.lookup("make").unwrap();
        assert_eq!(f.description.as_deref(), Some("project"));
        assert!(matches!(f.source, FilterSource::Project { .. }));
    }

    // One broken file must not prevent the others from loading.
    #[test]
    fn bad_filter_files_warn_not_panic() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("good.toml"),
            r#"[filter]
matches = ["good"]
"#,
        )
        .unwrap();
        fs::write(tmp.path().join("bad.toml"), "not valid = toml = at all =").unwrap();

        let registry = build_registry(&[], Some(tmp.path()), None);
        assert!(registry.lookup("good").is_some());
        assert!(registry.lookup("bad").is_none());
        assert!(
            registry.warnings().iter().any(|w| w.contains("bad.toml")),
            "warnings: {:?}",
            registry.warnings()
        );
    }

    #[test]
    fn missing_dir_does_not_warn() {
        let registry = build_registry(&[], Some(Path::new("/nonexistent/path/12345")), None);
        assert!(registry.warnings().is_empty());
    }
}
1096}