Skip to main content

aft/compress/
toml_filter.rs

1//! Declarative TOML output filters for hoisted bash compression.
2//!
3//! TOML filters are a complement to the Rust `Compressor` modules. They cover
4//! the long tail of CLI tools whose output is amenable to simple
5//! strip + truncate + cap + shortcircuit pipelines without requiring stateful
6//! parsing or invocation rewrite.
7//!
8//! ## Pipeline
9//!
//! For a matched filter, output flows through:
//! 1. ANSI escape stripping — on by default; disable with `[ansi] strip = false`
//! 2. `[strip]` — drop lines matching any regex (compiled with multiline mode)
//! 3. `[shortcircuit]` — if remaining content matches `when`, replace with `replacement`
//!    (compiled without multiline mode; use `(?m)` explicitly for line anchors,
//!    and use `\A...\z` for full-body anchors such as empty output); skipped when
//!    the caller reports a non-zero exit code
//! 4. `[truncate]` — middle-truncate lines longer than `line_max`
//! 5. `[class_cap]` if present, otherwise `[cap]` — keep at most `max_lines` lines
//!    (head/tail/middle); the two never stack
17//!
18//! ## Sources
19//!
20//! Filters come from three sources, layered project > user > builtin by filename:
21//! - **builtin**: shipped via `include_str!()` from `compress/builtin_filters/`
22//! - **user**: `~/.config/aft/filters/*.toml` (or `$XDG_CONFIG_HOME`-aware path)
23//! - **project**: `<project>/.aft/filters/*.toml` — trust-gated, see [`crate::compress::trust`]
24//!
25//! Bad filters are skipped with a warning, never panic.
26
27use std::collections::HashMap;
28use std::fs;
29use std::path::{Path, PathBuf};
30
31use regex::{Regex, RegexBuilder};
32use serde::Deserialize;
33
34use crate::compress::caps::{cap_classified_blocks_with, ClassifiedBlock, DropClass};
35use crate::compress::CompressionResult;
36
/// Compiled-size budget handed to `RegexBuilder::size_limit` for every filter
/// regex (2 MiB). Matches the budget RTK uses for its declarative filters; far
/// more than any realistic compress regex needs.
const REGEX_SIZE_LIMIT: usize = 2 << 20;

/// Upper bound on the number of patterns a filter may declare. The check is
/// applied to `[strip].patterns` and `[class_cap].patterns` separately, so
/// pathologically large filter files are rejected at load time instead of
/// inflating startup cost or memory.
const MAX_PATTERNS_PER_FILTER: usize = 256;

/// `line_max` used when `[truncate]` (or its `line_max` key) is absent.
/// `usize::MAX` means "never truncate", matching the generic compressor's
/// stance of tolerating long lines unless told otherwise.
const DEFAULT_LINE_MAX: usize = usize::MAX;

/// `max_lines` used when `[cap]` (or its `max_lines` key) is absent.
/// `usize::MAX` means "no line cap".
const DEFAULT_MAX_LINES: usize = usize::MAX;
52
53/// One TOML filter, parsed and ready to apply.
54#[derive(Debug, Clone)]
55pub struct TomlFilter {
56    pub name: String,
57    pub source: FilterSource,
58    pub matches: Vec<String>,
59    pub description: Option<String>,
60    pub strip: Vec<Regex>,
61    pub line_max: usize,
62    pub max_lines: usize,
63    pub keep: KeepMode,
64    pub class_cap: Option<TomlClassCap>,
65    pub shortcircuit_when: Option<Regex>,
66    pub shortcircuit_replacement: Option<String>,
67    pub strip_ansi: bool,
68}
69
/// Origin of a filter definition.
///
/// On-disk variants carry the path of the file the filter was read from, so
/// two filters with the same name but different files compare unequal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterSource {
    /// Compiled into the binary.
    Builtin,
    /// Read from the user's filter directory.
    User { path: PathBuf },
    /// Read from the project's filter directory.
    Project { path: PathBuf },
}
77
/// Which part of the output survives the plain `[cap]` stage when it has more
/// than `max_lines` lines. Defaults to [`KeepMode::Tail`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum KeepMode {
    /// Keep the first `max_lines` lines.
    Head,
    /// Keep the last `max_lines` lines.
    #[default]
    Tail,
    /// Keep lines from both ends and drop the middle.
    Middle,
}
85
86#[derive(Debug, Clone)]
87pub struct TomlClassCap {
88    pub class: DropClass,
89    pub max: usize,
90    pub patterns: Vec<Regex>,
91}
92
93/// Aggregate registry of all loaded filters across all sources.
94///
95/// Lookup is by command program name (first non-env, non-path token of the
96/// command). Project filters override user filters override builtin filters
97/// when their `matches[]` overlap.
98#[derive(Debug, Default, Clone)]
99pub struct FilterRegistry {
100    /// Map from program name → resolved filter (already merged across sources).
101    by_match: HashMap<String, TomlFilter>,
102    /// All filters, indexed by `(source-priority, name)` for tooling/listing.
103    /// Order is builtin → user → project so lower-priority entries appear first.
104    all: Vec<TomlFilter>,
105    /// Non-fatal load warnings the agent or doctor command should surface.
106    warnings: Vec<String>,
107}
108
109impl FilterRegistry {
110    /// Look up a filter for a command. Returns the highest-priority filter
111    /// whose `matches[]` contains the command's program name.
112    pub fn lookup(&self, command: &str) -> Option<&TomlFilter> {
113        let program = program_name(command)?;
114        self.by_match.get(program)
115    }
116
117    /// All filters loaded into this registry, in builtin → user → project order.
118    pub fn all(&self) -> &[TomlFilter] {
119        &self.all
120    }
121
122    /// Non-fatal warnings emitted during load. Use these for doctor / configure
123    /// warning surfacing.
124    pub fn warnings(&self) -> &[String] {
125        &self.warnings
126    }
127}
128
129/// Build a registry from sources in priority order.
130///
131/// `builtin_inputs` is supplied by the caller (shipped via `include_str!`)
132/// because constants live in `crate::compress::mod`.
133pub fn build_registry(
134    builtin_inputs: &[(&'static str, &'static str)],
135    user_dir: Option<&Path>,
136    project_dir: Option<&Path>,
137) -> FilterRegistry {
138    let mut registry = FilterRegistry::default();
139
140    // Builtin: always loaded.
141    for (name, content) in builtin_inputs {
142        match parse_filter(name, content, FilterSource::Builtin) {
143            Ok(filter) => insert_filter(&mut registry, filter),
144            Err(e) => registry
145                .warnings
146                .push(format!("builtin filter {name}: {e}")),
147        }
148    }
149
150    // User: loaded if dir exists.
151    if let Some(dir) = user_dir {
152        load_dir(dir, &mut registry, |path| FilterSource::User {
153            path: path.to_path_buf(),
154        });
155    }
156
157    // Project: loaded if dir exists. Caller is responsible for trust gating
158    // *before* calling this — pass `None` for `project_dir` if the project
159    // is untrusted.
160    if let Some(dir) = project_dir {
161        load_dir(dir, &mut registry, |path| FilterSource::Project {
162            path: path.to_path_buf(),
163        });
164    }
165
166    registry
167}
168
169fn load_dir<F>(dir: &Path, registry: &mut FilterRegistry, source_for: F)
170where
171    F: Fn(&Path) -> FilterSource,
172{
173    let entries = match fs::read_dir(dir) {
174        Ok(entries) => entries,
175        Err(e) => {
176            // Missing dir is normal; only warn on real IO errors.
177            if e.kind() != std::io::ErrorKind::NotFound {
178                registry
179                    .warnings
180                    .push(format!("filter dir {}: {e}", dir.display()));
181            }
182            return;
183        }
184    };
185
186    let mut paths: Vec<PathBuf> = entries
187        .filter_map(|res| res.ok())
188        .map(|entry| entry.path())
189        .filter(|path| path.extension().and_then(|s| s.to_str()) == Some("toml"))
190        .collect();
191    paths.sort();
192
193    for path in paths {
194        let content = match fs::read_to_string(&path) {
195            Ok(s) => s,
196            Err(e) => {
197                registry
198                    .warnings
199                    .push(format!("filter {}: read failed: {e}", path.display()));
200                continue;
201            }
202        };
203        let name = path
204            .file_stem()
205            .and_then(|s| s.to_str())
206            .unwrap_or("<unknown>")
207            .to_string();
208        let source = source_for(&path);
209        match parse_filter(&name, &content, source) {
210            Ok(filter) => insert_filter(registry, filter),
211            Err(e) => registry
212                .warnings
213                .push(format!("filter {}: {e}", path.display())),
214        }
215    }
216}
217
218fn insert_filter(registry: &mut FilterRegistry, filter: TomlFilter) {
219    // Higher-priority sources (project > user > builtin) overwrite earlier
220    // entries with the same `match` keyword. Filename-keyed override is also
221    // implicit because higher-priority filters arrive later in build order.
222    for keyword in &filter.matches {
223        registry.by_match.insert(keyword.clone(), filter.clone());
224    }
225    // Replace any existing entry in `all` for the same logical name+source so
226    // re-loads don't duplicate (mainly relevant in tests).
227    registry
228        .all
229        .retain(|existing| !(existing.name == filter.name && existing.source == filter.source));
230    registry.all.push(filter);
231}
232
233#[derive(Debug, Deserialize)]
234struct RawFilter {
235    #[serde(default)]
236    filter: RawFilterMeta,
237    #[serde(default)]
238    strip: Option<RawStrip>,
239    #[serde(default)]
240    truncate: Option<RawTruncate>,
241    #[serde(default)]
242    cap: Option<RawCap>,
243    #[serde(default)]
244    class_cap: Option<RawClassCap>,
245    #[serde(default)]
246    shortcircuit: Option<RawShortcircuit>,
247    #[serde(default)]
248    ansi: Option<RawAnsi>,
249}
250
251#[derive(Debug, Deserialize, Default)]
252struct RawFilterMeta {
253    #[serde(default)]
254    matches: Vec<String>,
255    #[serde(default)]
256    description: Option<String>,
257}
258
259#[derive(Debug, Deserialize, Default)]
260struct RawStrip {
261    #[serde(default)]
262    patterns: Vec<String>,
263}
264
265#[derive(Debug, Deserialize, Default)]
266struct RawTruncate {
267    #[serde(default)]
268    line_max: Option<usize>,
269}
270
271#[derive(Debug, Deserialize, Default)]
272struct RawCap {
273    #[serde(default)]
274    max_lines: Option<usize>,
275    #[serde(default)]
276    keep: Option<String>,
277}
278
279#[derive(Debug, Deserialize, Default)]
280struct RawClassCap {
281    #[serde(default)]
282    class: Option<String>,
283    #[serde(default)]
284    max: Option<usize>,
285    #[serde(default)]
286    patterns: Vec<String>,
287}
288
289#[derive(Debug, Deserialize, Default)]
290struct RawShortcircuit {
291    #[serde(default)]
292    when: Option<String>,
293    #[serde(default)]
294    replacement: Option<String>,
295}
296
297#[derive(Debug, Deserialize, Default)]
298struct RawAnsi {
299    #[serde(default)]
300    strip: Option<bool>,
301}
302
303/// Parse one filter from TOML text. Returns a load-time error string suitable
304/// for surfacing in warnings; never panics.
305pub fn parse_filter(name: &str, content: &str, source: FilterSource) -> Result<TomlFilter, String> {
306    let raw: RawFilter = toml::from_str(content).map_err(|e| format!("invalid TOML: {e}"))?;
307
308    let mut matches = raw.filter.matches;
309    if matches.is_empty() {
310        // Default to filename-as-program when [filter].matches is omitted.
311        matches.push(name.to_string());
312    }
313    for keyword in &matches {
314        if keyword.is_empty() || keyword.contains(char::is_whitespace) {
315            return Err(format!("invalid match keyword {keyword:?}"));
316        }
317    }
318
319    let strip_patterns = raw.strip.unwrap_or_default().patterns;
320    if strip_patterns.len() > MAX_PATTERNS_PER_FILTER {
321        return Err(format!(
322            "too many strip patterns ({} > {MAX_PATTERNS_PER_FILTER})",
323            strip_patterns.len()
324        ));
325    }
326    let mut strip = Vec::with_capacity(strip_patterns.len());
327    for pattern in strip_patterns {
328        let regex =
329            build_regex(&pattern, true).map_err(|e| format!("strip pattern {pattern:?}: {e}"))?;
330        strip.push(regex);
331    }
332
333    let line_max = raw
334        .truncate
335        .as_ref()
336        .and_then(|t| t.line_max)
337        .unwrap_or(DEFAULT_LINE_MAX);
338
339    let cap = raw.cap.unwrap_or_default();
340    let max_lines = cap.max_lines.unwrap_or(DEFAULT_MAX_LINES);
341    let keep = match cap.keep.as_deref() {
342        None => KeepMode::default(),
343        Some("head") => KeepMode::Head,
344        Some("tail") => KeepMode::Tail,
345        Some("middle") => KeepMode::Middle,
346        Some(other) => return Err(format!("invalid cap.keep {other:?}")),
347    };
348
349    let class_cap = match raw.class_cap {
350        Some(raw_class_cap) => {
351            if raw_class_cap.patterns.len() > MAX_PATTERNS_PER_FILTER {
352                return Err(format!(
353                    "too many class_cap patterns ({} > {MAX_PATTERNS_PER_FILTER})",
354                    raw_class_cap.patterns.len()
355                ));
356            }
357            let class = parse_drop_class(raw_class_cap.class.as_deref().unwrap_or("list"))?;
358            let mut patterns = Vec::with_capacity(raw_class_cap.patterns.len());
359            for pattern in raw_class_cap.patterns {
360                let regex = build_regex(&pattern, true)
361                    .map_err(|e| format!("class_cap pattern {pattern:?}: {e}"))?;
362                patterns.push(regex);
363            }
364            Some(TomlClassCap {
365                class,
366                max: raw_class_cap.max.unwrap_or_else(|| class.default_cap()),
367                patterns,
368            })
369        }
370        None => None,
371    };
372
373    let shortcircuit = raw.shortcircuit.unwrap_or_default();
374    let (shortcircuit_when, shortcircuit_replacement) =
375        match (shortcircuit.when, shortcircuit.replacement) {
376            (Some(when), Some(replacement)) => {
377                let regex = build_regex(&when, false)
378                    .map_err(|e| format!("shortcircuit.when {when:?}: {e}"))?;
379                (Some(regex), Some(replacement))
380            }
381            (Some(_), None) => return Err("shortcircuit.when set but replacement missing".into()),
382            (None, Some(_)) => return Err("shortcircuit.replacement set but when missing".into()),
383            (None, None) => (None, None),
384        };
385
386    let strip_ansi = raw.ansi.and_then(|a| a.strip).unwrap_or(true);
387
388    Ok(TomlFilter {
389        name: name.to_string(),
390        source,
391        matches,
392        description: raw.filter.description,
393        strip,
394        line_max,
395        max_lines,
396        keep,
397        class_cap,
398        shortcircuit_when,
399        shortcircuit_replacement,
400        strip_ansi,
401    })
402}
403
404fn build_regex(pattern: &str, multiline: bool) -> Result<Regex, String> {
405    RegexBuilder::new(pattern)
406        .size_limit(REGEX_SIZE_LIMIT)
407        .multi_line(multiline)
408        .build()
409        .map_err(|e| e.to_string())
410}
411
412/// Run the filter pipeline on `output`. Returns compressed text.
413///
414/// Pipeline (in order):
415/// 1. ANSI strip (if `filter.strip_ansi`)
416/// 2. `[strip]` — drop matching lines
417/// 3. `[shortcircuit]` — if remainder matches `when`, return `replacement`
418/// 4. `[truncate]` — middle-truncate per line at `line_max`
419/// 5. `[cap]` — apply `max_lines` with `keep` mode
420pub fn apply_filter(filter: &TomlFilter, output: &str) -> CompressionResult {
421    apply_filter_with_exit_code(filter, output, None)
422}
423
424pub fn apply_filter_with_exit_code(
425    filter: &TomlFilter,
426    output: &str,
427    exit_code: Option<i32>,
428) -> CompressionResult {
429    let stripped_ansi = if filter.strip_ansi {
430        crate::compress::generic::strip_ansi(output)
431    } else {
432        output.to_string()
433    };
434
435    // Phase 1: line strip
436    let original_line_count = stripped_ansi.lines().count();
437    let kept: Vec<&str> = stripped_ansi
438        .lines()
439        .filter(|line| !filter.strip.iter().any(|re| re.is_match(line)))
440        .collect();
441    let strip_removed_lines = kept.len() < original_line_count;
442    let after_strip = kept.join("\n");
443
444    // Phase 2: shortcircuit (against the after-strip body)
445    if !matches!(exit_code, Some(code) if code != 0) {
446        if let (Some(when), Some(replacement)) =
447            (&filter.shortcircuit_when, &filter.shortcircuit_replacement)
448        {
449            if when.is_match(&after_strip) {
450                return CompressionResult::new(replacement.clone());
451            }
452        }
453    }
454
455    // Phase 3: per-line truncation
456    let truncated: Vec<String> = if filter.line_max == usize::MAX {
457        kept.iter().map(|s| (*s).to_string()).collect()
458    } else {
459        kept.iter()
460            .map(|line| truncate_line(line, filter.line_max))
461            .collect()
462    };
463
464    // Phase 4: class cap replaces plain [cap] when present; the two never stack.
465    if let Some(class_cap) = &filter.class_cap {
466        return cap_class_lines(&truncated, class_cap);
467    }
468
469    // Phase 5: plain line cap
470    cap_lines(
471        &truncated,
472        filter.max_lines,
473        filter.keep,
474        strip_removed_lines,
475    )
476}
477
/// Middle-truncate `line` to fit `line_max` characters.
///
/// Lines that already fit are returned unchanged. Otherwise the result is
/// `(line_max - 3) / 2` characters from each end joined by a single `…`.
/// Lengths are counted in `char`s, so multi-byte text is never split inside
/// a code point. For `line_max < 5` nothing is kept from either side and the
/// result is just `…`.
fn truncate_line(line: &str, line_max: usize) -> String {
    let total = line.chars().count();
    if total <= line_max {
        return line.to_owned();
    }

    // Budget three characters for the marker, split the rest evenly.
    let side = line_max.saturating_sub(3) / 2;

    // Byte offset just past the first `side` chars.
    let head_end = line
        .char_indices()
        .nth(side)
        .map_or(line.len(), |(offset, _)| offset);
    // Byte offset where the last `side` chars begin.
    let tail_start = match side {
        0 => line.len(),
        _ => line
            .char_indices()
            .rev()
            .nth(side - 1)
            .map_or(0, |(offset, _)| offset),
    };

    let mut out = String::with_capacity(head_end + '…'.len_utf8() + (line.len() - tail_start));
    out.push_str(&line[..head_end]);
    out.push('…');
    out.push_str(&line[tail_start..]);
    out
}
495
496fn cap_class_lines(lines: &[String], class_cap: &TomlClassCap) -> CompressionResult {
497    let blocks = lines
498        .iter()
499        .map(|line| {
500            if class_cap.patterns.is_empty()
501                || class_cap
502                    .patterns
503                    .iter()
504                    .any(|pattern| pattern.is_match(line))
505            {
506                ClassifiedBlock::new(class_cap.class, line.clone())
507            } else {
508                ClassifiedBlock::unclassified(line.clone())
509            }
510        })
511        .collect();
512    let capped = cap_classified_blocks_with(blocks, |class| {
513        if class == class_cap.class {
514            class_cap.max
515        } else {
516            class.default_cap()
517        }
518    });
519    CompressionResult::with_class_drops(capped.text, capped.dropped_by_class)
520}
521
522fn cap_lines(
523    lines: &[String],
524    max_lines: usize,
525    keep: KeepMode,
526    had_prior_line_drop: bool,
527) -> CompressionResult {
528    if lines.len() <= max_lines || max_lines == usize::MAX {
529        return CompressionResult::new(lines.join("\n"));
530    }
531
532    if max_lines == 0 {
533        return CompressionResult::with_inner_drop(String::new(), false);
534    }
535
536    let kept = match keep {
537        KeepMode::Head => lines.iter().take(max_lines).cloned().collect::<Vec<_>>(),
538        KeepMode::Tail => lines
539            .iter()
540            .skip(lines.len().saturating_sub(max_lines))
541            .cloned()
542            .collect::<Vec<_>>(),
543        KeepMode::Middle => {
544            let head_count = max_lines / 2;
545            let tail_count = max_lines - head_count;
546            let mut kept: Vec<String> = lines.iter().take(head_count).cloned().collect();
547            kept.extend(lines.iter().skip(lines.len() - tail_count).cloned());
548            kept
549        }
550    };
551    if matches!(keep, KeepMode::Tail) && !had_prior_line_drop {
552        let dropped_prefix_lines = lines.len().saturating_sub(max_lines);
553        CompressionResult::with_prefix_drop(kept.join("\n"), dropped_prefix_lines + 1)
554    } else {
555        CompressionResult::with_inner_drop(kept.join("\n"), false)
556    }
557}
558
559fn parse_drop_class(value: &str) -> Result<DropClass, String> {
560    match value {
561        "error" | "errors" => Ok(DropClass::Error),
562        "warning" | "warnings" => Ok(DropClass::Warning),
563        "failure" | "failures" => Ok(DropClass::Failure),
564        "issue" | "issues" => Ok(DropClass::Issue),
565        "list" | "list_item" | "list-items" | "list items" => Ok(DropClass::List),
566        "inventory" | "inventory_item" | "inventory-items" | "inventory items" => {
567            Ok(DropClass::Inventory)
568        }
569        "timing" | "timing_line" | "timing-lines" | "timing lines" => Ok(DropClass::Timing),
570        other => Err(format!("invalid class_cap.class {other:?}")),
571    }
572}
573
574/// Extract the program name from a command line, stripping leading env-var
575/// assignments (`FOO=bar `) and absolute or relative paths (`/usr/bin/make`,
576/// `./node_modules/.bin/eslint`).
577///
578/// Examples:
579/// - `"make build"` → `Some("make")`
580/// - `"FOO=1 BAR=2 make"` → `Some("make")`
581/// - `"/usr/bin/cargo build"` → `Some("cargo")`
582/// - `""` → `None`
583pub fn program_name(command: &str) -> Option<&str> {
584    for token in command.split_whitespace() {
585        // Skip leading env-var assignments (key=value with no shell metachars).
586        if is_env_assignment(token) {
587            continue;
588        }
589        // Strip path prefix.
590        return Some(basename(token));
591    }
592    None
593}
594
/// Report whether `token` looks like a shell `NAME=value` assignment.
///
/// The part before the first `=` must be a valid shell variable name: ASCII
/// letters, digits and underscores, not starting with a digit. A token such
/// as `1FOO=bar` is therefore *not* an assignment (the shell would run it as
/// a command), and neither are `=value` or `--flag=value`.
fn is_env_assignment(token: &str) -> bool {
    let Some((key, _value)) = token.split_once('=') else {
        return false;
    };
    let mut chars = key.chars();
    match chars.next() {
        // First character: letter or underscore only.
        Some(first) if first == '_' || first.is_ascii_alphabetic() => {
            chars.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
602
/// Return the part of `token` after its last path separator.
///
/// Both `/` and `\` count as separators. A token without any separator is
/// returned unchanged; a token ending in a separator yields `""`.
fn basename(token: &str) -> &str {
    token
        .rsplit(|c: char| matches!(c, '/' | '\\'))
        .next()
        .unwrap_or(token)
}
615
/// Unit tests for filter parsing, the apply pipeline, program-name extraction
/// and registry layering.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `content` as a builtin filter named `test`, panicking on error.
    fn parse(content: &str) -> TomlFilter {
        parse_filter("test", content, FilterSource::Builtin).expect("parse")
    }

    #[test]
    fn parses_minimal_filter() {
        let filter = parse(
            r#"
[filter]
matches = ["make"]
"#,
        );
        assert_eq!(filter.matches, vec!["make"]);
        assert_eq!(filter.line_max, usize::MAX);
        assert_eq!(filter.max_lines, usize::MAX);
        assert!(filter.strip.is_empty());
        assert!(filter.shortcircuit_when.is_none());
        assert!(filter.strip_ansi);
    }

    #[test]
    fn filename_default_match() {
        // No `[filter].matches` at all → filter name is used as the program keyword.
        let filter = parse_filter("ls", "", FilterSource::Builtin).expect("parse");
        assert_eq!(filter.matches, vec!["ls"]);
    }

    #[test]
    fn rejects_invalid_match_keyword() {
        let err = parse_filter(
            "bad",
            r#"[filter]
matches = ["has whitespace"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("invalid match keyword"), "got: {err}");
    }

    #[test]
    fn rejects_bad_strip_regex() {
        let err = parse_filter(
            "bad",
            r#"
[filter]
matches = ["bad"]

[strip]
patterns = ["[unclosed"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("strip pattern"), "got: {err}");
    }

    #[test]
    fn strip_drops_matching_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^Entering directory', '^Leaving directory']
"#,
        );
        let input = "Entering directory `/tmp`\ngcc -c foo.c\nLeaving directory `/tmp`";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "gcc -c foo.c");
    }

    #[test]
    fn shortcircuit_replaces_empty_after_strip() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^make\[\d+\]:.*']

[shortcircuit]
when = '\A\z'
replacement = "make: ok"
"#,
        );
        let input = "make[1]: Entering directory `/tmp`\nmake[1]: Leaving directory `/tmp`";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "make: ok");
    }

    #[test]
    fn shortcircuit_line_anchors_do_not_match_inner_blank_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[shortcircuit]
when = '^\s*$'
replacement = "ok"
"#,
        );
        let out = apply_filter(&filter, "error\n\nhint").text;
        assert_eq!(out, "error\n\nhint");
    }

    #[test]
    fn cap_tail_keeps_last_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 3
keep = "tail"
"#,
        );
        let input = "1\n2\n3\n4\n5";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "3\n4\n5");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 3);
    }

    #[test]
    fn cap_tail_after_strip_disables_offset_hint() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ["^strip-me"]

[cap]
max_lines = 2
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "strip-me\n1\n2\n3\n4");

        assert_eq!(out.text, "3\n4");
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.offset_start_line, None);
    }

    #[test]
    fn cap_head_keeps_first_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "head"
"#,
        );
        let input = "1\n2\n3\n4";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "1\n2");
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 2);
    }

    #[test]
    fn cap_middle_keeps_head_and_tail() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 4
keep = "middle"
"#,
        );
        let input = "1\n2\n3\n4\n5\n6\n7\n8";
        let out = apply_filter(&filter, input);
        assert_eq!(out.text, "1\n2\n7\n8");
        assert!(out.had_inner_drop);
        assert!(!out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 4);
    }

    #[test]
    fn cap_zero_keeps_no_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 0
keep = "head"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out.text, "");
        assert!(out.had_inner_drop);
    }

    #[test]
    fn cap_one_keeps_one_tail_line_without_marker() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 1
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out.text, "3");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 1);
    }

    #[test]
    fn cap_two_keeps_two_tail_lines_without_marker() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3\n4");
        assert_eq!(out.text, "3\n4");
        assert!(out.had_inner_drop);
        assert!(out.offset_hint_eligible);
        assert_eq!(out.text.lines().count(), 2);
    }

    #[test]
    fn class_cap_replaces_plain_cap_without_stacking() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[class_cap]
class = "warning"
max = 2
patterns = ["^warning"]

[cap]
max_lines = 1
keep = "head"
"#,
        );
        let out = apply_filter(&filter, "warning 1\nkeep me\nwarning 2\nwarning 3");

        assert!(out.text.contains("warning 1"));
        assert!(out.text.contains("keep me"));
        assert!(out.text.contains("warning 2"));
        assert!(!out.text.contains("warning 3"));
        assert_eq!(out.dropped_by_class.get(&DropClass::Warning), Some(&1));
        assert!(out.text.lines().count() > 1, "plain [cap] must not stack");
    }

    #[test]
    fn truncate_per_line() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[truncate]
line_max = 10
"#,
        );
        let input = "shortline\nthis is a very long line indeed";
        let out = apply_filter(&filter, input).text;
        assert!(out.contains("shortline"));
        assert!(out.contains("…"));
        // Every line — not just the one that was already short — must fit.
        assert!(
            out.lines().all(|l| l.chars().count() <= 10),
            "line over budget in: {out:?}"
        );
    }

    #[test]
    fn ansi_strip_default_true() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, "red text");
    }

    #[test]
    fn ansi_strip_can_be_disabled() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[ansi]
strip = false
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input).text;
        assert_eq!(out, input);
    }

    #[test]
    fn shortcircuit_runs_on_after_strip_body() {
        // After stripping all lines we have empty string; shortcircuit `^$` matches.
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^.*$']

[shortcircuit]
when = '^$'
replacement = "ok"
"#,
        );
        assert_eq!(apply_filter(&filter, "anything\nat all").text, "ok");
    }

    #[test]
    fn program_name_handles_env_and_paths() {
        assert_eq!(program_name("make build"), Some("make"));
        assert_eq!(program_name("FOO=1 BAR=2 make build"), Some("make"));
        assert_eq!(program_name("/usr/bin/cargo build"), Some("cargo"));
        assert_eq!(program_name("./node_modules/.bin/eslint ."), Some("eslint"));
        // Path is the program; subsequent tokens are arguments.
        assert_eq!(program_name("FOO=bar /opt/x/y subcmd"), Some("y"));
        assert_eq!(program_name(""), None);
        assert_eq!(program_name("   "), None);
    }

    #[test]
    fn program_name_unquoted_windows_path() {
        // Unquoted Windows paths with spaces won't round-trip cleanly because
        // split_whitespace breaks on the embedded space. This is acceptable —
        // bash would fail to execute these without quoting too, and AFT's
        // shell handlers run the literal command. Document the behavior.
        // basename strips through the last backslash even on the broken-by-whitespace
        // first token, leaving "Program".
        assert_eq!(
            program_name(r"C:\Program Files\Git\bin\git.exe status"),
            Some("Program")
        );
    }

    #[test]
    fn program_name_does_not_skip_non_assignment_token_with_equals() {
        // `=value` (no key) is not an env assignment.
        assert_eq!(program_name("=oops echo hi"), Some("=oops"));
    }

    #[test]
    fn registry_lookup_by_program_name() {
        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]

[strip]
patterns = ['^Entering']
"#,
            )],
            None,
            None,
        );
        let f = registry.lookup("make build foo").unwrap();
        assert_eq!(f.matches, vec!["make"]);
        assert!(matches!(f.source, FilterSource::Builtin));
    }

    #[test]
    fn registry_user_overrides_builtin() {
        let tmp = tempfile::tempdir().unwrap();
        let user_path = tmp.path().join("make.toml");
        fs::write(
            &user_path,
            r#"[filter]
matches = ["make"]
description = "user override"
"#,
        )
        .unwrap();

        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]
description = "builtin"
"#,
            )],
            Some(tmp.path()),
            None,
        );
        let f = registry.lookup("make build").unwrap();
        assert_eq!(f.description.as_deref(), Some("user override"));
        assert!(matches!(f.source, FilterSource::User { .. }));
    }

    #[test]
    fn registry_project_overrides_user() {
        let user_dir = tempfile::tempdir().unwrap();
        let project_dir = tempfile::tempdir().unwrap();
        fs::write(
            user_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "user"
"#,
        )
        .unwrap();
        fs::write(
            project_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "project"
"#,
        )
        .unwrap();

        let registry = build_registry(&[], Some(user_dir.path()), Some(project_dir.path()));
        let f = registry.lookup("make").unwrap();
        assert_eq!(f.description.as_deref(), Some("project"));
        assert!(matches!(f.source, FilterSource::Project { .. }));
    }

    #[test]
    fn bad_filter_files_warn_not_panic() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("good.toml"),
            r#"[filter]
matches = ["good"]
"#,
        )
        .unwrap();
        fs::write(tmp.path().join("bad.toml"), "not valid = toml = at all =").unwrap();

        let registry = build_registry(&[], Some(tmp.path()), None);
        assert!(registry.lookup("good").is_some());
        assert!(registry.lookup("bad").is_none());
        assert!(
            registry.warnings().iter().any(|w| w.contains("bad.toml")),
            "warnings: {:?}",
            registry.warnings()
        );
    }

    #[test]
    fn missing_dir_does_not_warn() {
        let registry = build_registry(&[], Some(Path::new("/nonexistent/path/12345")), None);
        assert!(registry.warnings().is_empty());
    }
}