Skip to main content

aft/compress/
mod.rs

1//! Output compression for hoisted bash.
2//!
3//! Compression has five tiers, tried in this order:
4//!
5//! 1. **Specific Rust [`Compressor`] modules** — hand-written parsers for
6//!    specific tools identified by tool tokens (for example `vitest`, `eslint`,
7//!    `cargo`, `git`). These win before broad package-manager compressors.
8//! 2. **Output-shape [`Compressor`] sniffers** — inner-tool parsers that can
9//!    recognize their own private summaries even when invoked through wrappers
10//!    such as `npm test`, `make test`, or `./scripts/check.sh`.
11//! 3. **Package-manager [`Compressor`] modules** — broad head-token matchers
12//!    (`npm`, `pnpm`, `bun`) that compress unclaimed package-manager output.
13//! 4. **TOML filters** — declarative strip + truncate + cap + shortcircuit
14//!    rules for the long tail of CLI tools. Loaded from builtin / user /
15//!    project sources via [`toml_filter::build_registry`]. See
16//!    [`toml_filter`] and [`trust`] for the trust model.
17//! 5. **[`generic`] fallback** — ANSI strip + consecutive-dedup. The
18//!    background bash registry owns the shared final output cap.
19
20pub mod biome;
21pub mod builtin_filters;
22pub mod bun;
23pub mod caps;
24pub mod cargo;
25pub mod eslint;
26pub mod generic;
27pub mod git;
28pub mod go;
29pub mod mypy;
30pub mod next;
31pub mod npm;
32pub mod playwright;
33pub mod pnpm;
34pub mod prettier;
35pub mod pytest;
36pub mod ruff;
37pub mod toml_filter;
38pub mod trust;
39pub mod tsc;
40pub mod vitest;
41
42use crate::context::AppContext;
43use crate::harness::Harness;
44use biome::BiomeCompressor;
45use bun::BunCompressor;
46use caps::DropClass;
47use cargo::CargoCompressor;
48use eslint::EslintCompressor;
49use generic::{strip_ansi, GenericCompressor};
50use git::GitCompressor;
51use go::{GoCompressor, GolangciLintCompressor};
52use mypy::MypyCompressor;
53use next::NextCompressor;
54use npm::NpmCompressor;
55use playwright::PlaywrightCompressor;
56use pnpm::PnpmCompressor;
57use prettier::PrettierCompressor;
58use pytest::PytestCompressor;
59use ruff::RuffCompressor;
60use std::collections::BTreeMap;
61use std::fs;
62use std::path::{Path, PathBuf};
63use std::sync::{Arc, RwLock};
64use toml_filter::{apply_filter, FilterRegistry};
65use tsc::TscCompressor;
66use vitest::VitestCompressor;
67
/// Thread-safe handle to the TOML filter registry. Shared between
/// `AppContext::filter_registry()` (for direct use in command handlers) and
/// `BgTaskRegistry`'s output compression closure (for use from the watchdog
/// thread).
///
/// Readers take the `RwLock` read guard; [`compress`] in this module recovers
/// the inner registry from a poisoned lock rather than panicking.
pub type SharedFilterRegistry = Arc<RwLock<FilterRegistry>>;
73
/// Breadth of the claim a compressor makes on a command line.
///
/// Dispatch consults the tiers in this order: `Specific` command matchers,
/// then output-shape sniffers (`Specific` before `PackageManager`), then
/// `PackageManager` command matchers, then TOML filters, and finally
/// `GenericCompressor`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Specificity {
    /// Claims a command by recognising one particular tool name as a token
    /// anywhere in the command line (vitest, eslint, biome, tsc, pytest,
    /// cargo, git) — for example `npx vitest`, `pnpm exec eslint --fix`, or
    /// `bun run vitest`.
    Specific,
    /// Claims a command by its HEAD token alone (`npm`, `pnpm`, `bun`), no
    /// matter which subcommand follows. Deliberately broad: when
    /// `bun run vitest` is not claimed by `VitestCompressor`, `BunCompressor`
    /// still gets the chance to compress generic bun output.
    PackageManager,
}
95
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub struct CompressionResult {
98    pub text: String,
99    pub dropped_by_class: BTreeMap<DropClass, usize>,
100    pub had_inner_drop: bool,
101    pub offset_hint_eligible: bool,
102    pub offset_start_line: Option<usize>,
103}
104
105impl CompressionResult {
106    pub fn new(text: impl Into<String>) -> Self {
107        Self {
108            text: text.into(),
109            dropped_by_class: BTreeMap::new(),
110            had_inner_drop: false,
111            offset_hint_eligible: true,
112            offset_start_line: None,
113        }
114    }
115
116    pub fn with_class_drops(
117        text: impl Into<String>,
118        dropped_by_class: BTreeMap<DropClass, usize>,
119    ) -> Self {
120        let had_inner_drop = !dropped_by_class.is_empty();
121        Self {
122            text: text.into(),
123            dropped_by_class,
124            had_inner_drop,
125            offset_hint_eligible: !had_inner_drop,
126            offset_start_line: None,
127        }
128    }
129
130    pub fn with_inner_drop(text: impl Into<String>, offset_hint_eligible: bool) -> Self {
131        Self {
132            text: text.into(),
133            dropped_by_class: BTreeMap::new(),
134            had_inner_drop: true,
135            offset_hint_eligible,
136            offset_start_line: None,
137        }
138    }
139
140    pub fn with_prefix_drop(text: impl Into<String>, offset_start_line: usize) -> Self {
141        Self {
142            text: text.into(),
143            dropped_by_class: BTreeMap::new(),
144            had_inner_drop: true,
145            offset_hint_eligible: true,
146            offset_start_line: Some(offset_start_line),
147        }
148    }
149
150    pub fn has_semantic_drops(&self) -> bool {
151        !self.dropped_by_class.is_empty()
152    }
153
154    pub fn has_any_drop(&self) -> bool {
155        self.had_inner_drop || self.has_semantic_drops()
156    }
157
158    pub fn map_text<F>(mut self, f: F) -> Self
159    where
160        F: FnOnce(&str) -> String,
161    {
162        self.text = f(&self.text);
163        self
164    }
165}
166
167impl std::fmt::Display for CompressionResult {
168    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
169        f.write_str(&self.text)
170    }
171}
172
173impl std::ops::Deref for CompressionResult {
174    type Target = str;
175
176    fn deref(&self) -> &Self::Target {
177        &self.text
178    }
179}
180
181impl PartialEq<&str> for CompressionResult {
182    fn eq(&self, other: &&str) -> bool {
183        self.text == *other
184    }
185}
186
187impl PartialEq<String> for CompressionResult {
188    fn eq(&self, other: &String) -> bool {
189        self.text == *other
190    }
191}
192
193impl From<String> for CompressionResult {
194    fn from(text: String) -> Self {
195        Self::new(text)
196    }
197}
198
199impl From<&str> for CompressionResult {
200    fn from(text: &str) -> Self {
201        Self::new(text)
202    }
203}
204
205/// A `Compressor` knows how to reduce one specific command's output to fewer
206/// tokens while preserving the information the agent needs.
207pub trait Compressor: Send + Sync {
208    /// Returns true if this compressor handles the given command head + args.
209    /// Called after generic detection (ANSI strip, dedup) so this is per-command logic only.
210    fn matches(&self, command: &str) -> bool;
211
212    /// Compress the output. Original is left untouched if compression fails.
213    fn compress(&self, command: &str, output: &str) -> CompressionResult;
214
215    fn specificity(&self) -> Specificity {
216        Specificity::Specific
217    }
218
219    /// Returns true when this compressor recognizes output produced by its
220    /// inner tool even if the command head was a wrapper (`npm test`,
221    /// `make test`, `./scripts/check.sh`, etc.). Wrapper compressors should
222    /// not override this; they remain command-only.
223    fn matches_output(&self, _output: &str) -> bool {
224        false
225    }
226
227    /// Compress output after an output-shape match. Compressors that branch by
228    /// subcommand override this to jump directly to the matched branch.
229    fn compress_output_match(&self, output: &str) -> CompressionResult {
230        self.compress("", output)
231    }
232}
233
234/// Top-level dispatch: try specific Rust modules, output-shape sniffers, package-manager modules, TOML filters, then generic fallback.
235///
236/// Convenience wrapper for command handlers that already hold an `AppContext`.
237/// Backs onto [`compress_with_registry`] which is thread-safe for use from the
238/// `BgTaskRegistry` watchdog.
239pub fn compress(command: &str, output: String, ctx: &AppContext) -> CompressionResult {
240    if !ctx.config().experimental_bash_compress {
241        return CompressionResult::new(output);
242    }
243    let registry_handle = ctx.shared_filter_registry();
244    let guard = match registry_handle.read() {
245        Ok(g) => g,
246        Err(poisoned) => poisoned.into_inner(),
247    };
248    compress_with_registry(command, &output, &guard)
249}
250
251/// Thread-safe dispatch that does not need `AppContext`. Caller is responsible
252/// for the `experimental_bash_compress` gate (the registry has no opinion).
253///
254/// Used from background threads (notably the `BgTaskRegistry` watchdog and
255/// completion-frame emitter) where lock-free access is required.
256pub fn compress_with_registry(
257    command: &str,
258    output: &str,
259    registry: &FilterRegistry,
260) -> CompressionResult {
261    let stripped_for_generic = strip_ansi(output);
262
263    // Normalize the command so shell-prefix idioms like `cd /path && bun test`,
264    // `env FOO=bar npm install`, `timeout 30 cargo build`, and `(cd /path; cmd)`
265    // don't hide the real command head from per-module matchers. Without this,
266    // BunCompressor/NpmCompressor/PnpmCompressor (which match by head-token)
267    // silently fall through to generic in most agent-issued bash calls.
268    let normalized = normalize_command_for_dispatch(command);
269    let dispatch_cmd = normalized.as_deref().unwrap_or(command);
270
271    let compressors: [&dyn Compressor; 17] = [
272        &GitCompressor,
273        &CargoCompressor,
274        &TscCompressor,
275        &NpmCompressor,
276        &BunCompressor,
277        &PnpmCompressor,
278        &PytestCompressor,
279        &EslintCompressor,
280        &VitestCompressor,
281        &BiomeCompressor,
282        &PrettierCompressor,
283        &RuffCompressor,
284        &MypyCompressor,
285        &GoCompressor,
286        &GolangciLintCompressor,
287        &PlaywrightCompressor,
288        &NextCompressor,
289    ];
290
291    // Tier 1a: Specific command compressors win first.
292    for compressor in compressors
293        .iter()
294        .filter(|c| c.specificity() == Specificity::Specific)
295    {
296        if compressor.matches(dispatch_cmd) {
297            return compressor.compress(dispatch_cmd, &stripped_for_generic);
298        }
299    }
300
301    // Tier 1b: Output-shape sniffers handle wrapped inner tools before broad
302    // package managers or TOML filters can consume `npm test`, `make test`,
303    // `just test`, etc. Collision order is deterministic: Specific compressors
304    // in registry order win before PackageManager sniffers (currently Bun's
305    // test-output signature).
306    for specificity in [Specificity::Specific, Specificity::PackageManager] {
307        for compressor in compressors
308            .iter()
309            .filter(|c| c.specificity() == specificity)
310        {
311            if compressor.matches_output(&stripped_for_generic) {
312                return compressor.compress_output_match(&stripped_for_generic);
313            }
314        }
315    }
316
317    // Tier 1c: PackageManager compressors get unclaimed commands.
318    for compressor in compressors
319        .iter()
320        .filter(|c| c.specificity() == Specificity::PackageManager)
321    {
322        if compressor.matches(dispatch_cmd) {
323            return compressor.compress(dispatch_cmd, &stripped_for_generic);
324        }
325    }
326
327    // Tier 2: TOML filters. Pass raw output so `[ansi].strip = false` filters
328    // can intentionally match escape sequences; `apply_filter` owns ANSI policy.
329    if let Some(filter) = registry.lookup(dispatch_cmd) {
330        return apply_filter(filter, output);
331    }
332
333    // Tier 3: generic fallback.
334    GenericCompressor.compress(command, &stripped_for_generic)
335}
336
337/// Build the registry of TOML filters from the standard sources for the
338/// active context. Called lazily by [`AppContext::filter_registry`].
339///
340/// Layering (highest priority first):
341/// 1. Project filters at `<project_root>/.aft/filters/*.toml` — loaded only
342///    when the project is in the trusted set (see [`trust`]).
343/// 2. User filters at `<storage_dir>/<harness>/filters/*.toml`.
344/// 3. Builtin filters compiled into the binary via [`builtin_filters`].
345pub fn build_registry_for_context(ctx: &AppContext) -> FilterRegistry {
346    let harness = ctx.harness.borrow().unwrap_or(Harness::Opencode);
347    let config = ctx.config();
348    let storage_dir = config.storage_dir.clone();
349    let project_root = config.project_root.clone();
350    drop(config);
351
352    let user_dir = storage_dir.as_ref().map(|dir| {
353        repair_legacy_user_filter_dir(dir, harness);
354        user_filter_dir(dir, harness)
355    });
356    let project_dir = match (project_root.as_ref(), storage_dir.as_ref()) {
357        (Some(root), Some(storage)) => {
358            if trust::is_project_trusted(Some(storage), root) {
359                Some(root.join(".aft").join("filters"))
360            } else {
361                None
362            }
363        }
364        _ => None,
365    };
366
367    toml_filter::build_registry(
368        builtin_filters::ALL,
369        user_dir.as_deref(),
370        project_dir.as_deref(),
371    )
372}
373
374/// Normalize a shell command for compressor dispatch by walking past
375/// common shell-prefix idioms so the REAL command head is what matchers
376/// see. Returns `Some(normalized)` if a prefix was stripped, `None` if
377/// the input was already a bare command.
378///
379/// Handles:
380///   - `cd /path && cmd ...`            → `cmd ...`
381///   - `cd /path; cmd ...`              → `cmd ...`
382///   - `env FOO=bar [BAR=baz ...] cmd`  → `cmd ...`
383///   - `FOO=bar [BAR=baz ...] cmd`      → `cmd ...`
384///   - `timeout 30 cmd ...`             → `cmd ...`
385///   - `nohup cmd ...`                  → `cmd ...`
386///   - `(cd /path && cmd ...)`          → `cmd ...`   (trailing `)` is kept; harmless for matchers)
387///
388/// Real agent invocations almost always wrap their actual command in
389/// `cd "$ROOT" && ...`. Without this normalization, BunCompressor /
390/// NpmCompressor / PnpmCompressor (head-token matchers) and the
391/// pkg-manager filters silently fall through to GenericCompressor for
392/// the majority of agent bash calls.
393///
394/// The normalizer is conservative: it only strips well-defined idioms
395/// and bails on anything ambiguous, so a malformed command degrades to
396/// the same dispatch behaviour as before this helper existed.
397pub fn normalize_command_for_dispatch(command: &str) -> Option<String> {
398    let trimmed = command.trim_start();
399    if trimmed.is_empty() {
400        return None;
401    }
402
403    // Step 1: peel a leading `(` from group-expression idioms.
404    let (open_paren, after_paren) = if let Some(rest) = trimmed.strip_prefix('(') {
405        (true, rest.trim_start())
406    } else {
407        (false, trimmed)
408    };
409
410    let mut current = after_paren.to_string();
411    let mut changed = open_paren;
412
413    // Step 2: iteratively peel known shell prefixes.
414    loop {
415        // `VAR=value cmd ...` (possibly multiple assignment words). This must
416        // run before head-token matching so package-manager/Rust compressors
417        // still see the real command for `NODE_ENV=production npm install`.
418        if let Some(stripped) = strip_leading_assignment_prefix(&current) {
419            current = stripped;
420            changed = true;
421            continue;
422        }
423
424        let head: String = current.split_whitespace().next().unwrap_or("").to_string();
425
426        // `cd <path> && ...` or `cd <path>; ...`
427        if head == "cd" {
428            // Find the next `&&` or `;` token; everything after that is the real command.
429            // Use char-level scan because `&&` is two chars not separated by whitespace.
430            if let Some(stripped) = strip_cd_prefix(&current) {
431                current = stripped;
432                changed = true;
433                continue;
434            }
435        }
436
437        // `env VAR=val [VAR=val ...] cmd ...`
438        if head == "env" {
439            if let Some(stripped) = strip_env_prefix(&current) {
440                current = stripped;
441                changed = true;
442                continue;
443            }
444        }
445
446        // `timeout <N> cmd ...` or `timeout <duration-with-unit> cmd ...`
447        if head == "timeout" {
448            if let Some(stripped) = strip_timeout_prefix(&current) {
449                current = stripped;
450                changed = true;
451                continue;
452            }
453        }
454
455        // `nohup cmd ...`
456        if head == "nohup" {
457            if let Some(rest) = current.strip_prefix("nohup").and_then(|s| {
458                let trimmed = s.trim_start();
459                if trimmed.is_empty() {
460                    None
461                } else {
462                    Some(trimmed.to_string())
463                }
464            }) {
465                current = rest;
466                changed = true;
467                continue;
468            }
469        }
470
471        break;
472    }
473
474    if changed {
475        Some(current)
476    } else {
477        None
478    }
479}
480
/// Return the command that follows the first unquoted, unescaped `&&` or `;`
/// in `command` (intended for inputs whose head token is `cd`).
///
/// Quoting follows the same rules as `shell_word_end`: single quotes are
/// literal, double quotes group, and a backslash outside single quotes escapes
/// the next character — so `cd it\'s && cmd` and `cd a\;b && cmd` both yield
/// `cmd`. Returns `None` when no separator is found, when the separator sits
/// inside quotes, or when nothing but whitespace follows the separator.
fn strip_cd_prefix(command: &str) -> Option<String> {
    // Everything from byte offset `from` onward, or `None` if it is blank.
    let remainder = |from: usize| {
        let rest = command[from..].trim_start();
        (!rest.is_empty()).then(|| rest.to_string())
    };

    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (i, ch) in command.char_indices() {
        if escaped {
            // The previous backslash neutralises this character entirely.
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            _ if in_single || in_double => {}
            // `;` and `&` are one byte each, so these offsets stay on char
            // boundaries.
            ';' => return remainder(i + 1),
            '&' if command[i + 1..].starts_with('&') => return remainder(i + 2),
            _ => {}
        }
    }
    None
}
513
514fn strip_env_prefix(command: &str) -> Option<String> {
515    // env <ASSIGN>... <cmd> ...
516    let rest = command.strip_prefix("env")?.trim_start();
517    strip_leading_assignment_prefix(rest)
518}
519
520fn strip_leading_assignment_prefix(command: &str) -> Option<String> {
521    let mut index = 0usize;
522    let mut consumed_assignment = false;
523
524    loop {
525        index = skip_whitespace(command, index);
526        if index >= command.len() {
527            break;
528        }
529
530        let word_end = shell_word_end(command, index)?;
531        if word_end == index {
532            break;
533        }
534
535        let word = &command[index..word_end];
536        if !is_env_assignment(word) {
537            break;
538        }
539
540        consumed_assignment = true;
541        index = word_end;
542    }
543
544    if !consumed_assignment {
545        return None;
546    }
547
548    let after = command[index..].trim_start();
549    if after.is_empty() {
550        None
551    } else {
552        Some(after.to_string())
553    }
554}
555
/// Advance `index` (a byte offset into `input`) past any run of whitespace
/// characters starting there and return the new offset. An offset at or past
/// the end of `input` is returned unchanged.
fn skip_whitespace(input: &str, index: usize) -> usize {
    if index >= input.len() {
        return index;
    }
    let blank_bytes: usize = input[index..]
        .chars()
        .take_while(|ch| ch.is_whitespace())
        .map(char::len_utf8)
        .sum();
    index + blank_bytes
}
568
/// Find the byte offset at which the shell word beginning at `start` ends.
///
/// A word ends at the first unquoted, unescaped whitespace character or
/// `;` / `&` / `|`; if none is found the word runs to the end of `command`.
/// Single quotes are literal, double quotes group, and a backslash outside
/// single quotes escapes the following character. Returns `None` when the
/// input ends inside a quote or right after a backslash.
fn shell_word_end(command: &str, start: usize) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut skip_next = false;

    for (offset, ch) in command[start..].char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }

        match ch {
            '\\' if !single_quoted => skip_next = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            // Inside quotes nothing terminates the word.
            _ if single_quoted || double_quoted => {}
            ';' | '&' | '|' => return Some(start + offset),
            _ if ch.is_whitespace() => return Some(start + offset),
            _ => {}
        }
    }

    let unterminated = single_quoted || double_quoted || skip_next;
    (!unterminated).then(|| command.len())
}
608
/// Report whether `token` has the shape `NAME=value`, where `NAME` starts with
/// an ASCII letter or `_` and continues with ASCII letters, digits, or `_`.
/// The value may be empty; tokens starting with `-` are never assignments.
fn is_env_assignment(token: &str) -> bool {
    if token.starts_with('-') {
        return false;
    }
    match token.split_once('=') {
        Some((name, _)) => {
            let mut rest = name.chars();
            let valid_start =
                matches!(rest.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');
            valid_start && rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        None => false,
    }
}
623
624fn strip_timeout_prefix(command: &str) -> Option<String> {
625    let rest = command.strip_prefix("timeout")?.trim_start();
626    // Next token must look like a duration (digits, optional trailing unit s/m/h).
627    let mut iter = rest.splitn(2, char::is_whitespace);
628    let duration = iter.next()?;
629    let after = iter.next()?.trim_start();
630    if after.is_empty() || !looks_like_duration(duration) {
631        return None;
632    }
633    Some(after.to_string())
634}
635
/// Report whether `token` looks like a `timeout` duration: a non-negative
/// decimal number with an optional single-letter unit suffix (`s`, `m`, `h`,
/// or `d`).
///
/// Both integers (`30`, `5m`) and fractional values (`0.5`, `1.5s`, `.5`) are
/// accepted, since GNU `timeout` takes a floating-point duration. Signs,
/// exponents, multi-letter units (`10ms`), and tokens without any digit are
/// rejected so that unrelated arguments are not mistaken for a duration.
fn looks_like_duration(token: &str) -> bool {
    // Drop at most one trailing unit letter; the rest must be the number.
    let number = token
        .strip_suffix(|c: char| matches!(c, 's' | 'm' | 'h' | 'd'))
        .unwrap_or(token);

    let (whole, fraction) = number.split_once('.').unwrap_or((number, ""));
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    // At least one digit overall; a second `.` lands in `fraction` and fails
    // the digit check.
    let has_digit = !(whole.is_empty() && fraction.is_empty());
    has_digit && all_digits(whole) && all_digits(fraction)
}
658
659/// Resolve the harness-scoped user-filter directory for an arbitrary storage_dir.
660/// Used by `aft doctor filters` to inspect filters without needing a live AppContext.
661pub fn user_filter_dir(storage_dir: &Path, harness: Harness) -> PathBuf {
662    storage_dir.join(harness.as_str()).join("filters")
663}
664
/// Compute `<storage_dir>/filters`, the root-scoped directory that
/// `repair_legacy_user_filter_dir` migrates filters out of.
fn legacy_user_filter_dir(storage_dir: &Path) -> PathBuf {
    let mut dir = storage_dir.to_path_buf();
    dir.push("filters");
    dir
}
668
669/// Move filters written by the short-lived root-scoped v0.27 layout into the
670/// active harness directory. Existing harness files win; colliding root files
671/// are left in place so we never overwrite user-authored filters.
672pub(crate) fn repair_legacy_user_filter_dir(storage_dir: &Path, harness: Harness) {
673    let legacy_dir = legacy_user_filter_dir(storage_dir);
674    if !legacy_dir.exists() {
675        return;
676    }
677
678    let entries = match fs::read_dir(&legacy_dir) {
679        Ok(entries) => entries.filter_map(Result::ok).collect::<Vec<_>>(),
680        Err(_) => return,
681    };
682    if entries.is_empty() {
683        let _ = fs::remove_dir(&legacy_dir);
684        return;
685    }
686
687    let harness_dir = user_filter_dir(storage_dir, harness);
688    if fs::create_dir_all(&harness_dir).is_err() {
689        return;
690    }
691
692    for entry in entries {
693        let target = harness_dir.join(entry.file_name());
694        if target.exists() {
695            continue;
696        }
697        let _ = fs::rename(entry.path(), target);
698    }
699
700    if fs::read_dir(&legacy_dir)
701        .map(|mut entries| entries.next().is_none())
702        .unwrap_or(false)
703    {
704        let _ = fs::remove_dir(&legacy_dir);
705    }
706}
707
/// Compute `<project_root>/.aft/filters`, the project-filter directory, for an
/// arbitrary project root. Trust is not consulted here — a caller that wants
/// to gate loading on trust must check it separately.
pub fn project_filter_dir(project_root: &Path) -> PathBuf {
    let mut dir = project_root.join(".aft");
    dir.push("filters");
    dir
}
714
#[cfg(test)]
mod tests {
    use super::*;

    /// The path helpers append the documented fixed suffixes.
    #[test]
    fn user_and_project_filter_dir_helpers() {
        let user_dir = user_filter_dir(Path::new("/tmp/aft-storage"), Harness::Opencode);
        assert_eq!(user_dir, Path::new("/tmp/aft-storage/opencode/filters"));

        let project_dir = project_filter_dir(Path::new("/repo"));
        assert_eq!(project_dir, Path::new("/repo/.aft/filters"));
    }

    /// Root-scoped filters move into the harness directory, except where a
    /// harness file of the same name already exists; that pair is left as is.
    #[test]
    fn repair_legacy_user_filter_dir_moves_root_filters_without_overwrite() {
        let temp = tempfile::tempdir().unwrap();
        let storage = temp.path();
        let legacy = storage.join("filters");
        let scoped = storage.join("opencode/filters");

        fs::create_dir_all(&legacy).unwrap();
        fs::create_dir_all(&scoped).unwrap();
        fs::write(legacy.join("root-only.toml"), "root").unwrap();
        fs::write(legacy.join("collides.toml"), "root").unwrap();
        fs::write(scoped.join("collides.toml"), "harness").unwrap();

        repair_legacy_user_filter_dir(storage, Harness::Opencode);

        let contents = |path: PathBuf| fs::read_to_string(path).unwrap();
        assert_eq!(contents(scoped.join("root-only.toml")), "root");
        assert_eq!(contents(scoped.join("collides.toml")), "harness");
        assert_eq!(contents(legacy.join("collides.toml")), "root");
        assert!(!legacy.join("root-only.toml").exists());
    }
}
758
759#[cfg(test)]
760mod dispatch_specificity_tests {
761    use super::*;
762    use crate::compress::toml_filter::FilterRegistry;
763
764    fn empty_registry() -> FilterRegistry {
765        FilterRegistry::default()
766    }
767
768    /// Helper: assert that a given command would be claimed by a specific
769    /// compressor by reading the output marker the compressor produces.
770    /// (We can't easily compare Compressor instances by identity, so we
771    /// dispatch and check for module-distinctive markers in the output.)
772    fn dispatch(cmd: &str, output: &str) -> String {
773        compress_with_registry(cmd, output, &empty_registry()).text
774    }
775
776    #[test]
777    fn generic_dispatch_does_not_classify_error_or_warning_words() {
778        let result = compress_with_registry(
779            "unknown-tool",
780            "error: this is just a log line\nwarning: this too",
781            &empty_registry(),
782        );
783
784        assert!(result.dropped_by_class.is_empty());
785        assert!(!result.had_inner_drop);
786        assert!(result.text.contains("error: this is just a log line"));
787    }
788
789    #[test]
790    fn bun_run_vitest_routes_to_vitest_not_generic() {
791        // VitestCompressor preserves PASS/FAIL markers and "Tests:" summary.
792        // BunCompressor's `Some("run")` arm currently goes to generic which
793        // would middle-truncate. Use a small vitest-shaped output and assert
794        // the vitest formatter's output marker is present.
795        let output = "Test Files  1 passed (1)\n     Tests  4 passed (4)\n  Start at  10:00:00\n  Duration  120ms\n";
796        let compressed = dispatch("bun run vitest", output);
797        // Assert vitest path took it: the vitest text summary keeps "Tests" / "Test Files" lines
798        assert!(compressed.contains("Tests") || compressed.contains("Test Files"));
799    }
800
801    #[test]
802    fn npm_test_routes_to_vitest_when_output_is_vitest_shaped() {
803        // `npm test` has no vitest token, so this proves the output-shape
804        // tier runs before the broad NpmCompressor PackageManager tier.
805        let output = "RERUN src/foo.test.ts x1\nFAIL src/foo.test.ts\nTest Files  1 failed (1)\nDuration    120ms\n";
806        let compressed = dispatch("npm test", output);
807        assert!(compressed.contains("FAIL src/foo.test.ts"));
808        assert!(compressed.contains("Duration    120ms"));
809        assert!(!compressed.contains("RERUN"));
810    }
811
812    #[test]
813    fn bun_run_vitest_token_match_wins_over_bun_head_match() {
814        // Concrete proof the new dispatch works: a command where Bun would
815        // otherwise have claimed it.
816        let output = "PASS src/a.test.ts (1)\n PASS src/b.test.ts (1)\nTest Files  2 passed (2)\n     Tests  4 passed (4)\n";
817        let compressed = dispatch("bun run vitest run", output);
818        // Vitest preserves PASS lines and "Tests:" summary.
819        assert!(compressed.contains("Test Files") || compressed.contains("PASS"));
820    }
821
822    #[test]
823    fn bunx_jest_routes_to_vitest_module() {
824        let output = "PASS src/foo.test.js (1.2s)\nTest Suites: 1 passed, 1 total\nTests:       3 passed, 3 total\n";
825        let compressed = dispatch("bunx jest --json", output);
826        assert!(compressed.contains("Tests:") && compressed.contains("Test Suites"));
827    }
828
829    #[test]
830    fn pnpm_run_vitest_routes_to_vitest() {
831        let output = "Test Files  1 passed (1)\n     Tests  10 passed (10)\n";
832        let compressed = dispatch("pnpm run vitest", output);
833        assert!(compressed.contains("Tests") || compressed.contains("Test Files"));
834    }
835
836    #[test]
837    fn npx_eslint_routes_to_eslint_not_generic() {
838        let output = "\n/tmp/a.js\n  1:1  error  'foo' is defined but never used  no-unused-vars\n\n✖ 1 problem (1 error, 0 warnings)\n";
839        let compressed = dispatch("npx eslint .", output);
840        // EslintCompressor preserves rule IDs and the ✖ summary.
841        assert!(compressed.contains("no-unused-vars") || compressed.contains("✖"));
842    }
843
844    #[test]
845    fn npm_run_lint_without_linter_output_shape_falls_back() {
846        // `npm run lint` has no eslint token, and this output has no eslint
847        // summary signature, so it should remain package-manager generic.
848        let output = "> my-project@1.0.0 lint\n> eslint .\n\nAll good.\n";
849        let compressed = dispatch("npm run lint", output);
850        assert!(compressed.contains("All good."));
851    }
852
#[test]
fn bun_test_still_routes_to_bun_test_compressor() {
    // Guards the v0.28.2 `bun test` fix against specificity dispatch.
    // There is no standalone BunTestCompressor module: BunCompressor's
    // `compress()` forwards its `Some("test")` arm to an inner
    // `compress_test()`, and that arm should still win when no Specific
    // matcher claims the command. What matters here is that the result
    // looks like bun-test output rather than generic-truncated output.
    let bun_stdout = concat!(
        "bun test v1.3.14\n",
        "\n",
        "src/foo.test.ts:\n",
        "(pass) my test [0.5ms]\n",
        "\n",
        " 1 pass\n",
        " 0 fail\n",
        " 1 expect() calls\n",
        "Ran 1 tests across 1 files. [1.00ms]\n",
    );
    let result = dispatch("bun test", bun_stdout);
    let bun_shaped = ["(pass)", "1 pass"]
        .iter()
        .any(|marker| result.contains(*marker));
    assert!(bun_shaped);
}
866
#[test]
fn bunx_vitest_routes_to_vitest() {
    // Same vitest summary shape, this time launched through `bunx`.
    let vitest_stdout = concat!(
        "Test Files  1 passed (1)\n",
        "     Tests  3 passed (3)\n",
    );
    let result = dispatch("bunx vitest run", vitest_stdout);
    let keeps_tests_line = result.contains("Tests");
    let keeps_files_line = result.contains("Test Files");
    assert!(keeps_tests_line || keeps_files_line);
}
873
#[test]
fn cargo_test_still_routes_to_cargo() {
    // Regression guard: cargo lives in the Specific tier and must keep
    // working after the specificity reordering.
    let cargo_stdout = concat!(
        "running 5 tests\n",
        "test foo ... ok\n",
        "test bar ... FAILED\n",
        "\n",
        "failures:\n",
        "\n",
        "test result: FAILED. 4 passed; 1 failed\n",
    );
    let result = dispatch("cargo test", cargo_stdout);
    // The cargo test compressor is expected to keep the pass/fail verdict.
    let verdict_kept = ["failed", "FAILED"]
        .iter()
        .any(|marker| result.contains(*marker));
    assert!(verdict_kept);
}
883
#[test]
fn git_status_still_routes_to_git() {
    // Regression guard: git is a Specific-tier compressor.
    let status_stdout = concat!(
        "On branch main\n",
        "Your branch is up to date.\n",
        "\n",
        "nothing to commit, working tree clean\n",
    );
    let result = dispatch("git status", status_stdout);
    let mentions_branch = result.contains("branch");
    let mentions_clean = result.contains("clean");
    assert!(mentions_branch || mentions_clean);
}
892
#[test]
fn pnpm_install_still_routes_to_pnpm() {
    // Regression guard: `pnpm install` was already handled before the
    // dispatch change and must stay that way.
    let install_stdout = concat!(
        "Progress: resolved 100, downloaded 50, added 50\n",
        "Added 50 packages\n",
    );
    let result = dispatch("pnpm install", install_stdout);
    // PnpmCompressor's package path is expected to keep "+ pkg" or
    // "Added X packages" style lines.
    let install_lines_kept = ["Added", "Progress"]
        .iter()
        .any(|marker| result.contains(*marker));
    assert!(install_lines_kept);
}
901}
902
#[cfg(test)]
mod normalize_command_tests {
    //! Tests for `normalize_command_for_dispatch`: first the normalizer on
    //! its own, then end-to-end through `compress_with_registry`.

    use super::*;

    /// Asserts that normalizing `command` yields `expected`.
    ///
    /// The result is compared through `as_deref` so expectations can be
    /// written as plain string slices. `#[track_caller]` keeps a failure
    /// pointing at the test that made the call.
    #[track_caller]
    fn assert_normalized(command: &str, expected: Option<&str>) {
        assert_eq!(
            normalize_command_for_dispatch(command).as_deref(),
            expected
        );
    }

    #[test]
    fn passes_bare_commands_unchanged() {
        // Commands without a wrapper prefix produce no rewritten command.
        for bare in ["bun test", "cargo build", "git status"] {
            assert_normalized(bare, None);
        }
    }

    #[test]
    fn strips_cd_and_amp_prefix() {
        let cases = [
            ("cd /repo && bun test", "bun test"),
            (
                "cd /repo/packages/aft && cargo test --release",
                "cargo test --release",
            ),
        ];
        for (wrapped, inner) in cases {
            assert_normalized(wrapped, Some(inner));
        }
    }

    #[test]
    fn strips_cd_and_semicolon_prefix() {
        assert_normalized("cd /repo; bun test", Some("bun test"));
    }

    #[test]
    fn strips_cd_with_quoted_path() {
        // The quoted directory contains a space.
        assert_normalized(
            "cd \"/path with space\" && npm install",
            Some("npm install"),
        );
    }

    #[test]
    fn strips_env_assignments() {
        let cases = [
            ("env FOO=bar npm install", "npm install"),
            ("env FOO=bar BAZ=qux RUST_LOG=info cargo test", "cargo test"),
        ];
        for (wrapped, inner) in cases {
            assert_normalized(wrapped, Some(inner));
        }
    }

    #[test]
    fn strips_bare_assignment_prefixes() {
        let cases = [
            ("NODE_ENV=production npm install", "npm install"),
            ("FOO=1 BAR=2 cargo test", "cargo test"),
            // Single-quoted value containing a space.
            ("RUSTFLAGS='-C debug' cargo build", "cargo build"),
        ];
        for (wrapped, inner) in cases {
            assert_normalized(wrapped, Some(inner));
        }
    }

    #[test]
    fn does_not_strip_later_assignment_arguments() {
        // `foo=bar` comes after the tool name, so it is an argument.
        assert_normalized("npm install foo=bar", None);
    }

    #[test]
    fn env_without_assignments_returns_none() {
        // With no `KEY=value` pairs, `env` is the env-listing command rather
        // than a prefix, so nothing is stripped.
        assert_normalized("env npm install", None);
    }

    #[test]
    fn strips_timeout_prefix() {
        let cases = [
            ("timeout 30 cargo test", "cargo test"),
            ("timeout 5m bun test", "bun test"),
        ];
        for (wrapped, inner) in cases {
            assert_normalized(wrapped, Some(inner));
        }
    }

    #[test]
    fn strips_nohup_prefix() {
        assert_normalized(
            "nohup ./long-running-script.sh",
            Some("./long-running-script.sh"),
        );
    }

    #[test]
    fn strips_paren_then_cd_and_amp() {
        // The input opens a subshell paren (never closed in this fixture)
        // before the `cd ... &&` prefix; both are expected to be removed.
        assert_normalized("(cd /repo && bun test", Some("bun test"));
    }

    #[test]
    fn chains_multiple_prefixes() {
        let cases = [
            // env, then timeout, then the real command.
            ("env FOO=bar timeout 30 cargo test", "cargo test"),
            // cd, then env, then the real command.
            ("cd /repo && env FOO=bar npm install", "npm install"),
        ];
        for (wrapped, inner) in cases {
            assert_normalized(wrapped, Some(inner));
        }
    }

    // -------- end-to-end dispatch via normalize() --------

    /// Default-constructed registry handed to `compress_with_registry` in
    /// the end-to-end tests below.
    fn empty_registry() -> FilterRegistry {
        Default::default()
    }

    #[test]
    fn cd_prefix_bun_test_still_routes_to_bun_test() {
        let bun_stdout = concat!(
            "bun test v1.3.14\n",
            "\n",
            "src/a.test.ts:\n",
            "(pass) ok [0.1ms]\n",
            "\n",
            " 1 pass\n",
            " 0 fail\n",
            " 1 expect() calls\n",
            "Ran 1 tests across 1 files. [1.00ms]\n",
        );
        let registry = empty_registry();
        let result = compress_with_registry("cd /repo && bun test", bun_stdout, &registry);
        // On the pass-only path the bun test compressor emits `(pass)`,
        // `1 pass` and `Ran ...` lines. Seeing one of them is taken as
        // evidence that the `cd` prefix was normalized away before dispatch
        // instead of the command falling through to the generic path.
        let bun_shaped = ["(pass)", "1 pass"]
            .iter()
            .any(|marker| result.contains(*marker));
        assert!(bun_shaped);
    }

    #[test]
    fn cd_prefix_cargo_test_still_routes_to_cargo() {
        let cargo_stdout = concat!(
            "running 5 tests\n",
            "test foo ... ok\n",
            "test bar ... FAILED\n",
            "\n",
            "failures:\n",
            "\n",
            "test result: FAILED. 4 passed; 1 failed\n",
        );
        let registry = empty_registry();
        let result = compress_with_registry("cd /repo && cargo test", cargo_stdout, &registry);
        let verdict_kept = ["FAILED", "failed"]
            .iter()
            .any(|marker| result.contains(*marker));
        assert!(verdict_kept);
    }

    #[test]
    fn env_prefix_npm_install_still_routes_to_npm() {
        let npm_stdout = "added 50 packages, and audited 100 packages in 3s\n";
        let registry = empty_registry();
        let result =
            compress_with_registry("env NODE_ENV=production npm install", npm_stdout, &registry);
        // NpmCompressor's install path is expected to keep the
        // "added N packages" / "audited" markers.
        let keeps_added = result.contains("added");
        let keeps_audited = result.contains("audited");
        assert!(keeps_added || keeps_audited);
    }

    #[test]
    fn bare_assignment_prefix_npm_install_routes_to_npm() {
        let npm_stdout = concat!(
            "npm http fetch GET 200 https://registry.npmjs.org/foo 123ms\n",
            "npm WARN deprecated old-pkg@1.0.0: use new-pkg instead\n",
            "\n",
            "added 42 packages in 2s\n",
            "\n",
            "audited 100 packages in 2s\n",
            "\n",
            "found 0 vulnerabilities\n",
        );
        let registry = empty_registry();
        let result =
            compress_with_registry("NODE_ENV=production npm install", npm_stdout, &registry);
        // Fetch noise must be dropped while the audit summary is kept.
        assert!(!result.contains("npm http fetch"));
        assert!(result.contains("audited 100 packages"));
    }

    #[test]
    fn bare_assignment_prefix_cargo_test_routes_to_cargo() {
        let cargo_stdout = concat!(
            "running 1 test\n",
            "test foo ... ok\n",
            "\n",
            "test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out\n",
        );
        let registry = empty_registry();
        let result = compress_with_registry("FOO=1 BAR=2 cargo test", cargo_stdout, &registry);
        // Header and verdict stay; the individual passing-test line goes.
        assert!(result.contains("running 1 test"));
        assert!(result.contains("test result: ok"));
        assert!(!result.contains("test foo ... ok"));
    }

    #[test]
    fn quoted_assignment_prefix_cargo_build_routes_to_cargo() {
        let cargo_stdout = concat!(
            "   Compiling foo v0.1.0\n",
            "warning: unused variable: `x`\n",
            " --> src/lib.rs:1:9\n",
            "  |\n",
            "1 |     let x = 1;\n",
            "  |         ^ help: if this is intentional, prefix it with an underscore: `_x`\n",
            "\n",
            "    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.12s\n",
        );
        let registry = empty_registry();
        let result =
            compress_with_registry("RUSTFLAGS='-C debug' cargo build", cargo_stdout, &registry);
        // Progress lines are dropped; the warning and final status remain.
        assert!(!result.contains("Compiling foo"));
        assert!(result.contains("warning: unused variable"));
        assert!(result.contains("Finished `dev` profile"));
    }

    #[test]
    fn timeout_prefix_cargo_build_still_routes_to_cargo() {
        let cargo_stdout = concat!(
            "   Compiling foo v0.1.0\n",
            "    Finished `dev` profile [unoptimized] target(s) in 5s\n",
        );
        let registry = empty_registry();
        let result = compress_with_registry("timeout 30 cargo build", cargo_stdout, &registry);
        // CargoCompressor is expected to preserve the build structure for
        // build/check/run.
        let structure_kept = ["Compiling", "Finished"]
            .iter()
            .any(|marker| result.contains(*marker));
        assert!(structure_kept);
    }
}