aft/compress/
mod.rs

1//! Output compression for hoisted bash.
2//!
3//! Compression has five tiers, tried in this order:
4//!
5//! 1. **Specific Rust [`Compressor`] modules** — hand-written parsers for
6//!    specific tools identified by tool tokens (for example `vitest`, `eslint`,
7//!    `cargo`, `git`). These win before broad package-manager compressors.
8//! 2. **Output-shape [`Compressor`] sniffers** — inner-tool parsers that can
9//!    recognize their own private summaries even when invoked through wrappers
10//!    such as `npm test`, `make test`, or `./scripts/check.sh`.
11//! 3. **Package-manager [`Compressor`] modules** — broad head-token matchers
12//!    (`npm`, `pnpm`, `bun`) that compress unclaimed package-manager output.
13//! 4. **TOML filters** — declarative strip + truncate + cap + shortcircuit
14//!    rules for the long tail of CLI tools. Loaded from builtin / user /
15//!    project sources via [`toml_filter::build_registry`]. See
16//!    [`toml_filter`] and [`trust`] for the trust model.
17//! 5. **[`generic`] fallback** — ANSI strip + consecutive-dedup. The
18//!    background bash registry owns the shared final output cap.
19
20pub mod biome;
21pub mod builtin_filters;
22pub mod bun;
23pub mod caps;
24pub mod cargo;
25pub mod eslint;
26pub mod find;
27pub mod generic;
28pub mod git;
29pub mod go;
30pub mod listing_fold;
31pub mod ls;
32pub mod mypy;
33pub mod next;
34pub mod npm;
35pub mod playwright;
36pub mod pnpm;
37pub mod prettier;
38pub mod pytest;
39pub mod ruff;
40pub mod toml_filter;
41pub mod tree;
42pub mod trust;
43pub mod tsc;
44pub mod vitest;
45
46use crate::context::AppContext;
47use crate::harness::Harness;
48use biome::BiomeCompressor;
49use bun::BunCompressor;
50use caps::DropClass;
51use cargo::CargoCompressor;
52use eslint::EslintCompressor;
53use find::FindCompressor;
54use generic::{strip_ansi, GenericCompressor};
55use git::GitCompressor;
56use go::{GoCompressor, GolangciLintCompressor};
57use ls::LsCompressor;
58use mypy::MypyCompressor;
59use next::NextCompressor;
60use npm::NpmCompressor;
61use playwright::PlaywrightCompressor;
62use pnpm::PnpmCompressor;
63use prettier::PrettierCompressor;
64use pytest::PytestCompressor;
65use ruff::RuffCompressor;
66use std::collections::{BTreeMap, BTreeSet};
67use std::fs;
68use std::path::{Path, PathBuf};
69use std::sync::{Arc, RwLock};
70use toml_filter::{apply_filter_with_exit_code, FilterRegistry};
71use tree::TreeCompressor;
72use tsc::TscCompressor;
73use vitest::VitestCompressor;
74
/// Thread-safe handle to the TOML filter registry. Shared between
/// `AppContext::filter_registry()` (for direct use in command handlers) and
/// `BgTaskRegistry`'s output compression closure (for use from the watchdog
/// thread).
///
/// Readers take the `RwLock` read side. [`compress_with_exit_code`] recovers
/// the guard from a poisoned lock instead of panicking (presumably through
/// this alias — confirm `AppContext::shared_filter_registry` returns it).
pub type SharedFilterRegistry = Arc<RwLock<FilterRegistry>>;
80
/// How specifically a compressor identifies a command.
///
/// `Specific` matchers (vitest, eslint, biome, tsc, pytest, cargo, git)
/// claim a command by recognising a SPECIFIC tool name as a token anywhere
/// in the command line — `npx vitest`, `pnpm exec eslint --fix`,
/// `bun run vitest`, etc.
///
/// `PackageManager` matchers (npm, pnpm, bun) claim a command by its
/// HEAD token alone (e.g. `npm`, `bun`) regardless of what subcommand
/// follows. They are intentionally broad — when a `bun run vitest` is
/// not claimed by VitestCompressor, BunCompressor still wants the chance
/// to compress generic bun output for unknown subcommands.
///
/// Dispatch order: Specific command tier first, then output-shape sniffers
/// (Specific before PackageManager), then PackageManager command tier, then
/// TOML filters, then GenericCompressor.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Specificity {
    /// Tool-specific matcher. This is what [`Compressor::specificity`]
    /// returns unless a compressor overrides it.
    Specific,
    /// Broad head-token matcher; only consulted for commands and outputs
    /// that no `Specific` compressor claimed.
    PackageManager,
}
102
/// Outcome of one compression pass: the text to show plus bookkeeping about
/// what was removed to produce it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressionResult {
    /// The (possibly compressed) output text. `Display` and `Deref<Target = str>`
    /// both expose exactly this field.
    pub text: String,
    /// Per-[`DropClass`] counters for removed content; empty when nothing was
    /// dropped by class. NOTE(review): the unit of the count (lines vs. blocks)
    /// is decided by the individual compressors — confirm before relying on it.
    pub dropped_by_class: BTreeMap<DropClass, usize>,
    /// True when the producer removed content from the output. Set by
    /// `with_inner_drop`, `with_prefix_drop`, and by `with_class_drops` when
    /// its map is non-empty.
    pub had_inner_drop: bool,
    /// Whether an offset hint may be attached to this result. `new` sets it to
    /// true; `with_class_drops` clears it when anything was dropped.
    /// NOTE(review): the hint itself is produced outside this block.
    pub offset_hint_eligible: bool,
    /// Line offset recorded by `with_prefix_drop`; `None` for every other
    /// constructor. Presumably the first retained line — confirm with callers.
    pub offset_start_line: Option<usize>,
}
111
112impl CompressionResult {
113    pub fn new(text: impl Into<String>) -> Self {
114        Self {
115            text: text.into(),
116            dropped_by_class: BTreeMap::new(),
117            had_inner_drop: false,
118            offset_hint_eligible: true,
119            offset_start_line: None,
120        }
121    }
122
123    pub fn with_class_drops(
124        text: impl Into<String>,
125        dropped_by_class: BTreeMap<DropClass, usize>,
126    ) -> Self {
127        let had_inner_drop = !dropped_by_class.is_empty();
128        Self {
129            text: text.into(),
130            dropped_by_class,
131            had_inner_drop,
132            offset_hint_eligible: !had_inner_drop,
133            offset_start_line: None,
134        }
135    }
136
137    pub fn with_inner_drop(text: impl Into<String>, offset_hint_eligible: bool) -> Self {
138        Self {
139            text: text.into(),
140            dropped_by_class: BTreeMap::new(),
141            had_inner_drop: true,
142            offset_hint_eligible,
143            offset_start_line: None,
144        }
145    }
146
147    pub fn with_prefix_drop(text: impl Into<String>, offset_start_line: usize) -> Self {
148        Self {
149            text: text.into(),
150            dropped_by_class: BTreeMap::new(),
151            had_inner_drop: true,
152            offset_hint_eligible: true,
153            offset_start_line: Some(offset_start_line),
154        }
155    }
156
157    pub fn has_semantic_drops(&self) -> bool {
158        !self.dropped_by_class.is_empty()
159    }
160
161    pub fn has_any_drop(&self) -> bool {
162        self.had_inner_drop || self.has_semantic_drops()
163    }
164
165    pub fn map_text<F>(mut self, f: F) -> Self
166    where
167        F: FnOnce(&str) -> String,
168    {
169        self.text = f(&self.text);
170        self
171    }
172}
173
174impl std::fmt::Display for CompressionResult {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        f.write_str(&self.text)
177    }
178}
179
180impl std::ops::Deref for CompressionResult {
181    type Target = str;
182
183    fn deref(&self) -> &Self::Target {
184        &self.text
185    }
186}
187
188impl PartialEq<&str> for CompressionResult {
189    fn eq(&self, other: &&str) -> bool {
190        self.text == *other
191    }
192}
193
194impl PartialEq<String> for CompressionResult {
195    fn eq(&self, other: &String) -> bool {
196        self.text == *other
197    }
198}
199
200impl From<String> for CompressionResult {
201    fn from(text: String) -> Self {
202        Self::new(text)
203    }
204}
205
206impl From<&str> for CompressionResult {
207    fn from(text: &str) -> Self {
208        Self::new(text)
209    }
210}
211
/// A `Compressor` knows how to reduce one specific command's output to fewer
/// tokens while preserving the information the agent needs.
///
/// Implementors must provide [`Compressor::matches`] and
/// [`Compressor::compress_with_exit_code`]; every other method has a default
/// that forwards to one of those two.
pub trait Compressor: Send + Sync {
    /// Returns true if this compressor handles the given command head + args.
    /// The dispatcher in this module passes the command after shell-prefix
    /// peeling, so this is per-command logic only.
    fn matches(&self, command: &str) -> bool;

    /// Compress the output when the process exit code is unknown.
    /// Forwards to [`Compressor::compress_with_exit_code`] with `None`.
    fn compress(&self, command: &str, output: &str) -> CompressionResult {
        self.compress_with_exit_code(command, output, None)
    }

    /// Compress the output. Original is left untouched if compression fails.
    ///
    /// `exit_code` is `None` when the process exit status is not known.
    fn compress_with_exit_code(
        &self,
        command: &str,
        output: &str,
        exit_code: Option<i32>,
    ) -> CompressionResult;

    /// Which dispatch tier this compressor belongs to. Defaults to
    /// [`Specificity::Specific`]; broad head-token matchers override it.
    fn specificity(&self) -> Specificity {
        Specificity::Specific
    }

    /// Returns true when this compressor recognizes output produced by its
    /// inner tool even if the command head was a wrapper (`npm test`,
    /// `make test`, `./scripts/check.sh`, etc.). Wrapper compressors should
    /// not override this; they remain command-only.
    ///
    /// The default never matches.
    fn matches_output(&self, _output: &str) -> bool {
        false
    }

    /// Compress output after an output-shape match when the process exit code is unknown.
    /// Forwards to [`Compressor::compress_output_match_with_exit_code`] with `None`.
    fn compress_output_match(&self, output: &str) -> CompressionResult {
        self.compress_output_match_with_exit_code(output, None)
    }

    /// Compress output after an output-shape match. Compressors that branch by
    /// subcommand override this to jump directly to the matched branch.
    ///
    /// The default calls [`Compressor::compress_with_exit_code`] with an empty
    /// command string, because the wrapper command says nothing about the
    /// inner tool.
    fn compress_output_match_with_exit_code(
        &self,
        output: &str,
        exit_code: Option<i32>,
    ) -> CompressionResult {
        self.compress_with_exit_code("", output, exit_code)
    }
}
259/// Top-level dispatch: try specific Rust modules, output-shape sniffers, package-manager modules, TOML filters, then generic fallback.
260///
261/// Convenience wrapper for command handlers that already hold an `AppContext`.
262/// Backs onto [`compress_with_registry`] which is thread-safe for use from the
263/// `BgTaskRegistry` watchdog.
264pub fn compress(command: &str, output: String, ctx: &AppContext) -> CompressionResult {
265    compress_with_exit_code(command, output, None, ctx)
266}
267
268pub fn compress_with_exit_code(
269    command: &str,
270    output: String,
271    exit_code: Option<i32>,
272    ctx: &AppContext,
273) -> CompressionResult {
274    if !ctx.config().experimental_bash_compress {
275        return CompressionResult::new(output);
276    }
277    let registry_handle = ctx.shared_filter_registry();
278    let guard = match registry_handle.read() {
279        Ok(g) => g,
280        Err(poisoned) => poisoned.into_inner(),
281    };
282    compress_with_registry_exit_code(command, &output, exit_code, &guard)
283}
284
285/// Thread-safe dispatch that does not need `AppContext`. Caller is responsible
286/// for the `experimental_bash_compress` gate (the registry has no opinion).
287///
288/// Used from background threads (notably the `BgTaskRegistry` watchdog and
289/// completion-frame emitter) where lock-free access is required.
290pub fn compress_with_registry(
291    command: &str,
292    output: &str,
293    registry: &FilterRegistry,
294) -> CompressionResult {
295    compress_with_registry_exit_code(command, output, None, registry)
296}
297
298pub fn compress_with_registry_exit_code(
299    command: &str,
300    output: &str,
301    exit_code: Option<i32>,
302    registry: &FilterRegistry,
303) -> CompressionResult {
304    let stripped_for_generic = strip_ansi(output);
305
306    // Resolve what to dispatch on: peel shell-prefix idioms (`cd /path && bun
307    // test`, `env FOO=bar npm install`, `timeout 30 cargo build`, `(cd /path;
308    // cmd)`) so head-token matchers see the real command. Any top-level pipe
309    // must stay generic: the shell already ran the user's pipeline verbatim, so
310    // the captured output belongs to the pipeline result, not necessarily to the
311    // runner that appeared before `|`.
312    let dispatch_owned = match resolve_dispatch_target(command) {
313        DispatchTarget::Pipeline(_) | DispatchTarget::ForceGeneric => {
314            return GenericCompressor.compress_with_exit_code(
315                command,
316                &stripped_for_generic,
317                exit_code,
318            );
319        }
320        DispatchTarget::Command(cmd) => cmd,
321    };
322    let dispatch_cmd = dispatch_owned.as_str();
323
324    let compressors: [&dyn Compressor; 20] = [
325        &GitCompressor,
326        &CargoCompressor,
327        &TscCompressor,
328        &NpmCompressor,
329        &BunCompressor,
330        &PnpmCompressor,
331        &PytestCompressor,
332        &EslintCompressor,
333        &VitestCompressor,
334        &BiomeCompressor,
335        &PrettierCompressor,
336        &RuffCompressor,
337        &MypyCompressor,
338        &GoCompressor,
339        &GolangciLintCompressor,
340        &PlaywrightCompressor,
341        &NextCompressor,
342        &LsCompressor,
343        &FindCompressor,
344        &TreeCompressor,
345    ];
346
347    // Tier 1a: Specific command compressors win first.
348    for compressor in compressors
349        .iter()
350        .filter(|c| c.specificity() == Specificity::Specific)
351    {
352        if compressor.matches(dispatch_cmd) {
353            let result =
354                compressor.compress_with_exit_code(dispatch_cmd, &stripped_for_generic, exit_code);
355            return failure_preserving_result(command, &stripped_for_generic, result, exit_code);
356        }
357    }
358
359    // Tier 1b: Output-shape sniffers handle wrapped inner tools before broad
360    // package managers or TOML filters can consume `npm test`, `make test`,
361    // `just test`, etc. Collision order is deterministic: Specific compressors
362    // in registry order win before PackageManager sniffers (currently Bun's
363    // test-output signature).
364    for specificity in [Specificity::Specific, Specificity::PackageManager] {
365        for compressor in compressors
366            .iter()
367            .filter(|c| c.specificity() == specificity)
368        {
369            if compressor.matches_output(&stripped_for_generic) {
370                let result = compressor
371                    .compress_output_match_with_exit_code(&stripped_for_generic, exit_code);
372                return failure_preserving_result(
373                    command,
374                    &stripped_for_generic,
375                    result,
376                    exit_code,
377                );
378            }
379        }
380    }
381
382    // Tier 1c: PackageManager compressors get unclaimed commands.
383    for compressor in compressors
384        .iter()
385        .filter(|c| c.specificity() == Specificity::PackageManager)
386    {
387        if compressor.matches(dispatch_cmd) {
388            let result =
389                compressor.compress_with_exit_code(dispatch_cmd, &stripped_for_generic, exit_code);
390            return failure_preserving_result(command, &stripped_for_generic, result, exit_code);
391        }
392    }
393
394    // Tier 2: TOML filters. Pass raw output so `[ansi].strip = false` filters
395    // can intentionally match escape sequences; `apply_filter` owns ANSI policy.
396    if let Some(filter) = registry.lookup(dispatch_cmd) {
397        let result = apply_filter_with_exit_code(filter, output, exit_code);
398        return failure_preserving_result(command, &stripped_for_generic, result, exit_code);
399    }
400
401    // Tier 3: generic fallback.
402    GenericCompressor.compress_with_exit_code(command, &stripped_for_generic, exit_code)
403}
404
405fn failure_preserving_result(
406    command: &str,
407    stripped_raw_output: &str,
408    result: CompressionResult,
409    exit_code: Option<i32>,
410) -> CompressionResult {
411    if !matches!(exit_code, Some(code) if code != 0) {
412        return result;
413    }
414
415    if dropped_failure_or_error_blocks(&result)
416        || !text_has_failure_signal(&result.text)
417        || result_looks_successful(&result.text)
418    {
419        return GenericCompressor.compress_with_exit_code(command, stripped_raw_output, exit_code);
420    }
421
422    let missing = missing_raw_failure_signal_lines(stripped_raw_output, &result.text);
423    if missing.is_empty() {
424        result
425    } else {
426        append_missing_failure_lines(result, &missing)
427    }
428}
429
430fn dropped_failure_or_error_blocks(result: &CompressionResult) -> bool {
431    [DropClass::Error, DropClass::Failure]
432        .into_iter()
433        .any(|class| result.dropped_by_class.get(&class).copied().unwrap_or(0) > 0)
434}
435
436fn append_missing_failure_lines(
437    mut result: CompressionResult,
438    missing_failure_lines: &[String],
439) -> CompressionResult {
440    let mut text = result.text.trim_end().to_string();
441    if !text.is_empty() {
442        text.push('\n');
443    }
444    text.push_str("[raw failure lines preserved by AFT]\n");
445    text.push_str(&missing_failure_lines.join("\n"));
446    result.text = text;
447    result
448}
449
450pub(crate) fn missing_raw_failure_signal_lines(
451    raw_output: &str,
452    compressed_text: &str,
453) -> Vec<String> {
454    let compressed_lines: BTreeSet<String> = compressed_text
455        .lines()
456        .map(str::trim)
457        .filter(|line| !line.is_empty())
458        .map(ToString::to_string)
459        .collect();
460    let mut seen = BTreeSet::new();
461    let mut missing = Vec::new();
462
463    for line in raw_output.lines() {
464        let trimmed = line.trim();
465        if trimmed.is_empty() || !line_has_failure_signal(trimmed) {
466            continue;
467        }
468        if compressed_lines.contains(trimmed) || !seen.insert(trimmed.to_string()) {
469            continue;
470        }
471        missing.push(trimmed.to_string());
472    }
473
474    missing
475}
476
/// Heuristic: does `text` read like a success report?
///
/// Case-insensitive substring search for any of a fixed set of markers
/// ("passed", "no errors", "up to date", ...). Used to distrust a compressed
/// summary that claims success while the process exited non-zero.
fn result_looks_successful(text: &str) -> bool {
    const SUCCESS_MARKERS: &[&str] = &[
        "clean",
        " ok",
        ":ok",
        ": ok",
        "passed",
        "succeeded",
        "no errors",
        "0 errors",
        "no issues",
        "no diagnostics",
        "all checks passed",
        "formatted",
        "0 fail",
        "found 0",
        "up to date",
        "up-to-date",
    ];

    let haystack = text.to_ascii_lowercase();
    SUCCESS_MARKERS
        .iter()
        .any(|&marker| haystack.contains(marker))
}
496
497pub(crate) fn text_has_failure_signal(text: &str) -> bool {
498    text.lines()
499        .any(|line| line_has_failure_signal(line.trim()))
500}
501
502fn line_has_failure_signal(line: &str) -> bool {
503    let lower = line.to_ascii_lowercase();
504    line.contains("error[")
505        || lower.contains("error:")
506        || line.contains("Error")
507        || line.contains("ERROR")
508        || lower.contains("internalerror")
509        || lower.contains("traceback")
510        || lower.contains("exception")
511        || lower.contains("no module named")
512        || lower.contains("undefined reference")
513        || lower.contains("linker command failed")
514        || lower.contains("undefined:")
515        || lower.contains("expected declaration")
516        || lower.contains("collect2: error")
517        || lower.contains("ld: error")
518        || lower.contains("fatal error")
519        || line.contains("FAILED")
520        || line.contains("FAIL")
521        || contains_nonzero_failure_word(line, "fail")
522        || contains_nonzero_failure_word(line, "failed")
523        || contains_nonzero_failure_word(line, "failure")
524        || contains_nonzero_failure_word(line, "failures")
525        || lower.contains("panic")
526        || lower.contains("cannot find")
527        || lower.contains("not found")
528        || lower.contains("no such")
529}
530
531fn contains_nonzero_failure_word(line: &str, word: &str) -> bool {
532    let lower = line.to_ascii_lowercase();
533    for (index, _) in lower.match_indices(word) {
534        let end = index + word.len();
535        let before_is_word = lower[..index].chars().next_back().is_some_and(is_word_char);
536        let after_is_word = lower[end..].chars().next().is_some_and(is_word_char);
537        if before_is_word || after_is_word {
538            continue;
539        }
540
541        let prefix = lower[..index].trim_end();
542        let digits_start = prefix
543            .char_indices()
544            .rev()
545            .take_while(|(_, ch)| ch.is_ascii_digit())
546            .last()
547            .map(|(idx, _)| idx);
548        let Some(digits_start) = digits_start else {
549            return true;
550        };
551        let digits = &prefix[digits_start..];
552        if digits.parse::<usize>().ok() != Some(0) {
553            return true;
554        }
555    }
556    false
557}
558
/// True for ASCII letters, ASCII digits and `_` — the characters that glue a
/// match into a longer identifier-like word.
fn is_word_char(ch: char) -> bool {
    matches!(ch, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
562
563/// Build the registry of TOML filters from the standard sources for the
564/// active context. Called lazily by [`AppContext::filter_registry`].
565///
566/// Layering (highest priority first):
567/// 1. Project filters at `<project_root>/.cortexkit/aft/filters/*.toml` — loaded only
568///    when the project is in the trusted set (see [`trust`]).
569/// 2. User filters at `<storage_dir>/<harness>/filters/*.toml`.
570/// 3. Builtin filters compiled into the binary via [`builtin_filters`].
571pub fn build_registry_for_context(ctx: &AppContext) -> FilterRegistry {
572    let harness = ctx.harness.lock().clone().unwrap_or(Harness::Opencode);
573    let config = ctx.config();
574    let storage_dir = config.storage_dir.clone();
575    let project_root = config.project_root.clone();
576    drop(config);
577
578    let user_dir = storage_dir.as_ref().map(|dir| {
579        repair_legacy_user_filter_dir(dir, harness.clone());
580        user_filter_dir(dir, harness)
581    });
582    let project_dir = match (project_root.as_ref(), storage_dir.as_ref()) {
583        (Some(root), Some(storage)) => {
584            if trust::is_project_trusted(Some(storage), root) {
585                Some(project_filter_dir(root))
586            } else {
587                None
588            }
589        }
590        _ => None,
591    };
592
593    toml_filter::build_registry(
594        builtin_filters::ALL,
595        user_dir.as_deref(),
596        project_dir.as_deref(),
597    )
598}
599
600/// Normalize a shell command for compressor dispatch by walking past
601/// common shell-prefix idioms so the REAL command head is what matchers
602/// see. Returns `Some(normalized)` if a prefix was stripped, `None` if
603/// the input was already a bare command.
604///
605/// Handles:
606///   - `cd /path && cmd ...`            → `cmd ...`
607///   - `cd /path; cmd ...`              → `cmd ...`
608///   - `env FOO=bar [BAR=baz ...] cmd`  → `cmd ...`
609///   - `FOO=bar [BAR=baz ...] cmd`      → `cmd ...`
610///   - `timeout 30 cmd ...`             → `cmd ...`
611///   - `nohup cmd ...`                  → `cmd ...`
612///   - `(cd /path && cmd ...)`          → `cmd ...`   (trailing `)` is kept; harmless for matchers)
613///
614/// Real agent invocations almost always wrap their actual command in
615/// `cd "$ROOT" && ...`. Without this normalization, BunCompressor /
616/// NpmCompressor / PnpmCompressor (head-token matchers) and the
617/// pkg-manager filters silently fall through to GenericCompressor for
618/// the majority of agent bash calls.
619///
620/// The normalizer is conservative: it only strips well-defined idioms
621/// and bails on anything ambiguous, so a malformed command degrades to
622/// the same dispatch behaviour as before this helper existed.
623pub fn normalize_command_for_dispatch(command: &str) -> Option<String> {
624    match resolve_dispatch_target(command) {
625        // Ambiguous or unsafe pipelines must not be claimed by specific
626        // compressors, so return None to make callers use generic dispatch.
627        DispatchTarget::ForceGeneric => None,
628        DispatchTarget::Command(resolved) | DispatchTarget::Pipeline(resolved) => {
629            if resolved == command.trim_start() {
630                None
631            } else {
632                Some(resolved)
633            }
634        }
635    }
636}
637
638/// Normalize commands for structured-output detection, where a top-level pipe
639/// must suppress structured handling instead of falling back to the raw command.
640pub(crate) fn plain_command_for_structured_output(command: &str) -> Option<String> {
641    match resolve_dispatch_target(command) {
642        DispatchTarget::Command(resolved) => Some(resolved),
643        DispatchTarget::Pipeline(_) | DispatchTarget::ForceGeneric => None,
644    }
645}
646
/// What compressor dispatch should target for a command, after stripping
/// top-level comments, peeling shell prefixes and checking for a top-level
/// pipeline.
enum DispatchTarget {
    /// No top-level pipe was found: match compressors against this command
    /// string (comment-stripped and prefix-peeled).
    Command(String),
    /// A clean top-level pipeline was found. The contained string is the last
    /// stage for callers that only need a normalized command, but compression
    /// dispatch treats the original command as generic raw pipeline output.
    Pipeline(String),
    /// An unsafe pipeline was detected (a `|` is present but the command could
    /// not be parsed safely). Skip all specific compressors and use generic —
    /// a head-token compressor claiming `cargo test | …` would drop the output.
    ForceGeneric,
}
662
663fn resolve_dispatch_target(command: &str) -> DispatchTarget {
664    // Strip top-level comments FIRST. A `#` comment's text otherwise reaches the
665    // head-token matchers, which scan the whole string for their tool name — so
666    // `printf keep # cargo test` would let CargoCompressor claim the printf
667    // command's output and drop it (issue #137), with or without a pipe.
668    let decommented = strip_top_level_comment(command);
669    let peeled = peel_shell_prefixes(&decommented);
670    let base = peeled
671        .as_deref()
672        .unwrap_or_else(|| decommented.trim_start());
673    match split_top_level_pipe(base) {
674        PipeSplit::LastStage(last) => DispatchTarget::Pipeline(last),
675        PipeSplit::Unsafe => DispatchTarget::ForceGeneric,
676        PipeSplit::None => DispatchTarget::Command(base.to_string()),
677    }
678}
679
/// Remove top-level shell comments (`#` to end of line) from a command so the
/// comment text can't fool head-token compressor matchers (which scan the whole
/// command string for their tool name). Quote/backtick/substitution aware: a `#`
/// inside quotes, inside `$(`/`` ` ``, inside any parenthesised group, or not at
/// a word boundary is literal.
///
/// A `$(` command substitution starts a fresh quoting context, even when it
/// appears inside double quotes. Its closing `)` restores the quoting state
/// that was active where it opened, so `echo "$(date)" # note` returns to
/// depth 0 after the closing quote and the trailing comment is stripped.
///
/// Copies byte ranges (UTF-8 safe — every decision point is an ASCII byte) and
/// preserves newlines so any later top-level structure stays visible to the
/// pipeline scanner.
fn strip_top_level_comment(command: &str) -> String {
    let bytes = command.as_bytes();
    let mut result = String::with_capacity(command.len());
    let mut seg_start = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut paren_depth: u32 = 0;
    // One entry per open `$(`: the paren depth right after it opened and
    // whether it was opened inside double quotes.
    let mut substitutions: Vec<(u32, bool)> = Vec::new();
    let mut escaped = false;
    let mut prev = b' '; // start-of-string counts as a word boundary

    let mut i = 0;
    while i < bytes.len() {
        let ch = bytes[i];
        if escaped {
            escaped = false;
            prev = ch;
            i += 1;
            continue;
        }
        if in_single {
            if ch == b'\'' {
                in_single = false;
            }
            prev = ch;
            i += 1;
            continue;
        }
        if in_backtick {
            if ch == b'\\' {
                escaped = true;
            } else if ch == b'`' {
                in_backtick = false;
            }
            prev = ch;
            i += 1;
            continue;
        }
        if ch == b'\\' {
            escaped = true;
            prev = ch;
            i += 1;
            continue;
        }
        if ch == b'`' {
            in_backtick = true;
            prev = ch;
            i += 1;
            continue;
        }
        if ch == b'$' && bytes.get(i + 1) == Some(&b'(') {
            // Recognised inside double quotes too. The substitution body is
            // parsed as unquoted shell text until its matching `)`.
            paren_depth += 1;
            substitutions.push((paren_depth, in_double));
            in_double = false;
            prev = b'(';
            i += 2;
            continue;
        }
        if in_double {
            if ch == b'"' {
                in_double = false;
            }
            prev = ch;
            i += 1;
            continue;
        }
        if ch == b'#'
            && paren_depth == 0
            && matches!(prev, b' ' | b'\t' | b'\n' | b';' | b'&' | b'|' | b'(')
        {
            result.push_str(&command[seg_start..i]);
            while i < bytes.len() && bytes[i] != b'\n' {
                i += 1;
            }
            seg_start = i; // resume at the newline (kept) or EOL
            prev = b'\n';
            continue;
        }
        match ch {
            b'\'' => in_single = true,
            b'"' => in_double = true,
            b'<' | b'>' if bytes.get(i + 1) == Some(&b'(') => {
                paren_depth += 1;
                prev = b'(';
                i += 2;
                continue;
            }
            b'(' => paren_depth += 1,
            b')' => {
                // If this `)` closes the innermost `$(`, restore the quoting
                // state that was active where the substitution started.
                if let Some(&(open_depth, was_in_double)) = substitutions.last() {
                    if open_depth == paren_depth {
                        substitutions.pop();
                        in_double = was_in_double;
                    }
                }
                paren_depth = paren_depth.saturating_sub(1);
            }
            _ => {}
        }
        prev = ch;
        i += 1;
    }
    result.push_str(&command[seg_start..]);
    result
}
782
783/// Peel known shell-prefix idioms (`cd … &&`, `env VAR=v`, `VAR=v`, `timeout N`,
784/// `nohup`, leading `(`) so the REAL command head is exposed to matchers.
785/// Returns `Some(peeled)` when something was stripped, `None` otherwise.
786fn peel_shell_prefixes(command: &str) -> Option<String> {
787    let trimmed = command.trim_start();
788    if trimmed.is_empty() {
789        return None;
790    }
791
792    // Step 1: peel a leading `(` from group-expression idioms.
793    let (open_paren, after_paren) = if let Some(rest) = trimmed.strip_prefix('(') {
794        (true, rest.trim_start())
795    } else {
796        (false, trimmed)
797    };
798
799    let mut current = after_paren.to_string();
800    let mut changed = open_paren;
801
802    // Step 2: iteratively peel known shell prefixes.
803    loop {
804        // `VAR=value cmd ...` (possibly multiple assignment words). This must
805        // run before head-token matching so package-manager/Rust compressors
806        // still see the real command for `NODE_ENV=production npm install`.
807        if let Some(stripped) = strip_leading_assignment_prefix(&current) {
808            current = stripped;
809            changed = true;
810            continue;
811        }
812
813        let head: String = current.split_whitespace().next().unwrap_or("").to_string();
814
815        // `cd <path> && ...` or `cd <path>; ...`
816        if head == "cd" {
817            // Find the next `&&` or `;` token; everything after that is the real command.
818            // Use char-level scan because `&&` is two chars not separated by whitespace.
819            if let Some(stripped) = strip_cd_prefix(&current) {
820                current = stripped;
821                changed = true;
822                continue;
823            }
824        }
825
826        // `env VAR=val [VAR=val ...] cmd ...`
827        if head == "env" {
828            if let Some(stripped) = strip_env_prefix(&current) {
829                current = stripped;
830                changed = true;
831                continue;
832            }
833        }
834
835        // `timeout <N> cmd ...` or `timeout <duration-with-unit> cmd ...`
836        if head == "timeout" {
837            if let Some(stripped) = strip_timeout_prefix(&current) {
838                current = stripped;
839                changed = true;
840                continue;
841            }
842        }
843
844        // `nohup cmd ...`
845        if head == "nohup" {
846            if let Some(rest) = current.strip_prefix("nohup").and_then(|s| {
847                let trimmed = s.trim_start();
848                if trimmed.is_empty() {
849                    None
850                } else {
851                    Some(trimmed.to_string())
852                }
853            }) {
854                current = rest;
855                changed = true;
856                continue;
857            }
858        }
859
860        break;
861    }
862
863    if changed {
864        Some(current)
865    } else {
866        None
867    }
868}
869
870/// Returns true if the token is a shell metacharacter that acts as a
871/// command boundary. Subcommand parsers use this to avoid returning a
872/// redirect/operator token as a subcommand name. Covers control operators
873/// (`|`, `|&`, `;`, `&`, `&&`, `||`), and every redirect shape — bare
874/// (`>`, `>>`, `<`, `<<`, `<<<`, `&>`, `&>>`), fd-prefixed (`2>`, `2>>`,
875/// `2>&1`, `1>&2`), and glued (`>file`, `2>/dev/null`).
876pub fn is_shell_boundary(token: &str) -> bool {
877    matches!(token, "|" | "|&" | ";" | "&" | "&&" | "||" | "&>" | "&>>") || is_redirect_token(token)
878}
879
/// Recognizes a redirect token.
///
/// Any leading ASCII digits (the fd in `2>&1`) are skipped; what remains must
/// begin with `>`, `<`, or `&>`. Plain words such as `test`, `log`, or `build`
/// therefore never match, and neither does a token made only of digits.
fn is_redirect_token(token: &str) -> bool {
    // ASCII digits are single bytes, so the count is a valid slice offset.
    let fd_len = token.bytes().take_while(u8::is_ascii_digit).count();
    let operator = &token[fd_len..];
    matches!(
        operator.as_bytes(),
        [b'>' | b'<', ..] | [b'&', b'>', ..]
    )
}
887
/// Result of looking for a top-level pipeline in a command string.
#[derive(Debug, Eq, PartialEq)]
enum PipeSplit {
    /// The command has no top-level `|`; dispatch on it unchanged.
    None,
    /// The command is a top-level pipeline. The payload is the text of its
    /// final stage, whose output is what the caller captured.
    LastStage(String),
    /// A pipe-like operator exists but the command could not be split safely
    /// (for example unbalanced quotes, parens, or backticks). Callers must not
    /// fall back to head-token dispatch in this case: a compressor claiming
    /// `cargo test | …` would drop the piped output. Use generic compression.
    Unsafe,
}
901
/// Depth-aware pipeline scanner that FAILS CLOSED. Tracks single/double quotes,
/// backslash escapes, backtick substitution, and `(`/`$(`/`<(`/`>(` nesting so a
/// `|` inside any of them is not treated as a stage boundary. Splits on a
/// top-level `|`/`|&` (never `||`) and returns the LAST stage — but ONLY when
/// the command is a clean single pipeline. The caller captured the WHOLE
/// command's stdout, so "last stage == captured output" holds only when no other
/// top-level structure exists; otherwise a head-token compressor could claim the
/// command and drop output (issue #137). Therefore, whenever a top-level pipe
/// coexists with ANY of {a top-level separator `;`/`&&`/`||`/bare `&`/newline,
/// an unbalanced quote/paren/backtick/escape, an unmatched `)`, or an empty
/// trailing stage}, we return `Unsafe` so the caller forces generic compression.
/// Top-level comments must already be removed by `strip_top_level_comment`.
/// Redirects (`>`, `2>&1`, `&>`, …) are NOT separators.
///
/// Returns:
/// - `PipeSplit::LastStage(text)` — trimmed text after the last top-level
///   `|`/`|&` of a clean pipeline.
/// - `PipeSplit::Unsafe` — a top-level pipe was seen alongside any of the
///   conditions above, or no top-level pipe was seen but the scan ended
///   unbalanced and the command contains a `|` somewhere.
/// - `PipeSplit::None` — everything else.
fn split_top_level_pipe(command: &str) -> PipeSplit {
    // The scan is byte-wise; every byte it reacts to is ASCII.
    let bytes = command.as_bytes();
    // Lexical state carried from byte to byte.
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut paren_depth: u32 = 0;
    let mut escaped = false;
    // Findings accumulated over the whole scan.
    let mut saw_unmatched_close = false;
    let mut saw_top_pipe = false;
    let mut saw_top_separator = false;
    // Byte offset just past the most recent top-level `|` / `|&`.
    let mut last_pipe_end: Option<usize> = None;

    let mut i = 0;
    while i < bytes.len() {
        let ch = bytes[i];

        // The order of the checks below is significant: each state swallows
        // the byte and `continue`s before later checks can see it.

        // The byte following a backslash is consumed without interpretation.
        if escaped {
            escaped = false;
            i += 1;
            continue;
        }
        // Inside single quotes only the closing quote matters; a backslash is
        // not an escape here because this check precedes the backslash check.
        if in_single {
            if ch == b'\'' {
                in_single = false;
            }
            i += 1;
            continue;
        }
        if in_backtick {
            // Backtick substitution is opaque for splitting. A backslash still
            // escapes the next byte so an escaped backtick doesn't close it.
            if ch == b'\\' {
                escaped = true;
            } else if ch == b'`' {
                in_backtick = false;
            }
            i += 1;
            continue;
        }
        // Backslash and backtick are checked before `in_double`, so both are
        // honored inside double quotes as well as outside them.
        if ch == b'\\' {
            escaped = true;
            i += 1;
            continue;
        }
        if ch == b'`' {
            in_backtick = true;
            i += 1;
            continue;
        }
        // `$(` opens command substitution even inside double quotes.
        // NOTE(review): while `in_double` is set, the branch below consumes
        // every byte except `"`, so a `)` that closes this substitution inside
        // the quotes never decrements `paren_depth`. The leftover depth is
        // reported as imbalance at the end, e.g. `echo "$(date)" | cat` yields
        // `Unsafe`. That is conservative; confirm it is intended.
        if ch == b'$' && bytes.get(i + 1) == Some(&b'(') {
            paren_depth += 1;
            i += 2;
            continue;
        }
        if in_double {
            if ch == b'"' {
                in_double = false;
            }
            i += 1;
            continue;
        }

        // Below here: outside single/double quotes and backticks. Top-level
        // comments are already removed by `strip_top_level_comment` before this
        // scanner runs, so no `#` handling is needed here.
        // Previous raw byte (a space at offset 0); used to recognize `>&`.
        let prev_raw = if i > 0 { bytes[i - 1] } else { b' ' };

        match ch {
            b'\'' => in_single = true,
            b'"' => in_double = true,
            // process substitution `<(` / `>(`
            b'<' | b'>' if bytes.get(i + 1) == Some(&b'(') => {
                paren_depth += 1;
                i += 2;
                continue;
            }
            b'(' => paren_depth += 1,
            b')' => {
                // A `)` with nothing open is remembered rather than allowed to
                // underflow the unsigned depth counter.
                if paren_depth == 0 {
                    saw_unmatched_close = true;
                } else {
                    paren_depth -= 1;
                }
            }
            // Operators only count at depth 0; nested ones fall to `_`.
            b'|' if paren_depth == 0 => {
                if bytes.get(i + 1) == Some(&b'|') {
                    saw_top_separator = true; // `||` logical OR
                    i += 2;
                    continue;
                }
                saw_top_pipe = true;
                if bytes.get(i + 1) == Some(&b'&') {
                    last_pipe_end = Some(i + 2); // `|&` (stdout+stderr)
                    i += 2;
                    continue;
                }
                last_pipe_end = Some(i + 1);
            }
            b'&' if paren_depth == 0 => {
                if bytes.get(i + 1) == Some(&b'&') {
                    saw_top_separator = true; // `&&`
                    i += 2;
                    continue;
                }
                // `&>`/`&>>` redirect, or `>&`/`2>&1` fd-dup: NOT a separator.
                // A bare `&` is the background control operator.
                if bytes.get(i + 1) != Some(&b'>') && prev_raw != b'>' {
                    saw_top_separator = true;
                }
            }
            b';' if paren_depth == 0 => saw_top_separator = true,
            b'\n' if paren_depth == 0 => saw_top_separator = true,
            _ => {}
        }
        i += 1;
    }

    // Any state still open at end of input, or a stray `)`, means the command
    // was not parsed cleanly.
    let imbalance =
        in_single || in_double || in_backtick || escaped || paren_depth != 0 || saw_unmatched_close;

    if saw_top_pipe {
        // Only a clean single pipeline is safe to last-stage dispatch.
        if imbalance || saw_top_separator {
            return PipeSplit::Unsafe;
        }
        match last_pipe_end {
            Some(end) => {
                // `end` sits just past an ASCII `|` or `|&`, so it is a valid
                // char boundary for slicing.
                let last_stage = command[end..].trim();
                if last_stage.is_empty() {
                    PipeSplit::Unsafe // trailing empty stage, e.g. `cargo test |`
                } else {
                    PipeSplit::LastStage(last_stage.to_string())
                }
            }
            // `saw_top_pipe` and `last_pipe_end` are always set together above;
            // this arm only keeps the match exhaustive.
            None => PipeSplit::Unsafe,
        }
    } else if imbalance && command.contains('|') {
        // No resolvable top-level pipe, but a `|` hides in an unbalanced region.
        PipeSplit::Unsafe
    } else {
        PipeSplit::None
    }
}
1059
/// Drops everything up to and including the first unquoted, unescaped `&&` or
/// `;` and returns the command that follows.
///
/// Intended for `cd <path> && cmd` / `cd <path>; cmd`; the function itself does
/// not check that `command` starts with `cd`.
///
/// Quoting rules while scanning for the separator:
/// - text inside single quotes is literal;
/// - text inside double quotes is literal, except that a backslash still
///   escapes the next character (so `\"` does not close the quotes);
/// - outside quotes a backslash escapes the next character, so `\;`, `\&`,
///   `\'` and `\"` are part of the path rather than syntax.
///
/// Returns `None` when no separator is found, or when nothing but whitespace
/// follows the first separator.
fn strip_cd_prefix(command: &str) -> Option<String> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (index, ch) in command.char_indices() {
        if escaped {
            // This character was preceded by a backslash: take it literally.
            escaped = false;
            continue;
        }
        match ch {
            // Backslash is literal inside single quotes, an escape elsewhere.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '&' | ';' if !in_single && !in_double => {
                // `&` and `;` are one byte each, so `index + 1` is a valid
                // char boundary.
                let separator_len = if ch == ';' {
                    1
                } else if command[index + 1..].starts_with('&') {
                    2
                } else {
                    // A lone `&` is not a separator this function handles.
                    continue;
                };
                let rest = command[index + separator_len..].trim_start();
                return if rest.is_empty() {
                    None
                } else {
                    Some(rest.to_string())
                };
            }
            _ => {}
        }
    }
    None
}
1092
1093fn strip_env_prefix(command: &str) -> Option<String> {
1094    // env <ASSIGN>... <cmd> ...
1095    let rest = command.strip_prefix("env")?.trim_start();
1096    strip_leading_assignment_prefix(rest)
1097}
1098
1099fn strip_leading_assignment_prefix(command: &str) -> Option<String> {
1100    let mut index = 0usize;
1101    let mut consumed_assignment = false;
1102
1103    loop {
1104        index = skip_whitespace(command, index);
1105        if index >= command.len() {
1106            break;
1107        }
1108
1109        let word_end = shell_word_end(command, index)?;
1110        if word_end == index {
1111            break;
1112        }
1113
1114        let word = &command[index..word_end];
1115        if !is_env_assignment(word) {
1116            break;
1117        }
1118
1119        consumed_assignment = true;
1120        index = word_end;
1121    }
1122
1123    if !consumed_assignment {
1124        return None;
1125    }
1126
1127    let after = command[index..].trim_start();
1128    if after.is_empty() {
1129        None
1130    } else {
1131        Some(after.to_string())
1132    }
1133}
1134
/// Returns the byte offset of the first non-whitespace character of `input`
/// at or after `index`, or `input.len()` if only whitespace remains.
///
/// An `index` at or past the end of `input` is returned unchanged.
fn skip_whitespace(input: &str, index: usize) -> usize {
    if index >= input.len() {
        return index;
    }
    // Sum the UTF-8 widths of the whitespace run so the result stays on a
    // char boundary.
    let run_len: usize = input[index..]
        .chars()
        .take_while(|ch| ch.is_whitespace())
        .map(char::len_utf8)
        .sum();
    index + run_len
}
1147
/// Finds where the shell word beginning at byte offset `start` ends.
///
/// The word ends at the first unquoted, unescaped whitespace character or
/// `;` / `&` / `|`; the returned offset points at that terminator. If the
/// input ends first, `command.len()` is returned.
///
/// Quoting: single quotes make everything literal up to the closing quote;
/// double quotes do the same except that a backslash still escapes the next
/// character; outside quotes a backslash escapes the next character.
///
/// Returns `None` when the input ends inside a quote or right after a
/// backslash.
fn shell_word_end(command: &str, start: usize) -> Option<usize> {
    // `None` = unquoted, `Some(q)` = inside quotes opened by `q`.
    let mut quote: Option<char> = None;
    let mut pending_escape = false;

    for (offset, ch) in command[start..].char_indices() {
        if pending_escape {
            pending_escape = false;
            continue;
        }

        match (quote, ch) {
            // Single quotes: only the closing quote is special.
            (Some('\''), '\'') => quote = None,
            (Some('\''), _) => {}
            // Everywhere else a backslash escapes the next character.
            (_, '\\') => pending_escape = true,
            // Double quotes: only the closing quote ends them.
            (Some('"'), '"') => quote = None,
            (Some(_), _) => {}
            // Unquoted text.
            (None, '\'' | '"') => quote = Some(ch),
            (None, ';' | '&' | '|') => return Some(start + offset),
            (None, other) if other.is_whitespace() => return Some(start + offset),
            (None, _) => {}
        }
    }

    if quote.is_some() || pending_escape {
        None
    } else {
        Some(command.len())
    }
}
1187
/// Reports whether `token` has the shape `NAME=value`.
///
/// `NAME` is the text before the first `=`. It must be non-empty, start with
/// an ASCII letter or `_`, and continue with ASCII letters, digits, or `_`.
/// The value part is not inspected. Option-like tokens such as `-e=1` are
/// rejected because `-` cannot start a name.
fn is_env_assignment(token: &str) -> bool {
    let Some(eq_at) = token.find('=') else {
        return false;
    };
    // Non-ASCII characters encode to bytes >= 0x80, which fail the ASCII
    // checks below, so inspecting bytes is equivalent to inspecting chars.
    let mut name = token[..eq_at].bytes();
    let starts_ok = matches!(name.next(), Some(b) if b.is_ascii_alphabetic() || b == b'_');
    starts_ok && name.all(|b| b.is_ascii_alphanumeric() || b == b'_')
}
1202
1203fn strip_timeout_prefix(command: &str) -> Option<String> {
1204    let rest = command.strip_prefix("timeout")?.trim_start();
1205    // Next token must look like a duration (digits, optional trailing unit s/m/h).
1206    let mut iter = rest.splitn(2, char::is_whitespace);
1207    let duration = iter.next()?;
1208    let after = iter.next()?.trim_start();
1209    if after.is_empty() || !looks_like_duration(duration) {
1210        return None;
1211    }
1212    Some(after.to_string())
1213}
1214
/// Reports whether `token` looks like a `timeout` duration argument.
///
/// Accepted shape: an unsigned decimal number, optionally with a single
/// decimal point (`30`, `0.5`, `.5`, `5.`), followed by at most one unit
/// suffix out of `s`, `m`, `h`, `d`. GNU `timeout` documents its DURATION as a
/// floating-point number with such a suffix, so fractional values are accepted
/// as well as integers.
///
/// Rejected: the empty string, a bare suffix or bare `.`, signs, exponents,
/// multi-character units such as `ms`, and anything with a second `.`.
fn looks_like_duration(token: &str) -> bool {
    // Peel off at most one trailing unit character.
    let number = token
        .strip_suffix(|unit: char| matches!(unit, 's' | 'm' | 'h' | 'd'))
        .unwrap_or(token);
    // Split at the first `.`; a second `.` lands in `fraction` and fails the
    // digit check below.
    let (whole, fraction) = number.split_once('.').unwrap_or((number, ""));
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    // At least one digit overall, and nothing but digits around the point.
    (!whole.is_empty() || !fraction.is_empty()) && all_digits(whole) && all_digits(fraction)
}
1237
1238/// Resolve the harness-scoped user-filter directory for an arbitrary storage_dir.
1239/// Used by `aft doctor filters` to inspect filters without needing a live AppContext.
1240pub fn user_filter_dir(storage_dir: &Path, harness: Harness) -> PathBuf {
1241    storage_dir.join(harness.storage_segment()).join("filters")
1242}
1243
/// Path of the old, non-harness-scoped filter directory:
/// `<storage_dir>/filters`. Only composes the path.
fn legacy_user_filter_dir(storage_dir: &Path) -> PathBuf {
    let mut dir = storage_dir.to_path_buf();
    dir.push("filters");
    dir
}
1247
1248/// Move filters written by the short-lived root-scoped v0.27 layout into the
1249/// active harness directory. Existing harness files win; colliding root files
1250/// are left in place so we never overwrite user-authored filters.
1251pub(crate) fn repair_legacy_user_filter_dir(storage_dir: &Path, harness: Harness) {
1252    let legacy_dir = legacy_user_filter_dir(storage_dir);
1253    if !legacy_dir.exists() {
1254        return;
1255    }
1256
1257    let entries = match fs::read_dir(&legacy_dir) {
1258        Ok(entries) => entries.filter_map(Result::ok).collect::<Vec<_>>(),
1259        Err(_) => return,
1260    };
1261    if entries.is_empty() {
1262        let _ = fs::remove_dir(&legacy_dir);
1263        return;
1264    }
1265
1266    let harness_dir = user_filter_dir(storage_dir, harness);
1267    if fs::create_dir_all(&harness_dir).is_err() {
1268        return;
1269    }
1270
1271    for entry in entries {
1272        let target = harness_dir.join(entry.file_name());
1273        if target.exists() {
1274            continue;
1275        }
1276        let _ = fs::rename(entry.path(), target);
1277    }
1278
1279    if fs::read_dir(&legacy_dir)
1280        .map(|mut entries| entries.next().is_none())
1281        .unwrap_or(false)
1282    {
1283        let _ = fs::remove_dir(&legacy_dir);
1284    }
1285}
1286
/// Builds the project-local filter directory:
/// `<project_root>/.cortexkit/aft/filters`.
///
/// The path is returned unconditionally; no trust check and no filesystem
/// access happens here, so callers that gate loading on trust must check it
/// themselves.
pub fn project_filter_dir(project_root: &Path) -> PathBuf {
    let mut dir = project_root.to_path_buf();
    for segment in [".cortexkit", "aft", "filters"] {
        dir.push(segment);
    }
    dir
}
1293
#[cfg(test)]
mod tests {
    use super::*;

    /// Both directory helpers only compose paths from their inputs.
    #[test]
    fn user_and_project_filter_dir_helpers() {
        let user_dir = user_filter_dir(Path::new("/tmp/aft-storage"), Harness::Opencode);
        assert_eq!(user_dir, Path::new("/tmp/aft-storage/opencode/filters"));

        let project_dir = project_filter_dir(Path::new("/repo"));
        assert_eq!(project_dir, Path::new("/repo/.cortexkit/aft/filters"));
    }

    /// A legacy-only file is moved into the harness directory, while a file
    /// that exists in both places is left untouched on both sides.
    #[test]
    fn repair_legacy_user_filter_dir_moves_root_filters_without_overwrite() {
        let sandbox = tempfile::tempdir().unwrap();
        let root = sandbox.path();
        let legacy = root.join("filters");
        let scoped = root.join("opencode/filters");

        fs::create_dir_all(&legacy).unwrap();
        fs::create_dir_all(&scoped).unwrap();
        fs::write(legacy.join("root-only.toml"), "root").unwrap();
        fs::write(legacy.join("collides.toml"), "root").unwrap();
        fs::write(scoped.join("collides.toml"), "harness").unwrap();

        repair_legacy_user_filter_dir(root, Harness::Opencode);

        let read = |path: PathBuf| fs::read_to_string(path).unwrap();

        // Migrated file keeps its content.
        assert_eq!(read(scoped.join("root-only.toml")), "root");
        // Colliding harness file is not overwritten...
        assert_eq!(read(scoped.join("collides.toml")), "harness");
        // ...and the colliding legacy file stays in place.
        assert_eq!(read(legacy.join("collides.toml")), "root");
        assert!(!legacy.join("root-only.toml").exists());
    }
}
1340
#[cfg(test)]
mod dispatch_specificity_tests {
    use super::*;
    use crate::compress::toml_filter::FilterRegistry;

    /// Default-constructed filter registry used by every test in this module.
    fn empty_registry() -> FilterRegistry {
        FilterRegistry::default()
    }

    /// Runs `compress_with_registry` for `cmd` over `output` with the default
    /// registry and returns only the resulting text. The tests cannot compare
    /// compressors by identity, so they look for markers in this text instead.
    fn dispatch(cmd: &str, output: &str) -> String {
        let registry = empty_registry();
        compress_with_registry(cmd, output, &registry).text
    }

    /// True when `text` contains at least one of `markers`.
    fn contains_any(text: &str, markers: &[&str]) -> bool {
        markers.iter().any(|marker| text.contains(*marker))
    }

    #[test]
    fn generic_dispatch_does_not_classify_error_or_warning_words() {
        let raw = "error: this is just a log line\nwarning: this too";
        let result = compress_with_registry("unknown-tool", raw, &empty_registry());

        // Nothing may be reported as dropped, and the line must survive.
        assert!(result.dropped_by_class.is_empty());
        assert!(!result.had_inner_drop);
        assert!(result.text.contains("error: this is just a log line"));
    }

    #[test]
    fn bun_run_vitest_routes_to_vitest_not_generic() {
        // Small vitest-shaped summary; at least one summary label must remain.
        let raw = "Test Files  1 passed (1)\n     Tests  4 passed (4)\n  Start at  10:00:00\n  Duration  120ms\n";
        let rendered = dispatch("bun run vitest", raw);
        assert!(contains_any(&rendered, &["Tests", "Test Files"]));
    }

    #[test]
    fn npm_test_routes_to_vitest_when_output_is_vitest_shaped() {
        // The command carries no vitest token; only the output looks like
        // vitest. The RERUN line must be gone while FAIL/Duration stay.
        let raw = "RERUN src/foo.test.ts x1\nFAIL src/foo.test.ts\nTest Files  1 failed (1)\nDuration    120ms\n";
        let rendered = dispatch("npm test", raw);
        assert!(rendered.contains("FAIL src/foo.test.ts"));
        assert!(rendered.contains("Duration    120ms"));
        assert!(!rendered.contains("RERUN"));
    }

    #[test]
    fn bun_run_vitest_token_match_wins_over_bun_head_match() {
        // Head token is `bun`, but the command also names vitest.
        let raw = "PASS src/a.test.ts (1)\n PASS src/b.test.ts (1)\nTest Files  2 passed (2)\n     Tests  4 passed (4)\n";
        let rendered = dispatch("bun run vitest run", raw);
        assert!(contains_any(&rendered, &["Test Files", "PASS"]));
    }

    #[test]
    fn bunx_jest_routes_to_vitest_module() {
        // Both jest summary labels must be kept.
        let raw = "PASS src/foo.test.js (1.2s)\nTest Suites: 1 passed, 1 total\nTests:       3 passed, 3 total\n";
        let rendered = dispatch("bunx jest --json", raw);
        assert!(rendered.contains("Tests:") && rendered.contains("Test Suites"));
    }

    #[test]
    fn pnpm_run_vitest_routes_to_vitest() {
        let raw = "Test Files  1 passed (1)\n     Tests  10 passed (10)\n";
        let rendered = dispatch("pnpm run vitest", raw);
        assert!(contains_any(&rendered, &["Tests", "Test Files"]));
    }

    #[test]
    fn npx_eslint_routes_to_eslint_not_generic() {
        // Either the rule id or the ✖ summary must survive.
        let raw = "\n/tmp/a.js\n  1:1  error  'foo' is defined but never used  no-unused-vars\n\n✖ 1 problem (1 error, 0 warnings)\n";
        let rendered = dispatch("npx eslint .", raw);
        assert!(contains_any(&rendered, &["no-unused-vars", "✖"]));
    }

    #[test]
    fn npm_run_lint_without_linter_output_shape_falls_back() {
        // No eslint token in the command and no eslint summary in the output;
        // the plain message must still be present.
        let raw = "> my-project@1.0.0 lint\n> eslint .\n\nAll good.\n";
        let rendered = dispatch("npm run lint", raw);
        assert!(rendered.contains("All good."));
    }

    #[test]
    fn bun_test_still_routes_to_bun_test_compressor() {
        // Regression guard for `bun test`: the result must still carry
        // bun-test-shaped markers.
        let raw = "bun test v1.3.14\n\nsrc/foo.test.ts:\n(pass) my test [0.5ms]\n\n 1 pass\n 0 fail\n 1 expect() calls\nRan 1 tests across 1 files. [1.00ms]\n";
        let rendered = dispatch("bun test", raw);
        assert!(contains_any(&rendered, &["(pass)", "1 pass"]));
    }

    #[test]
    fn bunx_vitest_routes_to_vitest() {
        let raw = "Test Files  1 passed (1)\n     Tests  3 passed (3)\n";
        let rendered = dispatch("bunx vitest run", raw);
        assert!(contains_any(&rendered, &["Tests", "Test Files"]));
    }

    #[test]
    fn cargo_test_still_routes_to_cargo() {
        // Regression guard: the failure wording must be kept for `cargo test`.
        let raw = "running 5 tests\ntest foo ... ok\ntest bar ... FAILED\n\nfailures:\n\ntest result: FAILED. 4 passed; 1 failed\n";
        let rendered = dispatch("cargo test", raw);
        assert!(contains_any(&rendered, &["failed", "FAILED"]));
    }

    #[test]
    fn top_level_piped_cargo_test_uses_generic_output() {
        let raw = "running 1 test\ntest ok_test ... ok\n\ntest result: ok. 1 passed; 0 failed\n";

        let piped = compress_with_registry("cargo test | cat", raw, &empty_registry());

        // With a top-level pipe the per-test line must not be dropped.
        assert!(
            piped.text.contains("test ok_test ... ok"),
            "piped cargo output must stay generic/raw, got: {}",
            piped.text
        );
    }

    #[test]
    fn non_piped_cargo_test_still_uses_cargo_compressor() {
        let raw = "running 1 test\ntest ok_test ... ok\n\ntest result: ok. 1 passed; 0 failed\n";

        let plain = compress_with_registry("cargo test", raw, &empty_registry());

        // Without a pipe the header and result stay, the per-test line goes.
        assert!(plain.text.contains("running 1 test"));
        assert!(plain.text.contains("test result: ok"));
        assert!(
            !plain.text.contains("test ok_test ... ok"),
            "non-piped cargo test should keep using the cargo compressor, got: {}",
            plain.text
        );
    }

    #[test]
    fn git_status_still_routes_to_git() {
        // Regression guard for `git status`.
        let raw =
            "On branch main\nYour branch is up to date.\n\nnothing to commit, working tree clean\n";
        let rendered = dispatch("git status", raw);
        assert!(contains_any(&rendered, &["branch", "clean"]));
    }

    #[test]
    fn pnpm_install_still_routes_to_pnpm() {
        // Regression guard for `pnpm install`.
        let raw = "Progress: resolved 100, downloaded 50, added 50\nAdded 50 packages\n";
        let rendered = dispatch("pnpm install", raw);
        assert!(contains_any(&rendered, &["Added", "Progress"]));
    }
}
1512
1513#[cfg(test)]
1514mod exit_code_safety_tests {
1515    use super::*;
1516    use crate::compress::toml_filter::{build_registry, FilterRegistry};
1517
1518    fn empty_registry() -> FilterRegistry {
1519        FilterRegistry::default()
1520    }
1521
1522    #[test]
1523    fn go_build_failure_signal_preserved_even_when_exit_zero_masks_failure() {
1524        let output = "go: go.mod file not found in current directory or any parent directory; see 'go help modules'\n";
1525
1526        let failed =
1527            compress_with_registry_exit_code("go build ./...", output, Some(1), &empty_registry());
1528        assert!(!failed.text.contains("go build: ok"));
1529        assert!(failed.text.contains("go.mod file not found"));
1530
1531        let masked =
1532            compress_with_registry_exit_code("go build ./...", output, Some(0), &empty_registry());
1533        assert!(!masked.text.contains("go build: ok"));
1534        assert!(masked.text.contains("go.mod file not found"));
1535    }
1536
1537    #[test]
1538    fn playwright_nonzero_crash_does_not_become_passed_summary() {
1539        let output = r#"Running 4 tests using 2 workers
1540
1541  ✓  1 [chromium] › example.spec.ts:5:1 › has title (2.3s)
1542  ✓  2 [chromium] › example.spec.ts:9:1 › get started link (1.8s)
1543  ✓  3 [chromium] › nav.spec.ts:3:1 › navigates (1.2s)
1544  ✓  4 [chromium] › auth.spec.ts:7:1 › logs out (1.0s)
1545
1546  4 passed (6.3s)
1547Error: browserType.launch: Target page, context or browser has been closed
1548"#;
1549
1550        let failed = compress_with_registry_exit_code(
1551            "npx playwright test",
1552            output,
1553            Some(1),
1554            &empty_registry(),
1555        );
1556        assert!(!failed.text.starts_with("playwright: 4 tests passed"));
1557        assert!(failed.text.contains("browserType.launch"));
1558    }
1559
1560    #[test]
1561    fn cargo_test_compile_error_nonzero_preserves_error_code_diagnostic() {
1562        let output = r#"   Compiling demo v0.1.0 (/tmp/demo)
1563error[E0432]: unresolved import `crate::missing`
1564 --> src/lib.rs:1:5
1565  |
15661 | use crate::missing;
1567  |     ^^^^^^^^^^^^^^ no `missing` in the root
1568
1569error: could not compile `demo` (lib test) due to 1 previous error
1570"#;
1571
1572        let failed =
1573            compress_with_registry_exit_code("cargo test", output, Some(101), &empty_registry());
1574        assert!(failed.text.contains("error[E0432]"));
1575        assert!(failed.text.contains("unresolved import"));
1576        assert!(failed.text.contains("error: could not compile"));
1577    }
1578
1579    #[test]
1580    fn chained_mypy_success_then_later_failure_uses_failure_preserving_output() {
1581        let output = "Success: no issues found in 1 source file\nError: node process exploded\n";
1582
1583        let failed = compress_with_registry_exit_code(
1584            "mypy src && node fail.js",
1585            output,
1586            Some(1),
1587            &empty_registry(),
1588        );
1589        assert_ne!(failed.text, "mypy: clean");
1590        assert!(failed.text.contains("Error: node process exploded"));
1591    }
1592
1593    #[test]
1594    fn toml_shortcircuit_is_skipped_for_nonzero_exit() {
1595        let registry = build_registry(
1596            &[(
1597                "wget",
1598                r#"[filter]
1599matches = ["wget"]
1600
1601[shortcircuit]
1602when = '(?s).*'
1603replacement = "wget: ok"
1604"#,
1605            )],
1606            None,
1607            None,
1608        );
1609        let output = "Connecting to example.invalid\nerror: connection refused\n";
1610
1611        let failed = compress_with_registry_exit_code(
1612            "wget https://example.invalid",
1613            output,
1614            Some(1),
1615            &registry,
1616        );
1617        assert_ne!(failed.text, "wget: ok");
1618        assert!(failed.text.contains("error: connection refused"));
1619    }
1620
1621    #[test]
1622    fn unknown_exit_code_keeps_byte_identical_legacy_compressor_output() {
1623        let output =
1624            "Success: no issues found in 1 source file\nError: later chained command failed\n";
1625
1626        let legacy = compress_with_registry_exit_code(
1627            "mypy src && node fail.js",
1628            output,
1629            None,
1630            &empty_registry(),
1631        );
1632        assert_eq!(legacy.text, "mypy: clean");
1633    }
1634
1635    #[test]
1636    fn killed_exit_sentinel_rejects_clean_legacy_summary() {
1637        let output = "Success: no issues found in 1 source file
1638Error: later chained command failed
1639";
1640
1641        let killed = compress_with_registry_exit_code(
1642            "mypy src && node fail.js",
1643            output,
1644            Some(137),
1645            &empty_registry(),
1646        );
1647        assert_ne!(killed.text, "mypy: clean");
1648        assert!(killed.text.contains("Error: later chained command failed"));
1649    }
1650
1651    #[test]
1652    fn nonzero_clean_eslint_json_summary_falls_back_to_raw_output() {
1653        let output =
1654            r#"[{"filePath":"/repo/src/main.ts","messages":[],"errorCount":0,"warningCount":0}]"#;
1655
1656        let failed = compress_with_registry_exit_code(
1657            "eslint -f json .",
1658            output,
1659            Some(1),
1660            &empty_registry(),
1661        );
1662
1663        assert_ne!(failed.text, "eslint: no issues");
1664        assert!(failed.text.contains(r#""messages":[]"#));
1665    }
1666
1667    #[test]
1668    fn nonzero_appends_distinct_missing_raw_failure_lines() {
1669        let raw = "Error: first failure
1670progress
1671Error: second failure
1672";
1673        let compressed = CompressionResult::new("Error: first failure");
1674
1675        let preserved = failure_preserving_result("tool", raw, compressed, Some(1));
1676
1677        assert!(preserved.text.contains("Error: first failure"));
1678        assert!(preserved.text.contains("Error: second failure"));
1679        assert!(preserved
1680            .text
1681            .contains("[raw failure lines preserved by AFT]"));
1682    }
1683
1684    #[test]
1685    fn nonzero_cargo_failure_class_cap_falls_back_to_all_failures() {
1686        let mut output = String::from(
1687            "running 40 tests
1688
1689failures:
1690
1691",
1692        );
1693        for index in 0..40 {
1694            output.push_str(&format!(
1695                "---- case_{index} stdout ----
1696thread 'case_{index}' panicked at src/lib.rs:{index}:1
1697
1698"
1699            ));
1700        }
1701        output.push_str(
1702            "failures:
1703",
1704        );
1705        for index in 0..40 {
1706            output.push_str(&format!(
1707                "    case_{index}
1708"
1709            ));
1710        }
1711        output.push_str(
1712            "
1713test result: FAILED. 0 passed; 40 failed; 0 ignored; 0 measured; 0 filtered out
1714",
1715        );
1716
1717        let failed =
1718            compress_with_registry_exit_code("cargo test", &output, Some(101), &empty_registry());
1719
1720        assert!(failed.text.contains("---- case_0 stdout ----"));
1721        assert!(failed.text.contains("---- case_39 stdout ----"));
1722        assert!(failed.dropped_by_class.is_empty());
1723    }
1724
1725    #[test]
1726    fn toml_shortcircuit_is_skipped_for_unknown_exit_when_failure_signal_exists() {
1727        let registry = build_registry(
1728            &[(
1729                "make",
1730                r#"[filter]
1731matches = ["make"]
1732
1733[shortcircuit]
1734when = '(?s).*'
1735replacement = "make: ok"
1736"#,
1737            )],
1738            None,
1739            None,
1740        );
1741        let output = "build step
1742ERROR: compiler crashed
1743";
1744
1745        let failed = compress_with_registry_exit_code("make", output, None, &registry);
1746
1747        assert_ne!(failed.text, "make: ok");
1748        assert!(failed.text.contains("ERROR: compiler crashed"));
1749    }
1750
1751    #[test]
1752    fn successful_exit_still_gets_concise_success_summary() {
1753        let output = r#"Running 4 tests using 2 workers
1754
1755  ✓  1 [chromium] › example.spec.ts:5:1 › has title (2.3s)
1756  ✓  2 [chromium] › example.spec.ts:9:1 › get started link (1.8s)
1757  ✓  3 [chromium] › nav.spec.ts:3:1 › navigates (1.2s)
1758  ✓  4 [chromium] › auth.spec.ts:7:1 › logs out (1.0s)
1759
1760  4 passed (6.3s)
1761"#;
1762
1763        let successful =
1764            compress_with_registry_exit_code("playwright test", output, Some(0), &empty_registry());
1765        assert_eq!(successful.text, "playwright: 4 tests passed (6.3s)");
1766    }
1767}
1768
/// Tests for command normalization before compressor dispatch: prefix
/// stripping, top-level pipe splitting, comment stripping, shell-boundary
/// detection, and end-to-end routing through `compress_with_registry`.
#[cfg(test)]
mod normalize_command_tests {
    use super::*;

    /// Asserts that `normalize_command_for_dispatch(command)` yields
    /// `expected`. Failures are reported at the calling test's line.
    #[track_caller]
    fn expect_normalized(command: &str, expected: Option<&str>) {
        assert_eq!(
            normalize_command_for_dispatch(command).as_deref(),
            expected
        );
    }

    // -------- prefix stripping --------

    #[test]
    fn passes_bare_commands_unchanged() {
        for command in ["bun test", "cargo build", "git status"] {
            expect_normalized(command, None);
        }
    }

    #[test]
    fn strips_cd_and_amp_prefix() {
        expect_normalized("cd /repo && bun test", Some("bun test"));
        expect_normalized(
            "cd /repo/packages/aft && cargo test --release",
            Some("cargo test --release"),
        );
    }

    #[test]
    fn strips_cd_and_semicolon_prefix() {
        expect_normalized("cd /repo; bun test", Some("bun test"));
    }

    #[test]
    fn strips_cd_with_quoted_path() {
        expect_normalized(
            "cd \"/path with space\" && npm install",
            Some("npm install"),
        );
    }

    #[test]
    fn strips_env_assignments() {
        expect_normalized("env FOO=bar npm install", Some("npm install"));
        expect_normalized(
            "env FOO=bar BAZ=qux RUST_LOG=info cargo test",
            Some("cargo test"),
        );
    }

    #[test]
    fn strips_bare_assignment_prefixes() {
        let cases = [
            ("NODE_ENV=production npm install", "npm install"),
            ("FOO=1 BAR=2 cargo test", "cargo test"),
            ("RUSTFLAGS='-C debug' cargo build", "cargo build"),
        ];
        for (command, stripped) in cases {
            expect_normalized(command, Some(stripped));
        }
    }

    #[test]
    fn does_not_strip_later_assignment_arguments() {
        expect_normalized("npm install foo=bar", None);
    }

    #[test]
    fn env_without_assignments_returns_none() {
        // Without any assignment after it, `env` is the env-listing command
        // rather than a prefix to strip.
        expect_normalized("env npm install", None);
    }

    #[test]
    fn strips_timeout_prefix() {
        expect_normalized("timeout 30 cargo test", Some("cargo test"));
        expect_normalized("timeout 5m bun test", Some("bun test"));
    }

    #[test]
    fn strips_nohup_prefix() {
        expect_normalized(
            "nohup ./long-running-script.sh",
            Some("./long-running-script.sh"),
        );
    }

    #[test]
    fn strips_paren_then_cd_and_amp() {
        expect_normalized("(cd /repo && bun test", Some("bun test"));
    }

    #[test]
    fn chains_multiple_prefixes() {
        // `env` assignments followed by `timeout`, then the real command.
        expect_normalized("env FOO=bar timeout 30 cargo test", Some("cargo test"));
        // `cd` followed by `env` assignments, then the real command.
        expect_normalized("cd /repo && env FOO=bar npm install", Some("npm install"));
    }

    // -------- end-to-end dispatch via normalize() --------

    /// Registry with no TOML filters loaded.
    fn empty_registry() -> FilterRegistry {
        FilterRegistry::default()
    }

    #[test]
    fn cd_prefix_bun_test_still_routes_to_bun_test() {
        let bun_log = concat!(
            "bun test v1.3.14\n",
            "\n",
            "src/a.test.ts:\n",
            "(pass) ok [0.1ms]\n",
            "\n",
            " 1 pass\n",
            " 0 fail\n",
            " 1 expect() calls\n",
            "Ran 1 tests across 1 files. [1.00ms]\n",
        );
        let compressed = compress_with_registry("cd /repo && bun test", bun_log, &empty_registry());
        // In its pass-only path the bun test compressor emits (pass) /
        // "1 pass" / "Ran ...". A generic middle-truncate would drop these
        // and keep the original, so finding one of them shows that
        // normalization worked.
        assert!(["(pass)", "1 pass"]
            .into_iter()
            .any(|marker| compressed.contains(marker)));
    }

    #[test]
    fn cd_prefix_cargo_test_still_routes_to_cargo() {
        let cargo_log = concat!(
            "running 5 tests\n",
            "test foo ... ok\n",
            "test bar ... FAILED\n",
            "\n",
            "failures:\n",
            "\n",
            "test result: FAILED. 4 passed; 1 failed\n",
        );
        let compressed =
            compress_with_registry("cd /repo && cargo test", cargo_log, &empty_registry());
        assert!(["FAILED", "failed"]
            .into_iter()
            .any(|marker| compressed.contains(marker)));
    }

    #[test]
    fn env_prefix_npm_install_still_routes_to_npm() {
        let npm_log = "added 50 packages, and audited 100 packages in 3s\n";
        let compressed = compress_with_registry(
            "env NODE_ENV=production npm install",
            npm_log,
            &empty_registry(),
        );
        // The install path of NpmCompressor retains the "added N packages" /
        // "audited" markers.
        assert!(["added", "audited"]
            .into_iter()
            .any(|marker| compressed.contains(marker)));
    }

    #[test]
    fn bare_assignment_prefix_npm_install_routes_to_npm() {
        let npm_log = concat!(
            "npm http fetch GET 200 https://registry.npmjs.org/foo 123ms\n",
            "npm WARN deprecated old-pkg@1.0.0: use new-pkg instead\n",
            "\n",
            "added 42 packages in 2s\n",
            "\n",
            "audited 100 packages in 2s\n",
            "\n",
            "found 0 vulnerabilities\n",
        );
        let compressed =
            compress_with_registry("NODE_ENV=production npm install", npm_log, &empty_registry());
        assert!(!compressed.contains("npm http fetch"));
        assert!(compressed.contains("audited 100 packages"));
    }

    #[test]
    fn bare_assignment_prefix_cargo_test_routes_to_cargo() {
        let cargo_log = concat!(
            "running 1 test\n",
            "test foo ... ok\n",
            "\n",
            "test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out\n",
        );
        let compressed =
            compress_with_registry("FOO=1 BAR=2 cargo test", cargo_log, &empty_registry());
        assert!(compressed.contains("running 1 test"));
        assert!(compressed.contains("test result: ok"));
        assert!(!compressed.contains("test foo ... ok"));
    }

    #[test]
    fn quoted_assignment_prefix_cargo_build_routes_to_cargo() {
        let build_log = concat!(
            "   Compiling foo v0.1.0\n",
            "warning: unused variable: `x`\n",
            " --> src/lib.rs:1:9\n",
            "  |\n",
            "1 |     let x = 1;\n",
            "  |         ^ help: if this is intentional, prefix it with an underscore: `_x`\n",
            "\n",
            "    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.12s\n",
        );
        let compressed = compress_with_registry(
            "RUSTFLAGS='-C debug' cargo build",
            build_log,
            &empty_registry(),
        );
        assert!(!compressed.contains("Compiling foo"));
        assert!(compressed.contains("warning: unused variable"));
        assert!(compressed.contains("Finished `dev` profile"));
    }

    #[test]
    fn timeout_prefix_cargo_build_still_routes_to_cargo() {
        let build_log = concat!(
            "   Compiling foo v0.1.0\n",
            "    Finished `dev` profile [unoptimized] target(s) in 5s\n",
        );
        let compressed =
            compress_with_registry("timeout 30 cargo build", build_log, &empty_registry());
        // For build/check/run the CargoCompressor keeps the structure intact.
        assert!(["Compiling", "Finished"]
            .into_iter()
            .any(|marker| compressed.contains(marker)));
    }

    // -------- pipe handling in normalize_command_for_dispatch --------

    #[test]
    fn normalize_splits_pipe_and_takes_last_stage() {
        expect_normalized("git log | grep fix", Some("grep fix"));
    }

    #[test]
    fn normalize_cd_prefix_then_pipe_takes_last_stage() {
        expect_normalized("cd /repo && git log | grep fix", Some("grep fix"));
    }

    #[test]
    fn normalize_no_pipe_returns_none() {
        expect_normalized("git log", None);
    }

    #[test]
    fn normalize_quoted_pipe_not_split() {
        expect_normalized("grep \"a|b\" file.txt", None);
    }

    #[test]
    fn normalize_balanced_command_substitution_splits_top_level_pipe() {
        // The `|` inside $(...) sits at depth > 0 and has to be ignored, while
        // the genuine top-level `| grep x` splits off the last stage. The OLD
        // code returned None here and fell back to head-token dispatch on the
        // whole command, which is exactly the data-loss path that issue #137
        // describes.
        expect_normalized("echo $(cmd | cmd) | grep x", Some("grep x"));
    }

    #[test]
    fn normalize_inner_pipe_in_substitution_without_top_level_pipe_is_none() {
        // The only `|` lives inside $(...); there is no top-level pipe.
        expect_normalized("echo $(cargo test | cat)", None);
    }

    #[test]
    fn normalize_double_pipe_not_split() {
        expect_normalized("git log || echo fail", None);
    }

    #[test]
    fn normalize_multi_pipe_returns_last_stage() {
        expect_normalized("git log | grep fix | head -5", Some("head -5"));
    }

    #[test]
    fn normalize_process_substitution_splits_top_level_pipe() {
        // The pipe inside `<(...)` is ignored; the top-level `| grep x`
        // splits off the last stage.
        expect_normalized("cat <(echo a | cat) | grep x", Some("grep x"));
    }

    #[test]
    fn normalize_pipe_ampersand_splits_last_stage() {
        // `|&` pipes stdout and stderr together. It is a real pipe boundary,
        // not a `|` followed by `&`.
        expect_normalized("cargo test |& grep FAIL", Some("grep FAIL"));
    }

    #[test]
    fn piped_cargo_test_grep_preserves_failed() {
        let filtered = "test foo ... FAILED\n";
        let compressed =
            compress_with_registry("cargo test | grep FAIL", filtered, &empty_registry());
        assert!(
            compressed.text.contains("FAILED"),
            "grep-filtered FAILED must survive, got: {}",
            compressed.text
        );
    }

    #[test]
    fn unsafe_piped_command_forces_generic_and_preserves_output() {
        // An unbalanced quote means the scanner cannot trust its parse. Since
        // a `|` is present, dispatch must be forced to generic instead of
        // letting CargoCompressor claim `cargo test | …` and drop the single
        // grep-filtered line.
        let filtered = "test foo ... FAILED\n";
        let compressed =
            compress_with_registry("cargo test | grep \"FAIL", filtered, &empty_registry());
        assert!(
            compressed.text.contains("FAILED"),
            "unsafe pipe must not drop output, got: {}",
            compressed.text
        );
    }

    #[test]
    fn split_top_level_pipe_variants() {
        let last_stage = |stage: &str| PipeSplit::LastStage(stage.to_string());
        let cases = [
            ("git log", PipeSplit::None),
            ("git log | grep fix", last_stage("grep fix")),
            // `||` means logical-or; it is not a pipe.
            ("a || b", PipeSplit::None),
            // A pipe inside a subshell is not a top-level boundary.
            ("(a | b)", PipeSplit::None),
            // A pipe inside $() is not a top-level boundary.
            ("echo $(a | b)", PipeSplit::None),
            // Pipe plus an unbalanced quote → unsafe.
            ("a | grep \"x", PipeSplit::Unsafe),
            // Pipe plus an unbalanced paren → unsafe.
            ("$(a | b | grep x", PipeSplit::Unsafe),
            // FAIL-CLOSED cases (Oracle findings): never last-stage a pipe
            // when other top-level structure could mean the captured output
            // does not belong to the last stage.
            // Trailing empty stage.
            ("cargo test |", PipeSplit::Unsafe),
            ("cargo test |&", PipeSplit::Unsafe),
            // Pipe alongside a top-level separator.
            ("true | cargo test --quiet ; printf X", PipeSplit::Unsafe),
            ("true | cargo test && echo done", PipeSplit::Unsafe),
            // Unmatched close paren together with a pipe.
            ("echo ) | cargo test", PipeSplit::Unsafe),
            // A bare `&` (background) is a separator, whereas `2>&1` / `&>`
            // redirects are not.
            ("a | b & c", PipeSplit::Unsafe),
            ("cargo test 2>&1 | grep FAIL", last_stage("grep FAIL")),
        ];
        for (input, expected) in cases {
            assert_eq!(split_top_level_pipe(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn strip_top_level_comment_removes_only_real_comments() {
        let cases = [
            ("printf keep # | cargo test", "printf keep "),
            ("printf keep # cargo test", "printf keep "),
            // A `#` that is not at a word boundary stays literal (for example
            // a fragment/anchor).
            ("curl http://x/y#frag", "curl http://x/y#frag"),
            // A `#` inside quotes stays literal.
            ("grep \"# not a comment\" f", "grep \"# not a comment\" f"),
            ("echo '# literal'", "echo '# literal'"),
            // Nothing to strip → unchanged.
            ("git log | grep fix", "git log | grep fix"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_top_level_comment(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn commented_command_does_not_misdispatch_and_preserves_output() {
        // CargoCompressor must not claim this printf command's output because
        // of the `# cargo test` comment and then drop it, whether or not a
        // pipe is present.
        let commands = ["printf keep # | cargo test", "printf keep # cargo test"];
        for cmd in commands {
            let compressed = compress_with_registry(cmd, "keep\n", &empty_registry());
            assert!(
                compressed.text.contains("keep"),
                "comment must not drop output for {cmd:?}, got: {}",
                compressed.text
            );
        }
    }

    #[test]
    fn pipe_with_trailing_command_chain_preserves_sentinel() {
        // For `true | cargo test ; printf SENTINEL` the captured output
        // contains SENTINEL; cargo must not claim it and drop that line.
        let command = "true | cargo test --quiet ; printf SENTINEL";
        let compressed = compress_with_registry(command, "SENTINEL\n", &empty_registry());
        assert!(
            compressed.text.contains("SENTINEL"),
            "trailing-chain output must survive, got: {}",
            compressed.text
        );
    }

    #[test]
    fn is_shell_boundary_covers_redirects_and_operators() {
        let boundaries = [
            "|",
            "|&",
            ";",
            "&",
            "&&",
            "||",
            ">",
            ">>",
            "<",
            "<<",
            "<<<",
            "&>",
            "&>>",
            "2>",
            "2>>",
            "2>&1",
            "1>&2",
            ">/dev/null",
            "2>/dev/null",
        ];
        let ordinary_words = ["test", "log", "build", "--release", "-v", "file.txt"];

        for tok in boundaries {
            assert!(is_shell_boundary(tok), "{tok} should be a boundary");
        }
        for tok in ordinary_words {
            assert!(!is_shell_boundary(tok), "{tok} must not be a boundary");
        }
    }
}
aft/compress/mod.rs

aft/compress/
mod.rs