Skip to main content

aft/compress/
tree.rs

1use crate::compress::generic::{strip_ansi, GenericCompressor};
2use crate::compress::listing_fold::{
3    fold_consecutive_runs, shape_key_for_basename, FoldEntry, FOLD_THRESHOLD,
4};
5use crate::compress::{CompressionResult, Compressor};
6
7pub struct TreeCompressor;
8
9impl Compressor for TreeCompressor {
10    fn matches(&self, command: &str) -> bool {
11        command_tokens(command).any(|token| token == "tree")
12    }
13
14    fn compress_with_exit_code(
15        &self,
16        _command: &str,
17        output: &str,
18        exit_code: Option<i32>,
19    ) -> CompressionResult {
20        let stripped = strip_ansi(output);
21        if matches!(exit_code, Some(code) if code != 0) {
22            return GenericCompressor::compress_output(&stripped).into();
23        }
24        if stripped.trim().is_empty() {
25            return CompressionResult::new(stripped);
26        }
27
28        match compress_tree_listing(&stripped) {
29            Some(folded) => CompressionResult::new(folded),
30            None => GenericCompressor::compress_output(&stripped).into(),
31        }
32    }
33}
34
35fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
36    command
37        .split_whitespace()
38        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
39        .filter(|token| {
40            !matches!(
41                *token,
42                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
43            )
44        })
45        .map(|token| {
46            token
47                .rsplit(['/', '\\'])
48                .next()
49                .unwrap_or(token)
50                .trim_end_matches(".cmd")
51                .to_string()
52        })
53}
54
55fn compress_tree_listing(output: &str) -> Option<String> {
56    let doc = parse_tree(output)?;
57    let mut lines = render_tree(&doc);
58    lines.extend(doc.trailer.iter().cloned());
59    Some(lines.join("\n"))
60}
61
62#[derive(Debug)]
63struct TreeDoc {
64    nodes: Vec<TreeNode>,
65    roots: Vec<usize>,
66    trailer: Vec<String>,
67}
68
69#[derive(Debug)]
70struct TreeNode {
71    label: String,
72    children: Vec<usize>,
73}
74
75#[derive(Debug)]
76struct ParsedTreeEntry<'a> {
77    depth: usize,
78    label: &'a str,
79}
80
81fn parse_tree(output: &str) -> Option<TreeDoc> {
82    let lines: Vec<&str> = output.lines().collect();
83    if lines.is_empty() {
84        return Some(TreeDoc {
85            nodes: Vec::new(),
86            roots: Vec::new(),
87            trailer: Vec::new(),
88        });
89    }
90
91    let (tree_lines, trailer) = split_tree_trailer(&lines);
92    if tree_lines.is_empty() {
93        return None;
94    }
95
96    let mut nodes = Vec::new();
97    let mut roots = Vec::new();
98    let mut stack: Vec<usize> = Vec::new();
99
100    for line in tree_lines {
101        if line.trim().is_empty() {
102            return None;
103        }
104
105        if let Some(entry) = parse_tree_entry(line) {
106            if entry.depth == 0 || entry.label.trim().is_empty() || stack.len() < entry.depth {
107                return None;
108            }
109            let parent = stack[entry.depth - 1];
110            let id = push_node(&mut nodes, entry.label.to_string());
111            nodes[parent].children.push(id);
112            stack.truncate(entry.depth);
113            stack.push(id);
114            continue;
115        }
116
117        if looks_like_malformed_tree_line(line) {
118            return None;
119        }
120
121        let id = push_node(&mut nodes, line.to_string());
122        roots.push(id);
123        stack.clear();
124        stack.push(id);
125    }
126
127    if roots.is_empty() {
128        return None;
129    }
130
131    Some(TreeDoc {
132        nodes,
133        roots,
134        trailer,
135    })
136}
137
138fn push_node(nodes: &mut Vec<TreeNode>, label: String) -> usize {
139    let id = nodes.len();
140    nodes.push(TreeNode {
141        label,
142        children: Vec::new(),
143    });
144    id
145}
146
147fn split_tree_trailer<'a>(lines: &'a [&'a str]) -> (&'a [&'a str], Vec<String>) {
148    let Some(summary_index) = lines.iter().rposition(|line| !line.trim().is_empty()) else {
149        return (lines, Vec::new());
150    };
151
152    if !is_tree_summary_line(lines[summary_index]) {
153        return (lines, Vec::new());
154    }
155
156    let mut trailer_start = summary_index;
157    while trailer_start > 0 && lines[trailer_start - 1].trim().is_empty() {
158        trailer_start -= 1;
159    }
160
161    (
162        &lines[..trailer_start],
163        lines[trailer_start..]
164            .iter()
165            .map(|line| (*line).to_string())
166            .collect(),
167    )
168}
169
170fn is_tree_summary_line(line: &str) -> bool {
171    let trimmed = line.trim();
172    let Some((directories, files)) = trimmed.split_once(',') else {
173        return false;
174    };
175
176    count_word(directories.trim(), "directory", "directories")
177        && count_word(files.trim(), "file", "files")
178}
179
180fn count_word(text: &str, singular: &str, plural: &str) -> bool {
181    let mut parts = text.split_whitespace();
182    let Some(count) = parts.next() else {
183        return false;
184    };
185    if count.is_empty() || !count.chars().all(|ch| ch.is_ascii_digit()) {
186        return false;
187    }
188    let Some(noun) = parts.next() else {
189        return false;
190    };
191    (noun == singular || noun == plural) && parts.next().is_none()
192}
193
194fn parse_tree_entry(line: &str) -> Option<ParsedTreeEntry<'_>> {
195    let mut consumed = 0usize;
196    let mut ancestor_units = 0usize;
197
198    while let Some(next) = consume_tree_indent_unit(line, consumed) {
199        ancestor_units += 1;
200        consumed = next;
201    }
202
203    let label_start = consume_tree_connector(line, consumed)?;
204    Some(ParsedTreeEntry {
205        depth: ancestor_units + 1,
206        label: &line[label_start..],
207    })
208}
209
210fn consume_tree_indent_unit(line: &str, start: usize) -> Option<usize> {
211    let (first, _) = next_char(line, start)?;
212    if is_tree_vertical_char(first) {
213        let mut index = start + first.len_utf8();
214        for _ in 0..3 {
215            index = consume_tree_space_char(line, index)?;
216        }
217        Some(index)
218    } else if is_tree_space_char(first) {
219        let mut index = start;
220        for _ in 0..4 {
221            index = consume_tree_space_char(line, index)?;
222        }
223        Some(index)
224    } else {
225        None
226    }
227}
228
229fn consume_tree_connector(line: &str, start: usize) -> Option<usize> {
230    let (branch, mut index) = next_char(line, start)?;
231    if !is_tree_connector_char(branch) {
232        return None;
233    }
234
235    for _ in 0..2 {
236        let (horizontal, next) = next_char(line, index)?;
237        if !is_tree_horizontal_char(horizontal) {
238            return None;
239        }
240        index = next;
241    }
242
243    consume_tree_space_char(line, index)
244}
245
246fn consume_tree_space_char(line: &str, start: usize) -> Option<usize> {
247    let (ch, next) = next_char(line, start)?;
248    is_tree_space_char(ch).then_some(next)
249}
250
251fn next_char(line: &str, start: usize) -> Option<(char, usize)> {
252    let ch = line.get(start..)?.chars().next()?;
253    Some((ch, start + ch.len_utf8()))
254}
255
256fn is_tree_space_char(ch: char) -> bool {
257    matches!(ch, ' ' | '\u{00a0}')
258}
259
260fn is_tree_vertical_char(ch: char) -> bool {
261    matches!(ch, '│' | '|')
262}
263
264fn is_tree_connector_char(ch: char) -> bool {
265    matches!(ch, '├' | '└' | '|' | '`')
266}
267
268fn is_tree_horizontal_char(ch: char) -> bool {
269    matches!(ch, '─' | '-')
270}
271
272fn looks_like_malformed_tree_line(line: &str) -> bool {
273    let trimmed = line.trim_start_matches(|ch: char| ch.is_whitespace() || is_tree_space_char(ch));
274    trimmed.starts_with(|ch: char| is_tree_vertical_char(ch) || matches!(ch, '├' | '└' | '`'))
275}
276
277#[derive(Debug, PartialEq, Eq)]
278enum FoldedChild {
279    Node(usize),
280    Summary(String),
281}
282
283fn render_tree(doc: &TreeDoc) -> Vec<String> {
284    let mut out = Vec::new();
285    for &root in &doc.roots {
286        let label = doc.nodes[root].label.clone();
287        out.push(label.clone());
288        render_children(&doc.nodes, root, "", &label, &mut out);
289    }
290    out
291}
292
293fn render_children(
294    nodes: &[TreeNode],
295    parent: usize,
296    prefix: &str,
297    parent_path: &str,
298    out: &mut Vec<String>,
299) {
300    let children = fold_child_entries(nodes, &nodes[parent].children, parent_path);
301    let child_count = children.len();
302
303    for (index, child) in children.iter().enumerate() {
304        let is_last = index + 1 == child_count;
305        let connector = if is_last { "└── " } else { "├── " };
306        match child {
307            FoldedChild::Summary(label) => {
308                out.push(format!("{prefix}{connector}{label}"));
309            }
310            FoldedChild::Node(id) => {
311                let label = &nodes[*id].label;
312                out.push(format!("{prefix}{connector}{label}"));
313                if !nodes[*id].children.is_empty() {
314                    let next_prefix =
315                        format!("{}{}", prefix, if is_last { "    " } else { "│   " });
316                    let child_path = join_tree_path(parent_path, label);
317                    render_children(nodes, *id, &next_prefix, &child_path, out);
318                }
319            }
320        }
321    }
322}
323
324fn fold_child_entries(
325    nodes: &[TreeNode],
326    child_ids: &[usize],
327    parent_path: &str,
328) -> Vec<FoldedChild> {
329    let mut out = Vec::new();
330    let mut index = 0usize;
331
332    while index < child_ids.len() {
333        let first_id = child_ids[index];
334        let foldable = nodes[first_id].children.is_empty();
335
336        if !foldable {
337            out.push(FoldedChild::Node(first_id));
338            index += 1;
339            continue;
340        }
341
342        let key = shape_key_for_basename(parent_path, &nodes[first_id].label);
343        let mut end = index + 1;
344        while end < child_ids.len() {
345            let id = child_ids[end];
346            if !nodes[id].children.is_empty()
347                || shape_key_for_basename(parent_path, &nodes[id].label) != key
348            {
349                break;
350            }
351            end += 1;
352        }
353
354        if end - index >= FOLD_THRESHOLD {
355            out.push(FoldedChild::Summary(fold_summary_for_run(
356                nodes,
357                &child_ids[index..end],
358                parent_path,
359            )));
360        } else {
361            out.extend(child_ids[index..end].iter().copied().map(FoldedChild::Node));
362        }
363        index = end;
364    }
365
366    out
367}
368
369fn fold_summary_for_run(nodes: &[TreeNode], run: &[usize], parent_path: &str) -> String {
370    let entries = run
371        .iter()
372        .map(|id| {
373            let basename = nodes[*id].label.clone();
374            FoldEntry {
375                line: basename.clone(),
376                dir: String::new(),
377                shape_key: shape_key_for_basename(parent_path, &basename),
378                basename,
379            }
380        })
381        .collect();
382    let folded = fold_consecutive_runs(entries);
383    debug_assert_eq!(folded.len(), 1);
384    folded.into_iter().next().unwrap_or_default()
385}
386
387fn join_tree_path(parent_path: &str, label: &str) -> String {
388    if parent_path.is_empty() {
389        label.to_string()
390    } else {
391        format!("{}/{}", parent_path.trim_end_matches('/'), label)
392    }
393}
394
395#[derive(Clone, Copy)]
396enum TreeFixtureFormat {
397    Utf8BoxAsciiIndent,
398    Utf8BoxNbspIndent,
399    CLocaleAscii,
400}
401
402impl TreeFixtureFormat {
403    fn continuing_indent(self) -> &'static str {
404        match self {
405            Self::Utf8BoxAsciiIndent => "\u{2502}   ",
406            Self::Utf8BoxNbspIndent => "\u{2502}\u{00a0}\u{00a0} ",
407            Self::CLocaleAscii => "|   ",
408        }
409    }
410
411    fn blank_indent(self) -> &'static str {
412        "    "
413    }
414
415    fn branch_connector(self) -> &'static str {
416        match self {
417            Self::Utf8BoxAsciiIndent | Self::Utf8BoxNbspIndent => "\u{251c}\u{2500}\u{2500} ",
418            Self::CLocaleAscii => "|-- ",
419        }
420    }
421
422    fn last_connector(self) -> &'static str {
423        match self {
424            Self::Utf8BoxAsciiIndent | Self::Utf8BoxNbspIndent => "\u{2514}\u{2500}\u{2500} ",
425            Self::CLocaleAscii => "`-- ",
426        }
427    }
428}
429
430fn build_lebench_tree_fixture_with_format(format: TreeFixtureFormat) -> String {
431    let mut lines = Vec::with_capacity(207);
432    lines.push("src".to_string());
433    lines.push(format!("{}generated", format.branch_connector()));
434    lines.push(format!(
435        "{}{}client",
436        format.continuing_indent(),
437        format.last_connector()
438    ));
439    let nested_prefix = format!("{}{}", format.continuing_indent(), format.blank_indent());
440    for i in 0..=100u32 {
441        lines.push(format!(
442            "{nested_prefix}{}module_{i:03}.ts",
443            format.branch_connector()
444        ));
445    }
446    lines.push(format!(
447        "{nested_prefix}{}module_100_NEEDLE_FILE_marker.ts",
448        format.branch_connector()
449    ));
450    for i in 101..=198u32 {
451        lines.push(format!(
452            "{nested_prefix}{}module_{i:03}.ts",
453            format.branch_connector()
454        ));
455    }
456    lines.push(format!(
457        "{nested_prefix}{}module_199.ts",
458        format.last_connector()
459    ));
460    lines.push(format!("{}main.ts", format.last_connector()));
461    lines.push(String::new());
462    lines.push("2 directories, 202 files".to_string());
463    lines.join("\n")
464}
465
466pub fn build_lebench_tree_fixture() -> String {
467    build_lebench_tree_fixture_with_format(TreeFixtureFormat::Utf8BoxAsciiIndent)
468}
469
470pub fn build_lebench_tree_utf8_nbsp_fixture() -> String {
471    build_lebench_tree_fixture_with_format(TreeFixtureFormat::Utf8BoxNbspIndent)
472}
473
474pub fn build_lebench_tree_c_locale_ascii_fixture() -> String {
475    build_lebench_tree_fixture_with_format(TreeFixtureFormat::CLocaleAscii)
476}
477
478#[cfg(test)]
479mod tests {
480    use super::*;
481
482    const NEEDLE: &str = "module_100_NEEDLE_FILE_marker.ts";
483
484    fn assert_tree_fixture_folds(input: &str) -> String {
485        let line_count = input.lines().count();
486        let out = compress_tree_listing(input).expect("tree parses");
487
488        assert!(out.contains(NEEDLE), "needle must survive; got:\n{out}");
489        assert!(
490            out.contains("module_*.ts —"),
491            "homogeneous module run should fold: {out}"
492        );
493        assert!(
494            out.contains("2 directories, 202 files"),
495            "tree summary trailer should be preserved: {out}"
496        );
497        assert!(
498            out.lines().count() < line_count / 2,
499            "should compress dramatically: {line_count} -> {} lines",
500            out.lines().count()
501        );
502        out
503    }
504
505    #[test]
506    fn matches_tree_invocations() {
507        let c = TreeCompressor;
508        assert!(c.matches("tree"));
509        assert!(c.matches("tree -a src"));
510        assert!(c.matches("cd /tmp && tree ."));
511        assert!(!c.matches("treeify"));
512    }
513
514    #[test]
515    fn lebench_tree_folds_sibling_run_and_preserves_needle_and_trailer() {
516        let input = build_lebench_tree_fixture();
517        let out = assert_tree_fixture_folds(&input);
518
519        assert!(
520            out.contains("│       ├── module_100_NEEDLE_FILE_marker.ts"),
521            "needle should keep its sibling-tree prefix: {out}"
522        );
523    }
524
525    #[test]
526    fn lebench_utf8_nbsp_tree_folds_sibling_run_and_preserves_needle_and_trailer() {
527        let input = build_lebench_tree_utf8_nbsp_fixture();
528        assert!(
529            input.contains("\u{2502}\u{00a0}\u{00a0}     \u{251c}\u{2500}\u{2500} module_000.ts")
530        );
531
532        let out = assert_tree_fixture_folds(&input);
533
534        assert!(
535            out.contains("│       ├── module_100_NEEDLE_FILE_marker.ts"),
536            "needle should keep its sibling-tree prefix after normalization: {out}"
537        );
538    }
539
540    #[test]
541    fn lebench_c_locale_ascii_tree_folds_sibling_run_and_preserves_needle_and_trailer() {
542        let input = build_lebench_tree_c_locale_ascii_fixture();
543        assert!(input.contains("|       |-- module_000.ts"));
544
545        assert_tree_fixture_folds(&input);
546    }
547
548    #[test]
549    fn tree_compression_is_deterministic_across_fixture_formats() {
550        for input in [
551            build_lebench_tree_fixture(),
552            build_lebench_tree_utf8_nbsp_fixture(),
553            build_lebench_tree_c_locale_ascii_fixture(),
554        ] {
555            let first = compress_tree_listing(&input).expect("tree parses");
556            let second = compress_tree_listing(&input).expect("tree parses");
557            let recompressed = compress_tree_listing(&first).expect("compressed tree parses");
558            assert_eq!(first, second);
559            assert_eq!(first, recompressed);
560        }
561    }
562
563    #[test]
564    fn nested_directories_fold_independent_sibling_groups() {
565        let mut lines = vec![".".to_string(), "├── app".to_string()];
566        for i in 0..10u32 {
567            lines.push(format!("│   ├── module_{i:03}.rs"));
568        }
569        lines.push("│   └── special.rs".to_string());
570        lines.push("└── lib".to_string());
571        for i in 0..8u32 {
572            lines.push(format!("    ├── component_{i:03}.rs"));
573        }
574        lines.push("    └── keep.rs".to_string());
575        lines.push(String::new());
576        lines.push("2 directories, 20 files".to_string());
577        let input = lines.join("\n");
578
579        let out = compress_tree_listing(&input).expect("tree parses");
580
581        assert!(out.contains("│   ├── module_*.rs — 10 files"), "{out}");
582        assert!(out.contains("│   └── special.rs"), "{out}");
583        assert!(out.contains("    ├── component_*.rs — 8 files"), "{out}");
584        assert!(out.contains("    └── keep.rs"), "{out}");
585        assert!(out.contains("2 directories, 20 files"), "{out}");
586    }
587
588    #[test]
589    fn mixed_ascii_and_nbsp_indent_space_chars_still_parse() {
590        let lines = [
591            ".".to_string(),
592            "├── src".to_string(),
593            format!("\u{2502} \u{00a0} ├── module_{:03}.rs", 0),
594            format!("\u{2502}\u{00a0}  ├── module_{:03}.rs", 1),
595            format!("\u{2502}  \u{00a0}├── module_{:03}.rs", 2),
596            format!("\u{2502}\u{00a0}\u{00a0} ├── module_{:03}.rs", 3),
597            format!("\u{2502} \u{00a0} ├── module_{:03}.rs", 4),
598            format!("\u{2502}\u{00a0}  ├── module_{:03}.rs", 5),
599            format!("\u{2502}  \u{00a0}├── module_{:03}.rs", 6),
600            format!("\u{2502}\u{00a0}\u{00a0} ├── module_{:03}.rs", 7),
601            "\u{2502} \u{00a0} \u{2514}\u{2500}\u{2500} keep.rs".to_string(),
602            "└── tail.rs".to_string(),
603            String::new(),
604            "1 directory, 10 files".to_string(),
605        ]
606        .join("\n");
607
608        let out = compress_tree_listing(&lines).expect("tree parses");
609
610        assert!(out.contains("│   ├── module_*.rs — 8 files"), "{out}");
611        assert!(out.contains("│   └── keep.rs"), "{out}");
612        assert!(out.contains("└── tail.rs"), "{out}");
613        assert!(out.contains("1 directory, 10 files"), "{out}");
614    }
615
616    #[test]
617    fn small_sibling_groups_pass_through_unchanged() {
618        let mut lines = vec![".".to_string()];
619        for i in 0..6u32 {
620            lines.push(format!("├── file_{i}.txt"));
621        }
622        lines.push("└── file_6.txt".to_string());
623        lines.push(String::new());
624        lines.push("0 directories, 7 files".to_string());
625        let input = lines.join("\n");
626
627        let out = compress_tree_listing(&input).expect("tree parses");
628
629        assert_eq!(out, input);
630    }
631
632    #[test]
633    fn non_tree_shaped_output_degrades_to_generic_passthrough() {
634        let c = TreeCompressor;
635        let out = c.compress_with_exit_code(
636            "tree --bad-option",
637            "tree: invalid option -- z\nusage: tree [options]",
638            Some(1),
639        );
640        assert!(out.text.contains("tree: invalid option"));
641        assert!(out.text.contains("usage: tree"));
642    }
643}