Skip to main content

cc_toolgate/parse/
shell.rs

1//! Shell command parsing backed by tree-sitter-bash.
2//!
3//! This module provides two public functions:
4//!
5//! - [`parse_with_substitutions`] — decomposes a shell command string into a
6//!   [`ParsedPipeline`] of segments joined by operators, plus a list of
7//!   extracted command/process substitution contents.
8//!
9//! - [`has_output_redirection`] — checks whether a command string contains
10//!   output redirection that could mutate filesystem state.
11//!
12//! Both functions parse their input with tree-sitter-bash, which provides a
13//! full AST from a formal grammar.  This means quoting, heredocs, control flow
14//! keywords, and nested substitutions are handled by the grammar itself —
15//! the code here walks the resulting AST rather than scanning characters.
16//!
17//! # Control flow handling
18//!
19//! Shell keywords (`for`, `if`, `while`, `case`) are grammar structure, not
20//! commands.  The AST walker recurses into control flow bodies and extracts the
21//! actual commands inside them as pipeline segments.  For example,
22//! `for i in *; do rm "$i"; done` produces a segment for `rm "$i"`, not for
23//! `for` or `done`.
24//!
25//! # Redirection propagation
26//!
27//! When a control flow construct is wrapped in a `redirected_statement`
28//! (e.g. `for ... done > file`), the output redirection is propagated to the
29//! inner segments via [`ShellSegment::redirection`].  The eval layer uses this
30//! field to escalate decisions for commands that are contextually mutating even
31//! though their own text contains no redirect.
32//!
33//! # Substitution extraction
34//!
35//! Outermost `$()`, backtick, `<()`, and `>()` nodes are collected and their
36//! spans replaced with `__SUBST__` placeholders in the segment text.  The eval
37//! layer recursively evaluates each substitution's inner command independently.
38
39use super::types::{Operator, ParsedPipeline, Redirection, ShellSegment};
40use std::cell::RefCell;
41use tree_sitter::{Node, Parser, Tree};
42
43// ---------------------------------------------------------------------------
44// Thread-local parser
45// ---------------------------------------------------------------------------
46
47thread_local! {
48    /// tree-sitter `Parser` is `!Send`, so we use `thread_local!` storage.
49    static TS_PARSER: RefCell<Parser> = RefCell::new({
50        let mut p = Parser::new();
51        p.set_language(&tree_sitter_bash::LANGUAGE.into())
52            .expect("failed to load bash grammar");
53        p
54    });
55}
56
57/// Parse `source` into a tree-sitter syntax tree.
58fn parse_tree(source: &str) -> Tree {
59    TS_PARSER.with(|p| {
60        p.borrow_mut()
61            .parse(source, None)
62            .expect("tree-sitter parse failed")
63    })
64}
65
66// ---------------------------------------------------------------------------
67// Substitution extraction
68// ---------------------------------------------------------------------------
69
/// One extracted command/process substitution: where it sits in the source
/// and what command text it wraps.
struct SubstSpan {
    /// Byte offset of the first byte of the substitution node.
    start: usize,
    /// Byte offset one past the last byte of the substitution node.
    end: usize,
    /// The substitution's text with its outer delimiters removed and the
    /// remainder trimmed.
    inner: String,
}
76
77/// Walk `node` for outermost `command_substitution` and `process_substitution`
78/// nodes, appending each to `out`.  Does not recurse into found substitutions;
79/// the eval layer handles nested evaluation.
80fn collect_substitutions(node: Node, source: &[u8], out: &mut Vec<SubstSpan>) {
81    if matches!(node.kind(), "command_substitution" | "process_substitution") {
82        let full = node.utf8_text(source).unwrap_or("");
83        let inner = strip_subst_delimiters(full);
84        if !inner.is_empty() {
85            out.push(SubstSpan {
86                start: node.start_byte(),
87                end: node.end_byte(),
88                inner: inner.to_string(),
89            });
90        }
91        return;
92    }
93    let mut cursor = node.walk();
94    for child in node.children(&mut cursor) {
95        collect_substitutions(child, source, out);
96    }
97}
98
/// Strip the outer delimiters from a substitution node's text and trim the
/// remainder.
///
/// `$(cmd)` → `cmd`, `` `cmd` `` → `cmd`, `<(cmd)` / `>(cmd)` → `cmd`.
///
/// The closing delimiter is removed only when it is actually present.  For
/// unterminated text such as `$(ls` the whole remainder (`ls`) is returned,
/// so the inner command is never truncated or lost.  Text without a
/// recognised opening delimiter is returned trimmed but otherwise unchanged.
fn strip_subst_delimiters(text: &str) -> &str {
    let after_paren_opener = ["$(", "<(", ">("]
        .iter()
        .find_map(|&opener| text.strip_prefix(opener));

    let inner = if let Some(rest) = after_paren_opener {
        rest.strip_suffix(')').unwrap_or(rest)
    } else if let Some(rest) = text.strip_prefix('`') {
        rest.strip_suffix('`').unwrap_or(rest)
    } else {
        text
    };
    inner.trim()
}
112
113/// Produce the text of `source[start..end]` with any substitution spans inside
114/// that range replaced by `__SUBST__` placeholders.  Replacement is performed
115/// right-to-left so that earlier byte offsets remain valid.
116fn text_replacing_substitutions(
117    source: &str,
118    start: usize,
119    end: usize,
120    subs: &[SubstSpan],
121) -> String {
122    let mut relevant: Vec<&SubstSpan> = subs
123        .iter()
124        .filter(|s| s.start >= start && s.end <= end)
125        .collect();
126    if relevant.is_empty() {
127        return source[start..end].to_string();
128    }
129    relevant.sort_by(|a, b| b.start.cmp(&a.start));
130    let mut text = source[start..end].to_string();
131    for sub in relevant {
132        text.replace_range((sub.start - start)..(sub.end - start), "__SUBST__");
133    }
134    text
135}
136
137// ---------------------------------------------------------------------------
138// Output redirection detection
139// ---------------------------------------------------------------------------
140
141/// Inspect a `file_redirect` AST node and decide whether it represents an
142/// output redirection that could mutate filesystem state.
143///
144/// # Safe patterns (returns `None`)
145///
146/// - Input redirects: `<`, `<<`, `<<<`, `<&`
147/// - Any redirect targeting `/dev/null`
148/// - fd duplication to standard streams: `>&1`, `>&2`, `2>&1`, etc.
149/// - fd closing: `>&-`, `2>&-`
150///
151/// # Flagged patterns (returns `Some`)
152///
153/// - `>`, `>>`, `>|` to any path other than `/dev/null`
154/// - `<>` (read-write open, detected via ERROR node in tree-sitter AST)
155/// - `&>`, `&>>` to any path other than `/dev/null`
156/// - `>&N` or `M>&N` where N ≥ 3 (custom fd target)
157/// - `N>` or `N>>` to any path other than `/dev/null`
158fn check_file_redirect(node: Node, source: &[u8]) -> Option<Redirection> {
159    let mut fd: Option<String> = None;
160    let mut operator = "";
161    let mut dest = String::new();
162
163    let mut cursor = node.walk();
164    for child in node.children(&mut cursor) {
165        if child.kind() == "file_descriptor" {
166            fd = child.utf8_text(source).ok().map(str::to_string);
167        } else if child.is_named() {
168            dest = child.utf8_text(source).unwrap_or("").to_string();
169        } else {
170            let k = child.kind();
171            if matches!(
172                k,
173                ">" | ">>" | ">|" | "&>" | "&>>" | ">&" | "<" | "<<<" | "<<" | "<&"
174            ) {
175                operator = k;
176            }
177        }
178    }
179
180    if matches!(operator, "" | "<" | "<<<" | "<<" | "<&") {
181        // tree-sitter-bash parses `<>` (read-write) as `<` + ERROR(`>`).
182        // Detect this by checking the node's raw text for the `<>` sequence.
183        if operator == "<" {
184            let text = node.utf8_text(source).unwrap_or("");
185            if text.contains("<>") {
186                return Some(Redirection {
187                    description: "output redirection (<> read-write)".into(),
188                });
189            }
190        }
191        return None;
192    }
193
194    if matches!(operator, "&>" | "&>>") {
195        if dest == "/dev/null" {
196            return None;
197        }
198        return Some(Redirection {
199            description: format!("output redirection ({operator})"),
200        });
201    }
202
203    if operator == ">&" {
204        if dest == "-" {
205            return None;
206        }
207        if let Some(ref f) = fd {
208            if matches!(dest.as_str(), "0" | "1" | "2") {
209                return None;
210            }
211            return Some(Redirection {
212                description: format!("output redirection ({f}>&{dest}, custom fd target)"),
213            });
214        }
215        if matches!(dest.as_str(), "0" | "1" | "2") {
216            return None;
217        }
218        return Some(Redirection {
219            description: format!("output redirection (>&{dest}, custom fd target)"),
220        });
221    }
222
223    if matches!(operator, ">" | ">>" | ">|") {
224        if dest == "/dev/null" {
225            return None;
226        }
227        if let Some(ref f) = fd {
228            return Some(Redirection {
229                description: format!("output redirection ({f}{operator})"),
230            });
231        }
232        return Some(Redirection {
233            description: format!("output redirection ({operator})"),
234        });
235    }
236
237    None
238}
239
240/// Recursively search `node` for `file_redirect` descendants, returning the
241/// first output redirection found.  Skips `heredoc_body` subtrees entirely so
242/// that text inside heredoc bodies (e.g. email addresses containing `>`) never
243/// triggers a false positive.
244fn detect_redirections(node: Node, source: &[u8]) -> Option<Redirection> {
245    if node.kind() == "file_redirect" {
246        return check_file_redirect(node, source);
247    }
248    if node.kind() == "heredoc_body" {
249        return None;
250    }
251    let mut cursor = node.walk();
252    for child in node.children(&mut cursor) {
253        if let Some(r) = detect_redirections(child, source) {
254            return Some(r);
255        }
256    }
257    None
258}
259
260// ---------------------------------------------------------------------------
261// AST walking — compound command decomposition
262// ---------------------------------------------------------------------------
263
264/// Intermediate result of walking a subtree: a flat sequence of segment byte
265/// ranges interleaved with operators.
266struct WalkResult {
267    segments: Vec<SegmentInfo>,
268    operators: Vec<Operator>,
269}
270
271/// A segment's position in the source and any redirection inherited from a
272/// wrapping `redirected_statement`.
273struct SegmentInfo {
274    start: usize,
275    end: usize,
276    redirection: Option<Redirection>,
277}
278
279impl WalkResult {
280    fn empty() -> Self {
281        Self {
282            segments: vec![],
283            operators: vec![],
284        }
285    }
286
287    fn single(start: usize, end: usize, redir: Option<Redirection>) -> Self {
288        Self {
289            segments: vec![SegmentInfo {
290                start,
291                end,
292                redirection: redir,
293            }],
294            operators: vec![],
295        }
296    }
297
298    /// Merge `other` into `self`, inserting `join_op` between the two if both
299    /// contain segments.
300    fn append(&mut self, other: WalkResult, join_op: Option<Operator>) {
301        if other.segments.is_empty() {
302            return;
303        }
304        if !self.segments.is_empty()
305            && let Some(op) = join_op
306        {
307            self.operators.push(op);
308        }
309        self.segments.extend(other.segments);
310        self.operators.extend(other.operators);
311    }
312}
313
314/// Dispatch on the AST `node` kind and return a flat segment/operator sequence.
315///
316/// The match arms fall into three categories:
317///
318/// 1. **Structure nodes** (`program`, `list`, `pipeline`) — decompose into
319///    children connected by operators.
320/// 2. **Leaf command nodes** (`command`, `declaration_command`,
321///    `variable_assignment`) — become a single segment whose byte range is the
322///    node's span.
323/// 3. **Control flow nodes** (`for_statement`, `if_statement`, etc.) — recurse
324///    into their body to extract the actual commands.
325///
326/// Apply a wrapping redirection to segments from a compound body.
327///
328/// For `list` nodes (`&&`/`||`/`;` chains) and `pipeline` nodes (`|`/`|&`
329/// chains) only the last segment receives the redirect — earlier segments are
330/// independent commands whose output goes to the next pipe stage and is not
331/// itself redirected.  For control-flow bodies (`for`/`while`/`if`/`case`)
332/// every segment is wrapped by the construct, so all receive the redirect.
333fn propagate_redirect(result: &mut WalkResult, node_kind: &str, redir: &Redirection) {
334    if node_kind == "list" || node_kind == "pipeline" {
335        if let Some(last) = result.segments.last_mut()
336            && last.redirection.is_none()
337        {
338            last.redirection = Some(redir.clone());
339        }
340    } else {
341        for seg in &mut result.segments {
342            if seg.redirection.is_none() {
343                seg.redirection = Some(redir.clone());
344            }
345        }
346    }
347}
348
349/// Unknown named nodes are treated as single segments (conservative: the eval
350/// layer will flag them as unrecognized → ASK).
351fn walk_ast(node: Node, source: &[u8]) -> WalkResult {
352    match node.kind() {
353        "program" => walk_program(node, source),
354        "list" => walk_list(node, source),
355        "pipeline" => walk_pipeline(node, source),
356        "command" | "declaration_command" => {
357            let redir = detect_redirections(node, source);
358            WalkResult::single(node.start_byte(), node.end_byte(), redir)
359        }
360        "redirected_statement" => walk_redirected(node, source),
361        "for_statement" | "while_statement" | "until_statement" | "c_style_for_statement" => {
362            walk_loop(node, source)
363        }
364        "if_statement" => walk_if(node, source),
365        "case_statement" => walk_case(node, source),
366        "subshell" | "compound_statement" | "do_group" | "else_clause" | "elif_clause" => {
367            walk_block(node, source)
368        }
369        "case_item" => walk_case_item(node, source),
370        "negated_command" => walk_negated(node, source),
371        "function_definition" => walk_function(node, source),
372        "variable_assignment" => WalkResult::single(node.start_byte(), node.end_byte(), None),
373        "comment" | "heredoc_body" => WalkResult::empty(),
374        _ if node.is_named() => WalkResult::single(node.start_byte(), node.end_byte(), None),
375        _ => WalkResult::empty(),
376    }
377}
378
379/// Top-level `program` node: join named children with [`Operator::Semi`].
380fn walk_program(node: Node, source: &[u8]) -> WalkResult {
381    let mut result = WalkResult::empty();
382    let mut cursor = node.walk();
383    for child in node.named_children(&mut cursor) {
384        result.append(walk_ast(child, source), Some(Operator::Semi));
385    }
386    result
387}
388
389/// `list` is a left-recursive binary tree: `a && b || c` parses as
390/// `list(list(a, &&, b), ||, c)`.  This function flattens it into a linear
391/// segment/operator sequence.
392fn walk_list(node: Node, source: &[u8]) -> WalkResult {
393    let mut cursor = node.walk();
394    let named: Vec<Node> = node.named_children(&mut cursor).collect();
395    if named.len() < 2 {
396        let mut result = WalkResult::empty();
397        for child in named {
398            result.append(walk_ast(child, source), Some(Operator::Semi));
399        }
400        return result;
401    }
402    let op = list_operator(node);
403    let mut result = walk_ast(named[0], source);
404    result.append(walk_ast(named[1], source), Some(op));
405    result
406}
407
408/// Extract the operator from a `list` node's anonymous children.
409fn list_operator(node: Node) -> Operator {
410    let mut cursor = node.walk();
411    for child in node.children(&mut cursor) {
412        if !child.is_named() {
413            match child.kind() {
414                "&&" => return Operator::And,
415                "||" => return Operator::Or,
416                _ => {}
417            }
418        }
419    }
420    Operator::Semi
421}
422
423/// `pipeline` node: named children are commands, anonymous `|` / `|&` tokens
424/// are the operators between them.
425fn walk_pipeline(node: Node, source: &[u8]) -> WalkResult {
426    let mut result = WalkResult::empty();
427    let mut pending_op: Option<Operator> = None;
428    let mut cursor = node.walk();
429    for child in node.children(&mut cursor) {
430        if child.is_named() {
431            result.append(walk_ast(child, source), pending_op.take());
432        } else {
433            match child.kind() {
434                "|" => pending_op = Some(Operator::Pipe),
435                "|&" => pending_op = Some(Operator::PipeErr),
436                _ => {}
437            }
438        }
439    }
440    result
441}
442
443/// `redirected_statement` wraps a body node (command, pipeline, control flow,
444/// etc.) together with one or more redirect nodes (`file_redirect`,
445/// `heredoc_redirect`, `herestring_redirect`).
446///
447/// For a leaf command body, the full `redirected_statement` text (minus any
448/// heredoc body content) becomes the segment text — this preserves redirect
449/// tokens like `> file` in the text that downstream `base_command()` and
450/// `has_output_redirection()` will see.
451///
452/// For a compound body (e.g. `for ... done > file`), the walker recurses into
453/// the body and propagates the detected redirection to each inner segment.
454///
455/// `heredoc_redirect` nodes may contain same-line pipeline/list children
456/// (e.g. `cat <<EOF | grep foo` produces a `pipeline` inside
457/// `heredoc_redirect`).  These are checked **first** because the body
458/// command (e.g. `cat`) appears as an earlier sibling and would otherwise
459/// trigger the leaf-command short-circuit.
460fn walk_redirected(node: Node, source: &[u8]) -> WalkResult {
461    let redir = detect_redirections(node, source);
462
463    // First pass: check if any heredoc_redirect contains same-line commands.
464    // This must run before the leaf-command path because tree-sitter places
465    // `cat <<EOF | cmd` as: redirected_statement { command("cat"),
466    // heredoc_redirect { pipeline("| cmd") } }.  The command("cat") child
467    // would otherwise trigger an early return.
468    let mut cursor = node.walk();
469    for child in node.named_children(&mut cursor) {
470        if child.kind() == "heredoc_redirect" {
471            let inner = walk_heredoc_redirect(child, source);
472            if !inner.segments.is_empty() {
473                let mut full = WalkResult::empty();
474                // Find the body command (sibling before heredoc_redirect).
475                let mut c2 = node.walk();
476                for sib in node.named_children(&mut c2) {
477                    if sib.kind() == "heredoc_redirect" {
478                        break;
479                    }
480                    if matches!(sib.kind(), "file_redirect" | "herestring_redirect") {
481                        continue;
482                    }
483                    if is_leaf_command(sib) {
484                        let end = effective_end(node).min(child.start_byte());
485                        full.append(
486                            WalkResult::single(sib.start_byte(), end, redir.clone()),
487                            None,
488                        );
489                    } else {
490                        // Compound body (for/while/if/case): recurse to
491                        // extract inner commands instead of flattening.
492                        let mut body = walk_ast(sib, source);
493                        if let Some(ref r) = redir {
494                            propagate_redirect(&mut body, sib.kind(), r);
495                        }
496                        full.append(body, None);
497                    }
498                    break;
499                }
500                // The first operator token in heredoc_redirect determines how
501                // the body command joins the same-line content.
502                let join_op = heredoc_join_operator(child);
503                full.append(inner, Some(join_op));
504                return full;
505            }
506        }
507    }
508
509    // Second pass: no heredoc piped content.  Handle body normally.
510    let mut cursor2 = node.walk();
511    for child in node.named_children(&mut cursor2) {
512        if matches!(
513            child.kind(),
514            "file_redirect" | "herestring_redirect" | "heredoc_redirect"
515        ) {
516            continue;
517        }
518        if is_leaf_command(child) {
519            let end = effective_end(node);
520            return WalkResult::single(node.start_byte(), end, redir);
521        }
522        // Compound body with redirect.  For list nodes (&&/||/; chains) only
523        // the last segment gets the redirect — the earlier segments are
524        // independent commands whose output is not redirected.  For
525        // control-flow bodies (for/while/if/case) every inner segment is
526        // wrapped by the construct and therefore all receive the redirect.
527        let mut result = walk_ast(child, source);
528        if let Some(ref r) = redir {
529            propagate_redirect(&mut result, child.kind(), r);
530        }
531        return result;
532    }
533
534    // Fallback: no recognized body.
535    let end = effective_end(node);
536    WalkResult::single(node.start_byte(), end, redir)
537}
538
539/// Walk a `heredoc_redirect` node for commands that appear on the same line
540/// as the heredoc marker.
541///
542/// In tree-sitter-bash, `cat <<EOF | grep foo` places the `| grep foo`
543/// pipeline inside the `heredoc_redirect` node rather than as a sibling in an
544/// outer pipeline.  Similarly, `cat <<EOF && rm file` places `&& rm file` as
545/// an anonymous operator token + named `command` child.
546///
547/// For parse errors (e.g. `;` in heredoc context), commands may appear as
548/// loose `word` nodes.  These are collected into a synthetic segment so the
549/// eval layer can flag them.
550fn walk_heredoc_redirect(node: Node, source: &[u8]) -> WalkResult {
551    let mut result = WalkResult::empty();
552    let mut cursor = node.walk();
553    let mut loose_words_start: Option<usize> = None;
554    let mut loose_words_end: usize = 0;
555
556    for child in node.named_children(&mut cursor) {
557        match child.kind() {
558            "pipeline" | "list" | "command" | "redirected_statement" => {
559                // Flush accumulated loose words as a segment.
560                if let Some(start) = loose_words_start.take() {
561                    result.append(
562                        WalkResult::single(start, loose_words_end, None),
563                        Some(Operator::Semi),
564                    );
565                }
566                let op = heredoc_operator_before(node, child);
567                result.append(walk_ast(child, source), Some(op));
568            }
569            "word" => {
570                if loose_words_start.is_none() {
571                    loose_words_start = Some(child.start_byte());
572                }
573                loose_words_end = child.end_byte();
574            }
575            _ => {}
576        }
577    }
578
579    // Flush any trailing loose words.
580    if let Some(start) = loose_words_start {
581        result.append(
582            WalkResult::single(start, loose_words_end, None),
583            Some(Operator::Semi),
584        );
585    }
586
587    result
588}
589
590/// Determine the operator that precedes `child` inside a `heredoc_redirect`.
591///
592/// Scans the anonymous children of `heredoc_node` for operator tokens (`&&`,
593/// `||`, `|`, `|&`) that appear before `child`.  Returns the corresponding
594/// [`Operator`], defaulting to [`Operator::Pipe`] when no explicit operator is
595/// found (the most common heredoc pattern is piping).
596fn heredoc_operator_before(heredoc_node: Node, child: Node) -> Operator {
597    let mut cursor = heredoc_node.walk();
598    let mut last_op = None;
599    for sib in heredoc_node.children(&mut cursor) {
600        if sib.start_byte() >= child.start_byte() {
601            break;
602        }
603        if !sib.is_named() {
604            match sib.kind() {
605                "&&" => last_op = Some(Operator::And),
606                "||" => last_op = Some(Operator::Or),
607                "|&" => last_op = Some(Operator::PipeErr),
608                "|" => last_op = Some(Operator::Pipe),
609                _ => {}
610            }
611        }
612    }
613    last_op.unwrap_or(Operator::Pipe)
614}
615
616/// Determine the operator joining the body command to same-line heredoc content.
617///
618/// Checks direct children of the `heredoc_redirect` node: anonymous operator
619/// tokens (`&&`, `||`, `|&`) and named `pipeline` children (which imply `|`).
620/// Returns [`Operator::Pipe`] as default since piping from a heredoc is the
621/// most common pattern.
622fn heredoc_join_operator(heredoc_node: Node) -> Operator {
623    let mut cursor = heredoc_node.walk();
624    for child in heredoc_node.children(&mut cursor) {
625        if !child.is_named() {
626            match child.kind() {
627                "&&" => return Operator::And,
628                "||" => return Operator::Or,
629                "|&" => return Operator::PipeErr,
630                _ => {}
631            }
632        } else {
633            match child.kind() {
634                "pipeline" => return Operator::Pipe,
635                "command" | "list" | "redirected_statement" => break,
636                _ => {}
637            }
638        }
639    }
640    Operator::Pipe
641}
642
643/// `for_statement`, `while_statement`, `until_statement`, `c_style_for_statement`:
644/// recurse into child nodes to extract evaluable commands.
645///
646/// For `while` and `until`, the condition is itself a command (e.g. `true`,
647/// `test -f foo`) and must be evaluated alongside the body.  For `for` and
648/// `c_style_for`, non-`do_group` children are variable names and word lists,
649/// not commands.
650fn walk_loop(node: Node, source: &[u8]) -> WalkResult {
651    let mut result = WalkResult::empty();
652    let mut cursor = node.walk();
653    for child in node.named_children(&mut cursor) {
654        match child.kind() {
655            "do_group" => result.append(walk_block(child, source), Some(Operator::Semi)),
656            _ if node.kind() == "while_statement" || node.kind() == "until_statement" => {
657                result.append(walk_ast(child, source), Some(Operator::Semi));
658            }
659            _ => {}
660        }
661    }
662    result
663}
664
665/// `if_statement`: extract commands from the condition, then-body, and any
666/// else/elif clauses.
667fn walk_if(node: Node, source: &[u8]) -> WalkResult {
668    let mut result = WalkResult::empty();
669    let mut cursor = node.walk();
670    for child in node.named_children(&mut cursor) {
671        match child.kind() {
672            "command"
673            | "declaration_command"
674            | "pipeline"
675            | "list"
676            | "redirected_statement"
677            | "compound_statement"
678            | "subshell"
679            | "negated_command" => {
680                result.append(walk_ast(child, source), Some(Operator::Semi));
681            }
682            "else_clause" | "elif_clause" => {
683                result.append(walk_ast(child, source), Some(Operator::Semi));
684            }
685            _ => {}
686        }
687    }
688    result
689}
690
691/// `case_statement`: recurse into each `case_item`, extracting only the body
692/// commands (after the `)` delimiter), not the pattern labels before it.
693fn walk_case(node: Node, source: &[u8]) -> WalkResult {
694    let mut result = WalkResult::empty();
695    let mut cursor = node.walk();
696    for child in node.named_children(&mut cursor) {
697        if child.kind() == "case_item" {
698            result.append(walk_case_item(child, source), Some(Operator::Semi));
699        }
700    }
701    result
702}
703
704/// Walk a `case_item` node, skipping pattern labels and extracting only the
705/// body commands.
706///
707/// In tree-sitter-bash, `case_item` children before the `)` token are pattern
708/// labels (e.g. `rm`, `*.txt`).  Children after `)` are the body commands to
709/// execute when matched.  Only the body commands are evaluable.
710fn walk_case_item(node: Node, source: &[u8]) -> WalkResult {
711    let mut result = WalkResult::empty();
712    let mut past_paren = false;
713    let mut cursor = node.walk();
714    for child in node.children(&mut cursor) {
715        if !child.is_named() && child.kind() == ")" {
716            past_paren = true;
717            continue;
718        }
719        if past_paren && child.is_named() {
720            result.append(walk_ast(child, source), Some(Operator::Semi));
721        }
722    }
723    result
724}
725
726/// Generic block walk: recurse into all named children, joining with
727/// [`Operator::Semi`].  Used for `do_group`, `else_clause`, `elif_clause`,
728/// `case_item`, `subshell`, and `compound_statement`.
729fn walk_block(node: Node, source: &[u8]) -> WalkResult {
730    let mut result = WalkResult::empty();
731    let mut cursor = node.walk();
732    for child in node.named_children(&mut cursor) {
733        result.append(walk_ast(child, source), Some(Operator::Semi));
734    }
735    result
736}
737
738/// `negated_command` (`! cmd`): walk the first named child (the negated body).
739fn walk_negated(node: Node, source: &[u8]) -> WalkResult {
740    let mut cursor = node.walk();
741    if let Some(child) = node.named_children(&mut cursor).next() {
742        return walk_ast(child, source);
743    }
744    WalkResult::empty()
745}
746
747/// `function_definition`: recurse into the `compound_statement` body.
748fn walk_function(node: Node, source: &[u8]) -> WalkResult {
749    let mut cursor = node.walk();
750    for child in node.named_children(&mut cursor) {
751        if child.kind() == "compound_statement" {
752            return walk_block(child, source);
753        }
754    }
755    WalkResult::empty()
756}
757
758// ---------------------------------------------------------------------------
759// Helpers
760// ---------------------------------------------------------------------------
761
762/// True for node kinds that represent a single evaluable command.
763fn is_leaf_command(node: Node) -> bool {
764    matches!(
765        node.kind(),
766        "command" | "declaration_command" | "variable_assignment"
767    )
768}
769
770/// Return the effective end byte of `node`, excluding any `heredoc_body`
771/// descendant.  This trims heredoc body content from segment text so that only
772/// the command line (including the `<<DELIM` token) is included.
773fn effective_end(node: Node) -> usize {
774    let mut end = node.end_byte();
775    trim_at_heredoc_body(node, &mut end);
776    end
777}
778
779fn trim_at_heredoc_body(node: Node, end: &mut usize) {
780    let mut cursor = node.walk();
781    for child in node.children(&mut cursor) {
782        if child.kind() == "heredoc_body" {
783            *end = (*end).min(child.start_byte());
784            return;
785        }
786        trim_at_heredoc_body(child, end);
787    }
788}
789
790// ---------------------------------------------------------------------------
791// Public API
792// ---------------------------------------------------------------------------
793
794/// Parse a shell command string into a pipeline of segments and a list of
795/// extracted substitution contents.
796///
797/// # Returns
798///
799/// `(pipeline, substitutions)` where:
800///
801/// - `pipeline.segments` — one [`ShellSegment`] per evaluable command, with
802///   `__SUBST__` placeholders where substitutions were extracted.
803/// - `pipeline.operators` — the shell operators (`&&`, `||`, `;`, `|`, `|&`)
804///   between consecutive segments.
805/// - `substitutions` — the inner command text of each outermost `$()`,
806///   backtick, `<()`, or `>()` substitution, in source order.  The eval layer
807///   evaluates these recursively.
808///
809/// # Trivial case
810///
811/// When the command is a single simple statement with no substitutions and no
812/// control flow unwrapping, the original command text is returned as-is in a
813/// single segment.  This lets the eval layer's `evaluate_single` fast path
814/// work on the exact input text.
815pub fn parse_with_substitutions(command: &str) -> (ParsedPipeline, Vec<String>) {
816    let tree = parse_tree(command);
817    let root = tree.root_node();
818    let source = command.as_bytes();
819
820    let mut subst_spans = Vec::new();
821    collect_substitutions(root, source, &mut subst_spans);
822
823    let result = walk_ast(root, source);
824
825    // Trivial: one segment spanning the full input, no substitutions, no
826    // control flow unwrapping.  When the walker recurses into a for/if/while
827    // body the segment byte range will be a sub-range of the input, so this
828    // check correctly detects unwrapping.
829    let is_trivial = result.segments.len() <= 1
830        && subst_spans.is_empty()
831        && result
832            .segments
833            .first()
834            .is_none_or(|seg| seg.start == 0 && seg.end >= command.trim_end().len());
835
836    if is_trivial {
837        let redir = result
838            .segments
839            .first()
840            .and_then(|seg| seg.redirection.clone())
841            .or_else(|| detect_redirections(root, source));
842        return (
843            ParsedPipeline {
844                segments: vec![ShellSegment {
845                    command: command.trim().to_string(),
846                    redirection: redir,
847                }],
848                operators: vec![],
849            },
850            vec![],
851        );
852    }
853
854    let substitutions: Vec<String> = subst_spans.iter().map(|s| s.inner.clone()).collect();
855
856    let segments: Vec<ShellSegment> = result
857        .segments
858        .iter()
859        .map(|seg| {
860            let text = text_replacing_substitutions(command, seg.start, seg.end, &subst_spans);
861            ShellSegment {
862                command: text.trim().to_string(),
863                redirection: seg.redirection.clone(),
864            }
865        })
866        .filter(|s| !s.command.is_empty())
867        .collect();
868
869    (
870        ParsedPipeline {
871            segments,
872            operators: result.operators,
873        },
874        substitutions,
875    )
876}
877
878/// Check whether `command` contains output redirection that could mutate
879/// filesystem state.
880///
881/// Parses the command with tree-sitter-bash and inspects `file_redirect` nodes.
882/// See `check_file_redirect` for the full safe/flagged policy.
883pub fn has_output_redirection(command: &str) -> Option<Redirection> {
884    let tree = parse_tree(command);
885    detect_redirections(tree.root_node(), command.as_bytes())
886}
887
888/// Dump the tree-sitter AST and parsed pipeline for a command string.
889///
890/// Returns a human-readable diagnostic string showing the raw AST tree,
891/// the segments and operators produced by the walker, and any extracted
892/// substitutions. Used by `--dump-ast` CLI flag.
893pub fn dump_ast(command: &str) -> String {
894    use std::fmt::Write;
895    let mut out = String::new();
896
897    // Raw AST
898    let tree = parse_tree(command);
899    writeln!(out, "── tree-sitter AST ──").unwrap();
900    fn print_node(out: &mut String, node: tree_sitter::Node, source: &[u8], indent: usize) {
901        let text = node.utf8_text(source).unwrap_or("???");
902        let short: String = text.chars().take(60).collect();
903        let tag = if node.is_named() { "named" } else { "anon" };
904        writeln!(
905            out,
906            "{}{} [{}] {:?}",
907            "  ".repeat(indent),
908            node.kind(),
909            tag,
910            short
911        )
912        .unwrap();
913        let mut cursor = node.walk();
914        for child in node.children(&mut cursor) {
915            print_node(out, child, source, indent + 1);
916        }
917    }
918    print_node(&mut out, tree.root_node(), command.as_bytes(), 0);
919
920    // Parsed pipeline
921    let (pipeline, substitutions) = parse_with_substitutions(command);
922    writeln!(out, "\n── parsed pipeline ──").unwrap();
923    for (i, seg) in pipeline.segments.iter().enumerate() {
924        let redir = seg
925            .redirection
926            .as_ref()
927            .map(|r| format!(" [{}]", r.description))
928            .unwrap_or_default();
929        writeln!(out, "  segment {}: {:?}{}", i, seg.command, redir).unwrap();
930        if i < pipeline.operators.len() {
931            writeln!(out, "  operator: {}", pipeline.operators[i].as_str()).unwrap();
932        }
933    }
934    if !substitutions.is_empty() {
935        writeln!(out, "\n── substitutions ──").unwrap();
936        for (i, sub) in substitutions.iter().enumerate() {
937            writeln!(out, "  {}: {:?}", i, sub).unwrap();
938        }
939    }
940
941    // Redirection check
942    let redir = has_output_redirection(command);
943    writeln!(out, "\n── output redirection ──").unwrap();
944    match redir {
945        Some(r) => writeln!(out, "  {}", r.description).unwrap(),
946        None => writeln!(out, "  (none)").unwrap(),
947    }
948
949    out
950}
951
#[cfg(test)]
mod tests {
    use super::*;

    // --- Shared helpers ---

    /// Parse `cmd` and keep only the pipeline, discarding the substitutions.
    fn pipeline_of(cmd: &str) -> ParsedPipeline {
        parse_with_substitutions(cmd).0
    }

    /// Parse `cmd` and keep only the extracted substitutions.
    fn substitutions_of(cmd: &str) -> Vec<String> {
        parse_with_substitutions(cmd).1
    }

    /// The command text of every segment, in order.
    fn command_texts(pipeline: &ParsedPipeline) -> Vec<&str> {
        pipeline
            .segments
            .iter()
            .map(|seg| seg.command.as_str())
            .collect()
    }

    /// True when some segment's text contains `needle`.
    fn any_contains(pipeline: &ParsedPipeline, needle: &str) -> bool {
        pipeline
            .segments
            .iter()
            .any(|seg| seg.command.contains(needle))
    }

    /// True when some segment's text starts with `prefix`.
    fn any_starts_with(pipeline: &ParsedPipeline, prefix: &str) -> bool {
        pipeline
            .segments
            .iter()
            .any(|seg| seg.command.starts_with(prefix))
    }

    /// True when some segment's trimmed text is exactly `text`.
    fn any_equals_trimmed(pipeline: &ParsedPipeline, text: &str) -> bool {
        pipeline
            .segments
            .iter()
            .any(|seg| seg.command.trim() == text)
    }

    /// True when `seg` carries a propagated redirection.
    fn carries_redir(seg: &ShellSegment) -> bool {
        seg.redirection.is_some()
    }

    /// True when `cmd` is reported as containing output redirection.
    fn is_flagged(cmd: &str) -> bool {
        has_output_redirection(cmd).is_some()
    }

    // --- Compound splitting ---

    #[test]
    fn simple_command() {
        let (pipeline, substitutions) = parse_with_substitutions("ls -la");
        assert_eq!(command_texts(&pipeline), ["ls -la"]);
        assert!(pipeline.operators.is_empty());
        assert!(substitutions.is_empty());
    }

    #[test]
    fn pipe() {
        let pipeline = pipeline_of("ls | grep foo");
        assert_eq!(command_texts(&pipeline), ["ls", "grep foo"]);
        assert_eq!(pipeline.operators, [Operator::Pipe]);
    }

    #[test]
    fn and_then() {
        let pipeline = pipeline_of("mkdir foo && cd foo");
        assert_eq!(command_texts(&pipeline), ["mkdir foo", "cd foo"]);
        assert_eq!(pipeline.operators, [Operator::And]);
    }

    #[test]
    fn or_else() {
        let pipeline = pipeline_of("test -f x || echo missing");
        assert_eq!(pipeline.segments.len(), 2);
        assert_eq!(pipeline.operators, [Operator::Or]);
    }

    #[test]
    fn semicolon() {
        let pipeline = pipeline_of("echo a; echo b");
        assert_eq!(command_texts(&pipeline), ["echo a", "echo b"]);
    }

    #[test]
    fn triple_and() {
        let pipeline = pipeline_of("a && b && c");
        assert_eq!(pipeline.segments.len(), 3);
        assert_eq!(pipeline.operators, [Operator::And, Operator::And]);
    }

    #[test]
    fn mixed_operators() {
        let pipeline = pipeline_of("a && b || c");
        assert_eq!(pipeline.segments.len(), 3);
        assert_eq!(pipeline.operators, [Operator::And, Operator::Or]);
    }

    #[test]
    fn quoted_operator_not_split() {
        let (pipeline, substitutions) = parse_with_substitutions(r#"echo "a && b""#);
        assert_eq!(pipeline.segments.len(), 1);
        assert!(substitutions.is_empty());
    }

    // --- Substitution extraction ---

    #[test]
    fn dollar_paren_substitution() {
        let (pipeline, substitutions) = parse_with_substitutions("echo $(date)");
        assert_eq!(substitutions, ["date"]);
        assert_eq!(pipeline.segments[0].command, "echo __SUBST__");
    }

    #[test]
    fn backtick_substitution() {
        let (pipeline, substitutions) = parse_with_substitutions("echo `date`");
        assert_eq!(substitutions, ["date"]);
        assert_eq!(pipeline.segments[0].command, "echo __SUBST__");
    }

    #[test]
    fn single_quoted_not_substituted() {
        assert!(substitutions_of("echo '$(date)'").is_empty());
    }

    #[test]
    fn double_quoted_is_substituted() {
        assert_eq!(substitutions_of(r#"echo "$(date)""#), ["date"]);
    }

    #[test]
    fn process_substitution() {
        assert_eq!(substitutions_of("diff <(ls a) <(ls b)"), ["ls a", "ls b"]);
    }

    // --- Redirection detection ---

    #[test]
    fn redir_simple_gt() {
        assert!(is_flagged("echo hi > file"));
    }

    #[test]
    fn redir_append() {
        assert!(is_flagged("echo hi >> file"));
    }

    #[test]
    fn redir_ampersand_gt() {
        assert!(is_flagged("cmd &> file"));
    }

    #[test]
    fn no_redir_devnull() {
        assert!(!is_flagged("cmd > /dev/null"));
    }

    #[test]
    fn no_redir_devnull_stderr() {
        assert!(!is_flagged("cmd 2>/dev/null"));
    }

    #[test]
    fn no_redir_devnull_append() {
        assert!(!is_flagged("cmd >> /dev/null"));
    }

    #[test]
    fn no_redir_devnull_ampersand() {
        assert!(!is_flagged("cmd &>/dev/null"));
    }

    #[test]
    fn no_redir_fd_dup_stderr_to_stdout() {
        assert!(!is_flagged("cmd 2>&1"));
    }

    #[test]
    fn no_redir_fd_dup_stdout_to_stderr() {
        assert!(!is_flagged("cmd >&2"));
    }

    #[test]
    fn no_redir_fd_close() {
        assert!(!is_flagged("cmd >&-"));
    }

    #[test]
    fn redir_custom_fd_target() {
        let found = has_output_redirection("cmd >&3");
        assert!(found.is_some_and(|r| r.description.contains("custom fd target")));
    }

    #[test]
    fn no_redir_quoted() {
        assert!(!is_flagged(r#"echo ">""#));
    }

    #[test]
    fn no_redir_process_subst() {
        assert!(!is_flagged("diff <(ls) >(cat)"));
    }

    #[test]
    fn redir_clobber() {
        let Some(found) = has_output_redirection("echo hi >| file.txt") else {
            panic!("expected >| to be flagged as output redirection");
        };
        assert!(found.description.contains(">|"));
    }

    #[test]
    fn redir_clobber_devnull() {
        assert!(!is_flagged("echo hi >| /dev/null"));
    }

    #[test]
    fn redir_read_write_detected() {
        // tree-sitter-bash parses `<>` as `<` + ERROR(`>`). We detect the
        // ERROR child and flag it as output redirection.
        let Some(found) = has_output_redirection("cat <> file.txt") else {
            panic!("expected <> to be flagged as output redirection");
        };
        assert!(found.description.contains("<>"));
    }

    // --- Control flow ---

    #[test]
    fn for_loop_extracts_body() {
        let pipeline = pipeline_of("for i in *; do echo \"$i\"; done");
        assert!(!any_starts_with(&pipeline, "for"));
        assert!(any_contains(&pipeline, "echo"));
    }

    #[test]
    fn if_statement_extracts_body() {
        let pipeline = pipeline_of("if test -f x; then echo yes; fi");
        assert!(!any_starts_with(&pipeline, "if"));
        assert!(any_contains(&pipeline, "test"));
        assert!(any_contains(&pipeline, "echo"));
    }

    #[test]
    fn while_loop_extracts_body() {
        let pipeline = pipeline_of("while true; do sleep 1; done");
        assert!(!any_starts_with(&pipeline, "while"));
        assert!(any_contains(&pipeline, "true"));
        assert!(any_contains(&pipeline, "sleep"));
    }

    #[test]
    fn case_pattern_not_treated_as_command() {
        let pipeline = pipeline_of(r#"case $x in rm) echo hi ;; kubectl) echo bye ;; esac"#);
        let commands = command_texts(&pipeline);
        // Pattern labels (rm, kubectl) must NOT appear as segments.
        // Only the body commands (echo hi, echo bye) should.
        assert!(
            !any_equals_trimmed(&pipeline, "rm"),
            "case pattern 'rm' leaked as segment: {commands:?}",
        );
        assert!(
            !any_equals_trimmed(&pipeline, "kubectl"),
            "case pattern 'kubectl' leaked as segment: {commands:?}",
        );
        assert!(
            any_contains(&pipeline, "echo hi"),
            "expected 'echo hi' body: {commands:?}",
        );
        assert!(
            any_contains(&pipeline, "echo bye"),
            "expected 'echo bye' body: {commands:?}",
        );
    }

    #[test]
    fn compound_heredoc_pipe_unwraps_body() {
        // When a compound command (while/for/if) is the body of a
        // redirected_statement with a heredoc pipe, the body must be
        // recursively walked so inner commands are extracted — not
        // flattened as "while ..." text.
        let pipeline =
            pipeline_of("while true; do shred /dev/sda; done <<EOF | cat\nstuff\nEOF");
        let commands = command_texts(&pipeline);
        // The while-loop body should be unwrapped to "shred /dev/sda",
        // not left as "while true; do shred /dev/sda; done".
        assert!(
            !any_starts_with(&pipeline, "while"),
            "while-loop was not unwrapped in heredoc pipe path: {commands:?}",
        );
        assert!(
            any_contains(&pipeline, "shred"),
            "expected 'shred' to be extracted from loop body: {commands:?}",
        );
        assert!(
            any_equals_trimmed(&pipeline, "cat"),
            "expected piped 'cat' segment: {commands:?}",
        );
    }

    // --- Redirection propagation (list vs. control-flow) ---

    #[test]
    fn redirect_list_only_last_segment_gets_redir() {
        // `export FOO=bar && cat > /tmp/file` — only the last segment (cat)
        // should carry the redirection; the export segment must not.
        let pipeline = pipeline_of("export FOO=bar && cat > /tmp/file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 2, "expected 2 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "export segment must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            carries_redir(&segs[1]),
            "cat segment must carry redirection: {:?}",
            segs[1],
        );
    }

    #[test]
    fn redirect_for_loop_all_segments_get_redir() {
        // `for i in *; do echo $i; done > /tmp/out` — the loop body is a
        // control-flow construct, so all inner segments get the redirect.
        let pipeline = pipeline_of("for i in *; do echo $i; done > /tmp/out");
        let segs = &pipeline.segments;
        assert!(
            !segs.is_empty(),
            "expected at least one segment from loop body"
        );
        assert!(
            segs.iter().all(carries_redir),
            "all loop-body segments must carry the redirection: {:?}",
            segs,
        );
    }

    #[test]
    fn redirect_list_three_segments_only_last_gets_redir() {
        // `a && b && c > file` — 3 segments, only the last should have a redirect.
        let pipeline = pipeline_of("a && b && c > file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 3, "expected 3 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "segment 0 must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            !carries_redir(&segs[1]),
            "segment 1 must NOT carry redirection: {:?}",
            segs[1],
        );
        assert!(
            carries_redir(&segs[2]),
            "segment 2 must carry redirection: {:?}",
            segs[2],
        );
    }

    #[test]
    fn redirect_list_original_bug_scenario() {
        // Original bug: `export FOO=bar && REPO_ID=$(echo test) && cat > /tmp/file`
        // produced 3 segments where export and assignment both had redirection,
        // causing them to be incorrectly escalated to Ask.
        // Only the last segment (cat) must carry the redirect.
        let pipeline =
            pipeline_of("export FOO=bar && REPO_ID=$(echo test) && cat > /tmp/file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 3, "expected 3 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "export segment must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            !carries_redir(&segs[1]),
            "assignment segment must NOT carry redirection: {:?}",
            segs[1],
        );
        assert!(
            carries_redir(&segs[2]),
            "cat segment must carry redirection: {:?}",
            segs[2],
        );
    }

    #[test]
    fn redirect_or_list_only_last_segment_gets_redir() {
        // `a || b > file` — || chains are also `list` nodes; only last gets redirect.
        let pipeline = pipeline_of("a || b > file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 2, "expected 2 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "first segment must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            carries_redir(&segs[1]),
            "last segment must carry redirection: {:?}",
            segs[1],
        );
    }

    #[test]
    fn redirect_mixed_operators_only_last_gets_redir() {
        // `a && b || c > file` — mixed &&/|| chain, only last gets redirect.
        let pipeline = pipeline_of("a && b || c > file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 3, "expected 3 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "segment 0 must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            !carries_redir(&segs[1]),
            "segment 1 must NOT carry redirection: {:?}",
            segs[1],
        );
        assert!(
            carries_redir(&segs[2]),
            "last segment must carry redirection: {:?}",
            segs[2],
        );
    }

    #[test]
    fn redirect_pipeline_only_last_segment_gets_redir() {
        // `echo hello | cat > /tmp/file` — only the last pipeline stage (cat)
        // should carry the redirection; earlier stages' stdout goes to the pipe.
        let pipeline = pipeline_of("echo hello | cat > /tmp/file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 2, "expected 2 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "first pipeline stage must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            carries_redir(&segs[1]),
            "last pipeline stage must carry redirection: {:?}",
            segs[1],
        );
    }

    #[test]
    fn redirect_pipeline_three_stages_only_last_gets_redir() {
        // `a | b | c > file` — 3 pipeline stages, only last gets redirect.
        let pipeline = pipeline_of("a | b | c > file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 3, "expected 3 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "segment 0 must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            !carries_redir(&segs[1]),
            "segment 1 must NOT carry redirection: {:?}",
            segs[1],
        );
        assert!(
            carries_redir(&segs[2]),
            "last segment must carry redirection: {:?}",
            segs[2],
        );
    }

    #[test]
    fn redirect_pipeline_stderr_only_last_gets_redir() {
        // `a |& b > file` — |& produces the same `pipeline` node kind as |.
        let pipeline = pipeline_of("a |& b > file");
        let segs = &pipeline.segments;
        assert_eq!(segs.len(), 2, "expected 2 segments: {:?}", segs);
        assert!(
            !carries_redir(&segs[0]),
            "first stage must NOT carry redirection: {:?}",
            segs[0],
        );
        assert!(
            carries_redir(&segs[1]),
            "last stage must carry redirection: {:?}",
            segs[1],
        );
    }

    #[test]
    fn redirect_compound_statement_all_segments_get_redir() {
        // `{ a && b; } > /tmp/out` — compound_statement wrapping a list; the
        // redirect applies to the entire group, so ALL segments must carry it.
        let pipeline = pipeline_of("{ a && b; } > /tmp/out");
        let segs = &pipeline.segments;
        assert!(
            !segs.is_empty(),
            "expected at least one segment from compound body"
        );
        assert!(
            segs.iter().all(carries_redir),
            "all segments in grouped command must carry redirect: {:?}",
            segs,
        );
    }
}