Skip to main content

agent_shell_parser/parse/
shell.rs

//! Shell command parsing backed by tree-sitter-bash.
//!
//! Public API:
//!
//! - [`parse_with_substitutions`] — decomposes a shell command into a
//!   recursive [`ParsedPipeline`] tree.
//! - [`has_output_redirection`] — reports output redirection found in a
//!   command (used for mutation detection).
//! - [`dump_ast`] — diagnostic dump of the AST, the parsed pipeline, and
//!   the redirection check.
//!
//! The parser uses tree-sitter-bash to build a full AST, then walks it to
//! produce segments joined by operators. Substitutions (`$()`, backticks,
//! `<()`, `>()`) are recursively parsed into nested pipelines, so the
//! result is a tree that can be evaluated bottom-up (a catamorphism).
//!
//! # Control flow handling
//!
//! Shell keywords (`for`, `if`, `while`, `case`) are grammar structure,
//! not commands. The walker recurses into their bodies and extracts the
//! actual commands as segments.
//!
//! # Redirection propagation
//!
//! When a control flow construct has output redirection
//! (e.g. `for ... done > file`), the redirection propagates to the inner
//! segments via [`ShellSegment::redirection`].
26
27use super::redirect::detect_redirections;
28use super::subst::{assign_substitutions, build_segments, collect_substitutions};
29use super::types::{ParseError, ParsedPipeline, ShellSegment};
30use super::walk::walk_ast;
31use std::cell::{Cell, RefCell};
32use tree_sitter::{Parser, Tree};
33
/// Maximum number of tree-sitter parse calls across all recursion levels.
/// Prevents exponential fan-out DoS (e.g. `echo $(a) $(b) $(c) ...` nested).
///
/// `parse_tree` charges one unit per call against a `Cell<usize>` budget
/// created by each public entry point, and returns `ParseError` once the
/// budget has reached this value.
const MAX_TOTAL_PARSES: usize = 512;

/// Maximum input length accepted by the parser (64 KiB).
///
/// Compared against `str::len`, so the limit is measured in bytes, not
/// characters.
const MAX_INPUT_LENGTH: usize = 64 * 1024;
40
41// ---------------------------------------------------------------------------
42// Thread-local parser
43// ---------------------------------------------------------------------------
44
45thread_local! {
46    /// tree-sitter `Parser` is `!Send`, so we use `thread_local!` storage.
47    ///
48    /// # Async safety
49    ///
50    /// The `RefCell` borrow is acquired and released within the synchronous
51    /// `parse_tree()` call — it never crosses an `.await` point. Each
52    /// thread in an async runtime pool gets its own parser instance.
53    /// `parse_tree()` must remain synchronous.
54    static TS_PARSER: RefCell<Parser> = RefCell::new({
55        let mut p = Parser::new();
56        p.set_language(&tree_sitter_bash::LANGUAGE.into())
57            .expect("failed to load bash grammar");
58        p
59    });
60}
61
62fn parse_tree(source: &str, budget: &Cell<usize>) -> Result<Tree, ParseError> {
63    let count = budget.get();
64    if count >= MAX_TOTAL_PARSES {
65        return Err(ParseError);
66    }
67    budget.set(count + 1);
68    TS_PARSER.with(|p| p.borrow_mut().parse(source, None).ok_or(ParseError))
69}
70
71// ---------------------------------------------------------------------------
72// Public API
73// ---------------------------------------------------------------------------
74
75/// Parse a shell command into a recursive pipeline tree.
76///
77/// Substitutions are recursively parsed: `echo $(cmd1 && cmd2)` produces
78/// a segment whose substitution contains a two-segment pipeline. The tree
79/// can be evaluated bottom-up — inner substitutions execute first.
80///
81/// Recursion depth is capped at 32 levels. Deeper nesting produces an
82/// empty pipeline with `has_parse_errors: true`.
83pub fn parse_with_substitutions(command: &str) -> Result<ParsedPipeline, ParseError> {
84    if command.len() > MAX_INPUT_LENGTH {
85        return Ok(ParsedPipeline::empty_with_error());
86    }
87    let budget = Cell::new(0);
88    parse_with_substitutions_impl(command, 0, &budget)
89}
90
91fn parse_with_substitutions_impl(
92    command: &str,
93    depth: usize,
94    budget: &Cell<usize>,
95) -> Result<ParsedPipeline, ParseError> {
96    let tree = parse_tree(command, budget)?;
97    let root = tree.root_node();
98    let source = command.as_bytes();
99    let has_parse_errors = root.has_error();
100
101    let mut raw_substs = Vec::new();
102    collect_substitutions(root, source, &mut raw_substs);
103
104    let walk = walk_ast(root, source);
105
106    let trimmed = command.trim();
107    let is_trivial = walk.segments.len() <= 1
108        && raw_substs.is_empty()
109        && walk
110            .segments
111            .first()
112            .is_none_or(|seg| seg.start == 0 && seg.end >= trimmed.len());
113
114    if is_trivial {
115        let redir = walk
116            .segments
117            .first()
118            .and_then(|seg| seg.redirection.clone())
119            .or_else(|| detect_redirections(root, source));
120        return Ok(ParsedPipeline {
121            segments: vec![ShellSegment {
122                command: trimmed.to_string(),
123                redirection: redir,
124                substitutions: vec![],
125            }],
126            operators: vec![],
127            structural_substitutions: vec![],
128            has_parse_errors,
129        });
130    }
131
132    let built = build_segments(&walk, command);
133    let (per_segment_subs, structural_subs) =
134        assign_substitutions(&raw_substs, &built, depth, &|inner, d| {
135            parse_with_substitutions_impl(inner, d, budget)
136        });
137
138    let segments: Vec<ShellSegment> = built
139        .into_iter()
140        .zip(per_segment_subs)
141        .map(|(b, subs)| ShellSegment {
142            command: b.command,
143            redirection: b.redirection,
144            substitutions: subs,
145        })
146        .collect();
147
148    Ok(ParsedPipeline {
149        segments,
150        operators: walk.operators,
151        structural_substitutions: structural_subs,
152        has_parse_errors,
153    })
154}
155
156/// Check whether `command` contains output redirection.
157pub fn has_output_redirection(
158    command: &str,
159) -> Result<Option<super::types::Redirection>, ParseError> {
160    let budget = Cell::new(0);
161    let tree = parse_tree(command, &budget)?;
162    Ok(detect_redirections(tree.root_node(), command.as_bytes()))
163}
164
165/// Diagnostic: dump the tree-sitter AST and parsed pipeline.
166///
167/// Sections 1 (AST dump) and 3 (redirection check) share a single
168/// parse tree. Section 2 (pipeline decomposition) calls
169/// [`parse_with_substitutions`] separately — it builds the recursive
170/// pipeline structure from scratch.
171pub fn dump_ast(command: &str) -> Result<String, ParseError> {
172    use std::fmt::Write;
173    let mut out = String::new();
174
175    let budget = Cell::new(0);
176    let tree = parse_tree(command, &budget)?;
177    let root = tree.root_node();
178    let source = command.as_bytes();
179
180    // Section 1: raw AST
181    writeln!(out, "── tree-sitter AST ──").unwrap();
182    fn print_node(out: &mut String, node: tree_sitter::Node, source: &[u8], indent: usize) {
183        let text = node.utf8_text(source).unwrap_or("???");
184        let short: String = text.chars().take(60).collect();
185        let tag = if node.is_named() { "named" } else { "anon" };
186        writeln!(
187            out,
188            "{}{} [{}] {:?}",
189            "  ".repeat(indent),
190            node.kind(),
191            tag,
192            short
193        )
194        .unwrap();
195        let mut cursor = node.walk();
196        for child in node.children(&mut cursor) {
197            print_node(out, child, source, indent + 1);
198        }
199    }
200    print_node(&mut out, root, source, 0);
201
202    // Section 2: parsed pipeline (reuses the public API — separate parse is
203    // unavoidable here since parse_with_substitutions_impl builds from scratch,
204    // but this is a diagnostic function so the cost is acceptable)
205    let pipeline = parse_with_substitutions(command)?;
206    writeln!(out, "\n── parsed pipeline ──").unwrap();
207    if pipeline.has_parse_errors {
208        writeln!(out, "  (parse errors detected — best-effort result)").unwrap();
209    }
210    fn print_pipeline(out: &mut String, p: &ParsedPipeline, indent: usize) {
211        let pad = "  ".repeat(indent);
212        for sub in &p.structural_substitutions {
213            writeln!(
214                out,
215                "{pad}structural subst bytes {}..{}:",
216                sub.start, sub.end
217            )
218            .unwrap();
219            print_pipeline(out, &sub.pipeline, indent + 1);
220        }
221        for (i, seg) in p.segments.iter().enumerate() {
222            let redir = seg
223                .redirection
224                .as_ref()
225                .map(|r| format!(" [{r}]"))
226                .unwrap_or_default();
227            writeln!(out, "{pad}segment {i}: {:?}{redir}", seg.command).unwrap();
228            for sub in &seg.substitutions {
229                writeln!(out, "{pad}  subst bytes {}..{}:", sub.start, sub.end).unwrap();
230                print_pipeline(out, &sub.pipeline, indent + 2);
231            }
232            if i < p.operators.len() {
233                writeln!(out, "{pad}operator: {}", p.operators[i]).unwrap();
234            }
235        }
236    }
237    print_pipeline(&mut out, &pipeline, 1);
238
239    // Section 3: redirection check (reuses the tree from section 1)
240    let redir = detect_redirections(root, source);
241    writeln!(out, "\n── output redirection ──").unwrap();
242    match redir {
243        Some(r) => writeln!(out, "  {r}").unwrap(),
244        None => writeln!(out, "  (none)").unwrap(),
245    }
246
247    Ok(out)
248}
249
#[cfg(test)]
mod tests {
    use super::super::types::{Operator, Redirection};
    use super::*;

    /// Parses `cmd`, panicking if the parser returns an error.
    fn parse(cmd: &str) -> ParsedPipeline {
        parse_with_substitutions(cmd).expect("parse failed")
    }

    /// True when at least one segment's command text contains `needle`.
    fn any_segment_contains(p: &ParsedPipeline, needle: &str) -> bool {
        p.segments.iter().any(|s| s.command.contains(needle))
    }

    /// True when at least one segment's command text starts with `prefix`.
    fn any_segment_starts_with(p: &ParsedPipeline, prefix: &str) -> bool {
        p.segments.iter().any(|s| s.command.starts_with(prefix))
    }

    /// Runs the redirection check, panicking if parsing fails.
    fn redirection_of(cmd: &str) -> Option<Redirection> {
        has_output_redirection(cmd).unwrap()
    }

    // --- Compound splitting ---

    #[test]
    fn simple_command() {
        let p = parse("ls -la");
        assert_eq!(p.segments.len(), 1);
        let only = &p.segments[0];
        assert_eq!(only.command, "ls -la");
        assert!(p.operators.is_empty());
        assert!(only.substitutions.is_empty());
        assert!(p.structural_substitutions.is_empty());
    }

    #[test]
    fn pipe() {
        let p = parse("ls | grep foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "ls");
        assert_eq!(p.segments[1].command, "grep foo");
        assert_eq!(p.operators, vec![Operator::Pipe]);
    }

    #[test]
    fn and_then() {
        let p = parse("mkdir foo && cd foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::And]);
    }

    #[test]
    fn or_else() {
        let p = parse("test -f x || echo missing");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::Or]);
    }

    #[test]
    fn semicolon() {
        let p = parse("echo a; echo b");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "echo a");
        assert_eq!(p.segments[1].command, "echo b");
    }

    #[test]
    fn triple_and() {
        let p = parse("a && b && c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::And]);
    }

    #[test]
    fn mixed_operators() {
        let p = parse("a && b || c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::Or]);
    }

    #[test]
    fn quoted_operator_not_split() {
        let p = parse(r#"echo "a && b""#);
        assert_eq!(p.segments.len(), 1);
    }

    // --- Substitutions (recursive) ---

    #[test]
    fn dollar_paren_substitution() {
        let p = parse("echo $(date)");
        let seg = &p.segments[0];
        assert_eq!(seg.command, "echo $(date)");
        assert_eq!(seg.substitutions.len(), 1);
        let inner = &seg.substitutions[0].pipeline;
        assert_eq!(inner.segments.len(), 1);
        assert_eq!(inner.segments[0].command, "date");
    }

    #[test]
    fn backtick_substitution() {
        let p = parse("echo `date`");
        let seg = &p.segments[0];
        assert_eq!(seg.command, "echo `date`");
        assert_eq!(seg.substitutions.len(), 1);
        assert_eq!(seg.substitutions[0].pipeline.segments[0].command, "date");
    }

    #[test]
    fn single_quoted_not_substituted() {
        let p = parse("echo '$(date)'");
        assert!(p.segments[0].substitutions.is_empty());
    }

    #[test]
    fn double_quoted_is_substituted() {
        let p = parse(r#"echo "$(date)""#);
        assert_eq!(p.segments[0].substitutions.len(), 1);
    }

    #[test]
    fn process_substitution() {
        let p = parse("diff <(ls a) <(ls b)");
        let subs = &p.segments[0].substitutions;
        assert_eq!(subs.len(), 2);
        assert_eq!(subs[0].pipeline.segments[0].command, "ls a");
        assert_eq!(subs[1].pipeline.segments[0].command, "ls b");
    }

    #[test]
    fn nested_substitution() {
        let p = parse("echo $(cat $(find . -name foo))");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        let outer = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(outer.segments[0].substitutions.len(), 1);
        let inner = &outer.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "find . -name foo");
    }

    #[test]
    fn substitution_byte_positions() {
        let p = parse("echo $(date)");
        let seg = &p.segments[0];
        let sub = &seg.substitutions[0];
        // In "echo $(date)", `$(date)` starts at byte 5 and ends at byte 12.
        assert_eq!(sub.start, 5);
        assert_eq!(sub.end, 12);
        assert_eq!(&seg.command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn substitution_in_second_segment() {
        let p = parse("echo hi && echo $(date)");
        assert!(p.segments[0].substitutions.is_empty());
        let second = &p.segments[1];
        assert_eq!(second.substitutions.len(), 1);
        let sub = &second.substitutions[0];
        assert_eq!(&second.command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn compound_substitution_content() {
        let p = parse("echo $(cmd1 && cmd2)");
        let inner = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments.len(), 2);
        assert_eq!(inner.operators, vec![Operator::And]);
    }

    // --- Structural (orphan) substitutions ---

    #[test]
    fn structural_substitution_in_for_loop() {
        let p = parse("for i in $(seq 10); do echo $i; done");
        assert_eq!(p.structural_substitutions.len(), 1);
        let inner = &p.structural_substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "seq 10");
    }

    #[test]
    fn structural_substitution_in_case_subject() {
        let p = parse("case $(git status) in clean) echo ok ;; esac");
        assert_eq!(p.structural_substitutions.len(), 1);
        let inner = &p.structural_substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "git status");
    }

    #[test]
    fn structural_substitution_byte_offsets() {
        // `$(seq 10)` occupies bytes 9..18 of `cmd`; the reported offsets
        // must slice exactly that text out of the original command.
        let cmd = "for i in $(seq 10); do echo $i; done";
        let p = parse(cmd);
        assert_eq!(p.structural_substitutions.len(), 1);
        let sub = &p.structural_substitutions[0];
        assert_eq!(&cmd[sub.start..sub.end], "$(seq 10)");
    }

    // --- Control flow ---

    #[test]
    fn for_loop_extracts_body() {
        let p = parse("for i in *; do echo \"$i\"; done");
        assert!(!any_segment_starts_with(&p, "for"));
        assert!(any_segment_contains(&p, "echo"));
    }

    #[test]
    fn if_statement_extracts_body() {
        let p = parse("if test -f x; then echo yes; fi");
        assert!(any_segment_contains(&p, "test"));
        assert!(any_segment_contains(&p, "echo"));
    }

    #[test]
    fn while_loop_extracts_body() {
        let p = parse("while true; do sleep 1; done");
        assert!(any_segment_contains(&p, "true"));
        assert!(any_segment_contains(&p, "sleep"));
    }

    #[test]
    fn case_pattern_not_treated_as_command() {
        let p = parse(r#"case $x in rm) echo hi ;; kubectl) echo bye ;; esac"#);
        assert!(p.segments.iter().all(|s| s.command.trim() != "rm"));
        assert!(any_segment_contains(&p, "echo hi"));
    }

    #[test]
    fn if_test_command_extracted() {
        let p = parse("if [[ -f foo ]]; then git commit; fi");
        assert!(any_segment_contains(&p, "[["));
        assert!(any_segment_contains(&p, "git commit"));
    }

    #[test]
    fn if_test_command_substitution_has_segment() {
        let p = parse(r#"if [[ $(git status) == "clean" ]]; then echo ok; fi"#);
        let test_seg = p
            .segments
            .iter()
            .find(|s| s.command.contains("[["))
            .unwrap();
        assert_eq!(test_seg.substitutions.len(), 1);
        let inner = &test_seg.substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "git status");
    }

    #[test]
    fn compound_heredoc_pipe_unwraps_body() {
        let cmd = "while true; do shred /dev/sda; done <<EOF | cat\nstuff\nEOF";
        let p = parse(cmd);
        assert!(!any_segment_starts_with(&p, "while"));
        assert!(any_segment_contains(&p, "shred"));
        assert!(p.segments.iter().any(|s| s.command.trim() == "cat"));
    }

    // --- Background operator ---

    #[test]
    fn background_operator() {
        let p = parse("sleep 10 & git commit -m test");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "sleep 10");
        assert_eq!(p.segments[1].command, "git commit -m test");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    #[test]
    fn background_and_disown() {
        let p = parse("waybar & disown");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "waybar");
        assert_eq!(p.segments[1].command, "disown");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    // --- Redirection detection ---

    #[test]
    fn redir_simple_gt() {
        assert!(redirection_of("echo hi > file").is_some());
    }

    #[test]
    fn redir_append() {
        assert!(redirection_of("echo hi >> file").is_some());
    }

    #[test]
    fn no_redir_devnull() {
        assert!(redirection_of("cmd > /dev/null").is_none());
    }

    #[test]
    fn no_redir_fd_dup() {
        assert!(redirection_of("cmd 2>&1").is_none());
    }

    #[test]
    fn no_redir_fd_close() {
        assert!(redirection_of("cmd >&-").is_none());
    }

    #[test]
    fn redir_custom_fd_target() {
        let r = redirection_of("cmd >&3").unwrap();
        assert_eq!(r.operator, ">&");
        assert_eq!(r.target, "3");
    }

    #[test]
    fn redir_clobber() {
        assert!(redirection_of("echo hi >| file.txt").is_some());
    }

    #[test]
    fn redir_read_write() {
        assert!(redirection_of("cat <> file.txt").is_some());
    }

    // --- Redirect edge cases ---

    #[test]
    fn no_redir_fd_close_input() {
        assert!(redirection_of("cmd <&-").is_none());
    }

    #[test]
    fn no_redir_fd_close_2() {
        assert!(redirection_of("cmd 2>&-").is_none());
    }

    // --- Redirection propagation ---

    #[test]
    fn redirect_list_only_last_segment() {
        let p = parse("export FOO=bar && cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    #[test]
    fn redirect_for_loop_all_segments() {
        let p = parse("for i in *; do echo $i; done > /tmp/out");
        assert!(!p.segments.iter().any(|s| s.redirection.is_none()));
    }

    #[test]
    fn redirect_pipeline_only_last() {
        let p = parse("echo hello | cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    // --- has_parse_errors ---

    #[test]
    fn well_formed_no_errors() {
        let p = parse("echo hello");
        assert!(!p.has_parse_errors);
    }

    // --- Recursion depth limit ---

    #[test]
    fn deeply_nested_substitutions_capped() {
        // Wrap "echo x" in 40 levels of `echo $(...)`.
        let cmd = (0..40).fold("echo x".to_string(), |inner, _| {
            format!("echo $({inner})")
        });
        let p = parse(&cmd);
        // Must not overflow the stack. Inner pipelines beyond depth 32
        // report has_parse_errors: true and carry no segments.
        assert_eq!(p.segments.len(), 1);
        assert!(p.has_parse_errors_recursive());

        // Follow the substitution chain 33 levels down to reach the first
        // pipeline past the cap of 32.
        let mut current = &p;
        for _ in 0..33 {
            current = &current.segments[0].substitutions[0].pipeline;
        }
        assert!(current.has_parse_errors);
        assert!(current.segments.is_empty());
    }

    // --- Additional AST node coverage ---

    #[test]
    fn until_loop_extracts_body() {
        let p = parse("until false; do echo waiting; sleep 1; done");
        assert!(!any_segment_starts_with(&p, "until"));
        assert!(any_segment_contains(&p, "echo"));
        assert!(any_segment_contains(&p, "sleep"));
    }

    #[test]
    fn elif_clause_extracts_all_branches() {
        let p = parse("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi");
        for expected in ["test -f a", "echo a", "test -f b", "echo b", "echo c"] {
            assert!(any_segment_contains(&p, expected));
        }
    }

    #[test]
    fn function_definition_body_extracted() {
        let p = parse("foo() { echo hello; ls; }");
        assert!(any_segment_contains(&p, "echo hello"));
        assert!(p.segments.iter().any(|s| s.command == "ls"));
        assert!(!any_segment_contains(&p, "foo()"));
    }

    #[test]
    fn c_style_for_loop() {
        let p = parse("for ((i=0; i<10; i++)); do echo $i; done");
        assert!(any_segment_contains(&p, "echo"));
    }

    #[test]
    fn negated_command_extracts_inner() {
        let p = parse("! git status");
        assert!(any_segment_contains(&p, "git status"));
    }

    #[test]
    fn pipe_err_operator() {
        let p = parse("cmd1 |& cmd2");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::PipeErr]);
    }

    #[test]
    fn function_with_for_body() {
        let p = parse("f() for i in *; do echo $i; done");
        assert!(any_segment_contains(&p, "echo"));
    }

    // --- Input length cap ---

    #[test]
    fn input_length_cap() {
        // "echo " plus 65 KiB of payload is over the 64 KiB limit.
        let input = format!("echo {}", "x".repeat(65 * 1024));
        let p = parse(&input);
        assert!(p.has_parse_errors);
        assert!(p.segments.is_empty());
    }
}