agent_shell_parser/parse/
shell.rs1use super::redirect::detect_redirections;
28use super::subst::{assign_substitutions, build_segments, collect_substitutions};
29use super::types::{ParseError, ParsedPipeline, ShellSegment};
30use super::walk::walk_ast;
31use std::cell::{Cell, RefCell};
32use tree_sitter::{Parser, Tree};
33
/// Upper bound on the number of tree-sitter parses performed against a single
/// parse budget. `parse_tree` returns `ParseError` once the budget counter has
/// reached this value, which bounds the work done by the recursive
/// substitution parsing in `parse_with_substitutions_impl`.
const MAX_TOTAL_PARSES: usize = 512;
37
/// Largest command, in bytes (`str::len`), that `parse_with_substitutions`
/// will hand to the parser: 64 KiB. Longer inputs are not parsed at all and
/// yield `ParsedPipeline::empty_with_error()` instead.
const MAX_INPUT_LENGTH: usize = 64 * 1024;
40
41thread_local! {
46 static TS_PARSER: RefCell<Parser> = RefCell::new({
55 let mut p = Parser::new();
56 p.set_language(&tree_sitter_bash::LANGUAGE.into())
57 .expect("failed to load bash grammar");
58 p
59 });
60}
61
62fn parse_tree(source: &str, budget: &Cell<usize>) -> Result<Tree, ParseError> {
63 let count = budget.get();
64 if count >= MAX_TOTAL_PARSES {
65 return Err(ParseError);
66 }
67 budget.set(count + 1);
68 TS_PARSER.with(|p| p.borrow_mut().parse(source, None).ok_or(ParseError))
69}
70
71pub fn parse_with_substitutions(command: &str) -> Result<ParsedPipeline, ParseError> {
84 if command.len() > MAX_INPUT_LENGTH {
85 return Ok(ParsedPipeline::empty_with_error());
86 }
87 let budget = Cell::new(0);
88 parse_with_substitutions_impl(command, 0, &budget)
89}
90
91fn parse_with_substitutions_impl(
92 command: &str,
93 depth: usize,
94 budget: &Cell<usize>,
95) -> Result<ParsedPipeline, ParseError> {
96 let tree = parse_tree(command, budget)?;
97 let root = tree.root_node();
98 let source = command.as_bytes();
99 let has_parse_errors = root.has_error();
100
101 let mut raw_substs = Vec::new();
102 collect_substitutions(root, source, &mut raw_substs);
103
104 let walk = walk_ast(root, source);
105
106 let trimmed = command.trim();
107 let is_trivial = walk.segments.len() <= 1
108 && raw_substs.is_empty()
109 && walk
110 .segments
111 .first()
112 .is_none_or(|seg| seg.start == 0 && seg.end >= trimmed.len());
113
114 if is_trivial {
115 let redir = walk
116 .segments
117 .first()
118 .and_then(|seg| seg.redirection.clone())
119 .or_else(|| detect_redirections(root, source));
120 return Ok(ParsedPipeline {
121 segments: vec![ShellSegment {
122 command: trimmed.to_string(),
123 redirection: redir,
124 substitutions: vec![],
125 }],
126 operators: vec![],
127 structural_substitutions: vec![],
128 has_parse_errors,
129 });
130 }
131
132 let built = build_segments(&walk, command);
133 let (per_segment_subs, structural_subs) =
134 assign_substitutions(&raw_substs, &built, depth, &|inner, d| {
135 parse_with_substitutions_impl(inner, d, budget)
136 });
137
138 let segments: Vec<ShellSegment> = built
139 .into_iter()
140 .zip(per_segment_subs)
141 .map(|(b, subs)| ShellSegment {
142 command: b.command,
143 redirection: b.redirection,
144 substitutions: subs,
145 })
146 .collect();
147
148 Ok(ParsedPipeline {
149 segments,
150 operators: walk.operators,
151 structural_substitutions: structural_subs,
152 has_parse_errors,
153 })
154}
155
156pub fn has_output_redirection(
158 command: &str,
159) -> Result<Option<super::types::Redirection>, ParseError> {
160 let budget = Cell::new(0);
161 let tree = parse_tree(command, &budget)?;
162 Ok(detect_redirections(tree.root_node(), command.as_bytes()))
163}
164
165pub fn dump_ast(command: &str) -> Result<String, ParseError> {
172 use std::fmt::Write;
173 let mut out = String::new();
174
175 let budget = Cell::new(0);
176 let tree = parse_tree(command, &budget)?;
177 let root = tree.root_node();
178 let source = command.as_bytes();
179
180 writeln!(out, "── tree-sitter AST ──").unwrap();
182 fn print_node(out: &mut String, node: tree_sitter::Node, source: &[u8], indent: usize) {
183 let text = node.utf8_text(source).unwrap_or("???");
184 let short: String = text.chars().take(60).collect();
185 let tag = if node.is_named() { "named" } else { "anon" };
186 writeln!(
187 out,
188 "{}{} [{}] {:?}",
189 " ".repeat(indent),
190 node.kind(),
191 tag,
192 short
193 )
194 .unwrap();
195 let mut cursor = node.walk();
196 for child in node.children(&mut cursor) {
197 print_node(out, child, source, indent + 1);
198 }
199 }
200 print_node(&mut out, root, source, 0);
201
202 let pipeline = parse_with_substitutions(command)?;
206 writeln!(out, "\n── parsed pipeline ──").unwrap();
207 if pipeline.has_parse_errors {
208 writeln!(out, " (parse errors detected — best-effort result)").unwrap();
209 }
210 fn print_pipeline(out: &mut String, p: &ParsedPipeline, indent: usize) {
211 let pad = " ".repeat(indent);
212 for sub in &p.structural_substitutions {
213 writeln!(
214 out,
215 "{pad}structural subst bytes {}..{}:",
216 sub.start, sub.end
217 )
218 .unwrap();
219 print_pipeline(out, &sub.pipeline, indent + 1);
220 }
221 for (i, seg) in p.segments.iter().enumerate() {
222 let redir = seg
223 .redirection
224 .as_ref()
225 .map(|r| format!(" [{r}]"))
226 .unwrap_or_default();
227 writeln!(out, "{pad}segment {i}: {:?}{redir}", seg.command).unwrap();
228 for sub in &seg.substitutions {
229 writeln!(out, "{pad} subst bytes {}..{}:", sub.start, sub.end).unwrap();
230 print_pipeline(out, &sub.pipeline, indent + 2);
231 }
232 if i < p.operators.len() {
233 writeln!(out, "{pad}operator: {}", p.operators[i]).unwrap();
234 }
235 }
236 }
237 print_pipeline(&mut out, &pipeline, 1);
238
239 let redir = detect_redirections(root, source);
241 writeln!(out, "\n── output redirection ──").unwrap();
242 match redir {
243 Some(r) => writeln!(out, " {r}").unwrap(),
244 None => writeln!(out, " (none)").unwrap(),
245 }
246
247 Ok(out)
248}
249
#[cfg(test)]
mod tests {
    use super::super::types::Operator;
    use super::*;

    /// Parses `cmd`, panicking if the parser returns an error.
    fn parse(cmd: &str) -> ParsedPipeline {
        parse_with_substitutions(cmd).expect("parse failed")
    }

    // ── splitting on operators ──

    #[test]
    fn simple_command() {
        let p = parse("ls -la");
        assert_eq!(p.segments.len(), 1);
        assert_eq!(p.segments[0].command, "ls -la");
        assert!(p.operators.is_empty());
        assert!(p.segments[0].substitutions.is_empty());
        assert!(p.structural_substitutions.is_empty());
    }

    #[test]
    fn pipe() {
        let p = parse("ls | grep foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "ls");
        assert_eq!(p.segments[1].command, "grep foo");
        assert_eq!(p.operators, vec![Operator::Pipe]);
    }

    #[test]
    fn and_then() {
        let p = parse("mkdir foo && cd foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::And]);
    }

    #[test]
    fn or_else() {
        let p = parse("test -f x || echo missing");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::Or]);
    }

    #[test]
    fn semicolon() {
        let p = parse("echo a; echo b");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "echo a");
        assert_eq!(p.segments[1].command, "echo b");
    }

    #[test]
    fn triple_and() {
        let p = parse("a && b && c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::And]);
    }

    #[test]
    fn mixed_operators() {
        let p = parse("a && b || c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::Or]);
    }

    #[test]
    fn quoted_operator_not_split() {
        let p = parse(r#"echo "a && b""#);
        assert_eq!(p.segments.len(), 1);
    }

    // ── command / process substitutions ──

    #[test]
    fn dollar_paren_substitution() {
        let p = parse("echo $(date)");
        assert_eq!(p.segments[0].command, "echo $(date)");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        let sub = &p.segments[0].substitutions[0];
        assert_eq!(sub.pipeline.segments.len(), 1);
        assert_eq!(sub.pipeline.segments[0].command, "date");
    }

    #[test]
    fn backtick_substitution() {
        let p = parse("echo `date`");
        assert_eq!(p.segments[0].command, "echo `date`");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        assert_eq!(
            p.segments[0].substitutions[0].pipeline.segments[0].command,
            "date"
        );
    }

    #[test]
    fn single_quoted_not_substituted() {
        let p = parse("echo '$(date)'");
        assert!(p.segments[0].substitutions.is_empty());
    }

    #[test]
    fn double_quoted_is_substituted() {
        let p = parse(r#"echo "$(date)""#);
        assert_eq!(p.segments[0].substitutions.len(), 1);
    }

    #[test]
    fn process_substitution() {
        let p = parse("diff <(ls a) <(ls b)");
        assert_eq!(p.segments[0].substitutions.len(), 2);
        assert_eq!(
            p.segments[0].substitutions[0].pipeline.segments[0].command,
            "ls a"
        );
        assert_eq!(
            p.segments[0].substitutions[1].pipeline.segments[0].command,
            "ls b"
        );
    }

    #[test]
    fn nested_substitution() {
        let p = parse("echo $(cat $(find . -name foo))");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        let outer = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(outer.segments[0].substitutions.len(), 1);
        let inner = &outer.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "find . -name foo");
    }

    #[test]
    fn substitution_byte_positions() {
        let p = parse("echo $(date)");
        let sub = &p.segments[0].substitutions[0];
        assert_eq!(sub.start, 5);
        assert_eq!(sub.end, 12);
        assert_eq!(&p.segments[0].command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn substitution_in_second_segment() {
        let p = parse("echo hi && echo $(date)");
        assert!(p.segments[0].substitutions.is_empty());
        assert_eq!(p.segments[1].substitutions.len(), 1);
        // Offsets are relative to the owning segment's command text.
        let sub = &p.segments[1].substitutions[0];
        assert_eq!(&p.segments[1].command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn compound_substitution_content() {
        let p = parse("echo $(cmd1 && cmd2)");
        let inner = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments.len(), 2);
        assert_eq!(inner.operators, vec![Operator::And]);
    }

    // ── structural substitutions (not owned by any segment) ──

    #[test]
    fn structural_substitution_in_for_loop() {
        let p = parse("for i in $(seq 10); do echo $i; done");
        assert_eq!(p.structural_substitutions.len(), 1);
        assert_eq!(
            p.structural_substitutions[0].pipeline.segments[0].command,
            "seq 10"
        );
    }

    #[test]
    fn structural_substitution_in_case_subject() {
        let p = parse("case $(git status) in clean) echo ok ;; esac");
        assert_eq!(p.structural_substitutions.len(), 1);
        assert_eq!(
            p.structural_substitutions[0].pipeline.segments[0].command,
            "git status"
        );
    }

    // ── compound statements are unwrapped to their inner commands ──

    #[test]
    fn for_loop_extracts_body() {
        let p = parse("for i in *; do echo \"$i\"; done");
        assert!(p.segments.iter().all(|s| !s.command.starts_with("for")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn if_statement_extracts_body() {
        let p = parse("if test -f x; then echo yes; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("test")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn while_loop_extracts_body() {
        let p = parse("while true; do sleep 1; done");
        assert!(p.segments.iter().any(|s| s.command.contains("true")));
        assert!(p.segments.iter().any(|s| s.command.contains("sleep")));
    }

    #[test]
    fn case_pattern_not_treated_as_command() {
        let p = parse(r#"case $x in rm) echo hi ;; kubectl) echo bye ;; esac"#);
        assert!(!p.segments.iter().any(|s| s.command.trim() == "rm"));
        assert!(p.segments.iter().any(|s| s.command.contains("echo hi")));
    }

    #[test]
    fn if_test_command_extracted() {
        let p = parse("if [[ -f foo ]]; then git commit; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("[[")));
        assert!(p.segments.iter().any(|s| s.command.contains("git commit")));
    }

    #[test]
    fn if_test_command_substitution_has_segment() {
        let p = parse(r#"if [[ $(git status) == "clean" ]]; then echo ok; fi"#);
        let test_seg = p
            .segments
            .iter()
            .find(|s| s.command.contains("[["))
            .unwrap();
        assert_eq!(test_seg.substitutions.len(), 1);
        assert_eq!(
            test_seg.substitutions[0].pipeline.segments[0].command,
            "git status"
        );
    }

    #[test]
    fn compound_heredoc_pipe_unwraps_body() {
        let cmd = "while true; do shred /dev/sda; done <<EOF | cat\nstuff\nEOF";
        let p = parse(cmd);
        assert!(!p.segments.iter().any(|s| s.command.starts_with("while")));
        assert!(p.segments.iter().any(|s| s.command.contains("shred")));
        assert!(p.segments.iter().any(|s| s.command.trim() == "cat"));
    }

    // ── background operator ──

    #[test]
    fn background_operator() {
        let p = parse("sleep 10 & git commit -m test");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "sleep 10");
        assert_eq!(p.segments[1].command, "git commit -m test");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    // ── has_output_redirection ──

    #[test]
    fn redir_simple_gt() {
        assert!(has_output_redirection("echo hi > file").unwrap().is_some());
    }

    #[test]
    fn redir_append() {
        assert!(has_output_redirection("echo hi >> file").unwrap().is_some());
    }

    #[test]
    fn no_redir_devnull() {
        assert!(has_output_redirection("cmd > /dev/null").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_dup() {
        assert!(has_output_redirection("cmd 2>&1").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_close() {
        assert!(has_output_redirection("cmd >&-").unwrap().is_none());
    }

    #[test]
    fn redir_custom_fd_target() {
        let r = has_output_redirection("cmd >&3").unwrap().unwrap();
        assert_eq!(r.operator, ">&");
        assert_eq!(r.target, "3");
    }

    #[test]
    fn redir_clobber() {
        assert!(has_output_redirection("echo hi >| file.txt")
            .unwrap()
            .is_some());
    }

    #[test]
    fn redir_read_write() {
        let r = has_output_redirection("cat <> file.txt").unwrap();
        assert!(r.is_some());
    }

    // ── redirection propagation onto segments ──

    #[test]
    fn redirect_list_only_last_segment() {
        let p = parse("export FOO=bar && cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    #[test]
    fn redirect_for_loop_all_segments() {
        let p = parse("for i in *; do echo $i; done > /tmp/out");
        assert!(p.segments.iter().all(|s| s.redirection.is_some()));
    }

    #[test]
    fn redirect_pipeline_only_last() {
        let p = parse("echo hello | cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    // ── parse-error reporting and resource limits ──

    #[test]
    fn well_formed_no_errors() {
        assert!(!parse("echo hello").has_parse_errors);
    }

    #[test]
    fn deeply_nested_substitutions_capped() {
        // Wrap `echo x` in 40 levels of `$(...)`.
        let mut cmd = "echo x".to_string();
        for _ in 0..40 {
            cmd = format!("echo $({cmd})");
        }
        let p = parse(&cmd);
        assert_eq!(p.segments.len(), 1);
        assert!(p.has_parse_errors_recursive());

        // Descend 33 levels. NOTE(review): the pipeline found there is
        // expected to be the empty error placeholder, presumably because the
        // nesting cap enforced by `assign_substitutions` is reached by then.
        // Confirm the exact depth limit against subst.rs.
        let mut current = &p;
        for _ in 0..33 {
            let sub = &current.segments[0].substitutions[0];
            current = &sub.pipeline;
        }
        assert!(current.has_parse_errors);
        assert!(current.segments.is_empty());
    }

    #[test]
    fn background_and_disown() {
        let p = parse("waybar & disown");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "waybar");
        assert_eq!(p.segments[1].command, "disown");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    #[test]
    fn structural_substitution_byte_offsets() {
        let cmd = "for i in $(seq 10); do echo $i; done";
        let p = parse(cmd);
        assert_eq!(p.structural_substitutions.len(), 1);
        // Structural offsets index into the full command string.
        let sub = &p.structural_substitutions[0];
        assert_eq!(&cmd[sub.start..sub.end], "$(seq 10)");
    }

    #[test]
    fn no_redir_fd_close_input() {
        assert!(has_output_redirection("cmd <&-").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_close_2() {
        assert!(has_output_redirection("cmd 2>&-").unwrap().is_none());
    }

    // ── additional compound forms ──

    #[test]
    fn until_loop_extracts_body() {
        let p = parse("until false; do echo waiting; sleep 1; done");
        assert!(!p.segments.iter().any(|s| s.command.starts_with("until")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
        assert!(p.segments.iter().any(|s| s.command.contains("sleep")));
    }

    #[test]
    fn elif_clause_extracts_all_branches() {
        let p = parse("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("test -f a")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo a")));
        assert!(p.segments.iter().any(|s| s.command.contains("test -f b")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo b")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo c")));
    }

    #[test]
    fn function_definition_body_extracted() {
        let p = parse("foo() { echo hello; ls; }");
        assert!(p.segments.iter().any(|s| s.command.contains("echo hello")));
        assert!(p.segments.iter().any(|s| s.command == "ls"));
        assert!(!p.segments.iter().any(|s| s.command.contains("foo()")));
    }

    #[test]
    fn c_style_for_loop() {
        let p = parse("for ((i=0; i<10; i++)); do echo $i; done");
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn negated_command_extracts_inner() {
        let p = parse("! git status");
        assert!(p.segments.iter().any(|s| s.command.contains("git status")));
    }

    #[test]
    fn pipe_err_operator() {
        let p = parse("cmd1 |& cmd2");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::PipeErr]);
    }

    #[test]
    fn function_with_for_body() {
        let p = parse("f() for i in *; do echo $i; done");
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn input_length_cap() {
        // 65 KiB of payload exceeds the 64 KiB MAX_INPUT_LENGTH cap, so the
        // parser must return the empty error placeholder.
        let input = "echo ".to_string() + &"x".repeat(65 * 1024);
        let p = parse(&input);
        assert!(p.has_parse_errors);
        assert!(p.segments.is_empty());
    }
}