use super::redirect::detect_redirections;
use super::subst::{assign_substitutions, build_segments, collect_substitutions};
use super::types::{ParseError, ParsedPipeline, ShellSegment};
use super::walk::walk_ast;
use std::cell::{Cell, RefCell};
use tree_sitter::{Parser, Tree};
/// Upper bound on parse attempts charged to a single budget counter: `parse_tree`
/// returns `ParseError` instead of parsing once the counter has reached this value.
const MAX_TOTAL_PARSES: usize = 512;
/// Largest command, in bytes, that `parse_with_substitutions` will parse (64 KiB);
/// longer input yields `ParsedPipeline::empty_with_error()` without being parsed.
const MAX_INPUT_LENGTH: usize = 64 * 1024;
thread_local! {
static TS_PARSER: RefCell<Parser> = RefCell::new({
let mut p = Parser::new();
p.set_language(&tree_sitter_bash::LANGUAGE.into())
.expect("failed to load bash grammar");
p
});
}
/// Parses `source` with the thread-local bash parser, charging one attempt to `budget`.
///
/// # Errors
///
/// Returns `ParseError` without parsing when `budget` has already reached
/// `MAX_TOTAL_PARSES`, and also when the parser produces no tree.
fn parse_tree(source: &str, budget: &Cell<usize>) -> Result<Tree, ParseError> {
    let used = budget.get();
    if used < MAX_TOTAL_PARSES {
        // The attempt is charged before parsing, so a failed parse still counts.
        budget.set(used + 1);
        TS_PARSER.with(|cell| {
            let mut parser = cell.borrow_mut();
            parser.parse(source, None).ok_or(ParseError)
        })
    } else {
        Err(ParseError)
    }
}
/// Parses `command` into a [`ParsedPipeline`], including nested substitutions.
///
/// Input longer than `MAX_INPUT_LENGTH` bytes is not parsed at all; the result is
/// `ParsedPipeline::empty_with_error()` wrapped in `Ok`. Every call starts with a
/// fresh parse budget of zero and depth zero.
///
/// # Errors
///
/// Propagates `ParseError` from `parse_with_substitutions_impl`.
pub fn parse_with_substitutions(command: &str) -> Result<ParsedPipeline, ParseError> {
    if command.len() <= MAX_INPUT_LENGTH {
        let parse_budget = Cell::new(0);
        return parse_with_substitutions_impl(command, 0, &parse_budget);
    }
    Ok(ParsedPipeline::empty_with_error())
}
/// Recursive worker behind [`parse_with_substitutions`].
///
/// Parses `command`, collects its substitutions and walks the AST into segments and
/// operators. `depth` is forwarded to `assign_substitutions`; `budget` is the parse
/// counter shared with every nested parse triggered through the recursion callback.
///
/// # Errors
///
/// Returns `ParseError` when `parse_tree` fails for `command`.
fn parse_with_substitutions_impl(
    command: &str,
    depth: usize,
    budget: &Cell<usize>,
) -> Result<ParsedPipeline, ParseError> {
    let tree = parse_tree(command, budget)?;
    let root = tree.root_node();
    let bytes = command.as_bytes();
    let has_parse_errors = root.has_error();

    let mut found_substs = Vec::new();
    collect_substitutions(root, bytes, &mut found_substs);
    let walked = walk_ast(root, bytes);

    let trimmed = command.trim();
    let first_segment = walked.segments.first();
    // With no segment at all, or one that starts at byte 0 and reaches the end of the
    // trimmed text, the trimmed command itself is used as the single segment.
    let spans_whole_input = match first_segment {
        Some(seg) => seg.start == 0 && seg.end >= trimmed.len(),
        None => true,
    };

    if walked.segments.len() <= 1 && found_substs.is_empty() && spans_whole_input {
        // Prefer the redirection recorded on the segment; otherwise scan the tree.
        let redirection = first_segment
            .and_then(|seg| seg.redirection.clone())
            .or_else(|| detect_redirections(root, bytes));
        let only_segment = ShellSegment {
            command: trimmed.to_string(),
            redirection,
            substitutions: vec![],
        };
        return Ok(ParsedPipeline {
            segments: vec![only_segment],
            operators: vec![],
            structural_substitutions: vec![],
            has_parse_errors,
        });
    }

    let built = build_segments(&walked, command);
    let (subs_by_segment, structural_substitutions) =
        assign_substitutions(&found_substs, &built, depth, &|inner, inner_depth| {
            parse_with_substitutions_impl(inner, inner_depth, budget)
        });

    // Pair each built segment with the substitutions assigned to it.
    let segments = built
        .into_iter()
        .zip(subs_by_segment)
        .map(|(piece, substitutions)| ShellSegment {
            command: piece.command,
            redirection: piece.redirection,
            substitutions,
        })
        .collect::<Vec<ShellSegment>>();

    Ok(ParsedPipeline {
        segments,
        operators: walked.operators,
        structural_substitutions,
        has_parse_errors,
    })
}
/// Parses `command` and returns whatever `detect_redirections` reports for the
/// root of the resulting tree.
///
/// # Errors
///
/// Returns `ParseError` when `parse_tree` fails for `command`.
pub fn has_output_redirection(
    command: &str,
) -> Result<Option<super::types::Redirection>, ParseError> {
    let source = command.as_bytes();
    let parse_budget = Cell::new(0);
    parse_tree(command, &parse_budget).map(|tree| detect_redirections(tree.root_node(), source))
}
/// Builds a human-readable debugging report for `command`.
///
/// The report has three sections: the raw tree-sitter AST, the pipeline produced by
/// [`parse_with_substitutions`], and the result of `detect_redirections` on the root.
///
/// # Errors
///
/// Returns `ParseError` when either the direct parse or `parse_with_substitutions`
/// fails.
pub fn dump_ast(command: &str) -> Result<String, ParseError> {
    use std::fmt::Write;

    /// Writes one line for `node` (kind, named/anon tag, first 60 chars of its text),
    /// then recurses into its children with the indent increased by one.
    fn print_node(out: &mut String, node: tree_sitter::Node, source: &[u8], indent: usize) {
        let short: String = node
            .utf8_text(source)
            .unwrap_or("???")
            .chars()
            .take(60)
            .collect();
        let tag = if node.is_named() { "named" } else { "anon" };
        let pad = " ".repeat(indent);
        writeln!(out, "{}{} [{}] {:?}", pad, node.kind(), tag, short).unwrap();

        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            print_node(out, child, source, indent + 1);
        }
    }

    /// Writes the structural substitutions of `p` first, then each segment with its
    /// own substitutions and the operator that shares the segment's index, if any.
    fn print_pipeline(out: &mut String, p: &ParsedPipeline, indent: usize) {
        let pad = " ".repeat(indent);

        for sub in &p.structural_substitutions {
            writeln!(
                out,
                "{pad}structural subst bytes {}..{}:",
                sub.start, sub.end
            )
            .unwrap();
            print_pipeline(out, &sub.pipeline, indent + 1);
        }

        for (i, seg) in p.segments.iter().enumerate() {
            let redir = match &seg.redirection {
                Some(r) => format!(" [{r}]"),
                None => String::new(),
            };
            writeln!(out, "{pad}segment {i}: {:?}{redir}", seg.command).unwrap();

            for sub in &seg.substitutions {
                writeln!(out, "{pad} subst bytes {}..{}:", sub.start, sub.end).unwrap();
                print_pipeline(out, &sub.pipeline, indent + 2);
            }

            if let Some(op) = p.operators.get(i) {
                writeln!(out, "{pad}operator: {}", op).unwrap();
            }
        }
    }

    let mut report = String::new();
    let parse_budget = Cell::new(0);
    let tree = parse_tree(command, &parse_budget)?;
    let root = tree.root_node();
    let source = command.as_bytes();

    // Section 1: the raw syntax tree.
    writeln!(report, "── tree-sitter AST ──").unwrap();
    print_node(&mut report, root, source, 0);

    // Section 2: the pipeline as the public parser sees it (parsed separately).
    let pipeline = parse_with_substitutions(command)?;
    writeln!(report, "\n── parsed pipeline ──").unwrap();
    if pipeline.has_parse_errors {
        writeln!(report, " (parse errors detected — best-effort result)").unwrap();
    }
    print_pipeline(&mut report, &pipeline, 1);

    // Section 3: redirection detected on the whole tree.
    let redirection = detect_redirections(root, source);
    writeln!(report, "\n── output redirection ──").unwrap();
    if let Some(r) = redirection {
        writeln!(report, " {r}").unwrap();
    } else {
        writeln!(report, " (none)").unwrap();
    }

    Ok(report)
}
/// Unit tests for pipeline parsing, substitution extraction and redirection detection.
#[cfg(test)]
mod tests {
    use super::super::types::Operator;
    use super::*;

    /// Parses `cmd`, panicking with "parse failed" on a `ParseError`.
    fn parse(cmd: &str) -> ParsedPipeline {
        parse_with_substitutions(cmd).expect("parse failed")
    }

    #[test]
    fn simple_command() {
        let p = parse("ls -la");
        assert_eq!(p.segments.len(), 1);
        assert_eq!(p.segments[0].command, "ls -la");
        assert!(p.operators.is_empty());
        assert!(p.segments[0].substitutions.is_empty());
        assert!(p.structural_substitutions.is_empty());
    }

    #[test]
    fn pipe() {
        let p = parse("ls | grep foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "ls");
        assert_eq!(p.segments[1].command, "grep foo");
        assert_eq!(p.operators, vec![Operator::Pipe]);
    }

    #[test]
    fn and_then() {
        let p = parse("mkdir foo && cd foo");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::And]);
    }

    #[test]
    fn or_else() {
        let p = parse("test -f x || echo missing");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::Or]);
    }

    #[test]
    fn semicolon() {
        let p = parse("echo a; echo b");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "echo a");
        assert_eq!(p.segments[1].command, "echo b");
    }

    #[test]
    fn triple_and() {
        let p = parse("a && b && c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::And]);
    }

    #[test]
    fn mixed_operators() {
        let p = parse("a && b || c");
        assert_eq!(p.segments.len(), 3);
        assert_eq!(p.operators, vec![Operator::And, Operator::Or]);
    }

    #[test]
    fn quoted_operator_not_split() {
        let p = parse(r#"echo "a && b""#);
        assert_eq!(p.segments.len(), 1);
    }

    #[test]
    fn dollar_paren_substitution() {
        let p = parse("echo $(date)");
        assert_eq!(p.segments[0].command, "echo $(date)");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        let sub = &p.segments[0].substitutions[0];
        assert_eq!(sub.pipeline.segments.len(), 1);
        assert_eq!(sub.pipeline.segments[0].command, "date");
    }

    #[test]
    fn backtick_substitution() {
        let p = parse("echo `date`");
        assert_eq!(p.segments[0].command, "echo `date`");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        assert_eq!(
            p.segments[0].substitutions[0].pipeline.segments[0].command,
            "date"
        );
    }

    #[test]
    fn single_quoted_not_substituted() {
        let p = parse("echo '$(date)'");
        assert!(p.segments[0].substitutions.is_empty());
    }

    #[test]
    fn double_quoted_is_substituted() {
        let p = parse(r#"echo "$(date)""#);
        assert_eq!(p.segments[0].substitutions.len(), 1);
    }

    #[test]
    fn process_substitution() {
        let p = parse("diff <(ls a) <(ls b)");
        assert_eq!(p.segments[0].substitutions.len(), 2);
        assert_eq!(
            p.segments[0].substitutions[0].pipeline.segments[0].command,
            "ls a"
        );
        assert_eq!(
            p.segments[0].substitutions[1].pipeline.segments[0].command,
            "ls b"
        );
    }

    #[test]
    fn nested_substitution() {
        let p = parse("echo $(cat $(find . -name foo))");
        assert_eq!(p.segments[0].substitutions.len(), 1);
        let outer = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(outer.segments[0].substitutions.len(), 1);
        let inner = &outer.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments[0].command, "find . -name foo");
    }

    #[test]
    fn substitution_byte_positions() {
        let p = parse("echo $(date)");
        let sub = &p.segments[0].substitutions[0];
        assert_eq!(sub.start, 5);
        assert_eq!(sub.end, 12);
        assert_eq!(&p.segments[0].command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn substitution_in_second_segment() {
        let p = parse("echo hi && echo $(date)");
        assert!(p.segments[0].substitutions.is_empty());
        assert_eq!(p.segments[1].substitutions.len(), 1);
        let sub = &p.segments[1].substitutions[0];
        assert_eq!(&p.segments[1].command[sub.start..sub.end], "$(date)");
    }

    #[test]
    fn compound_substitution_content() {
        let p = parse("echo $(cmd1 && cmd2)");
        let inner = &p.segments[0].substitutions[0].pipeline;
        assert_eq!(inner.segments.len(), 2);
        assert_eq!(inner.operators, vec![Operator::And]);
    }

    #[test]
    fn structural_substitution_in_for_loop() {
        let p = parse("for i in $(seq 10); do echo $i; done");
        assert_eq!(p.structural_substitutions.len(), 1);
        assert_eq!(
            p.structural_substitutions[0].pipeline.segments[0].command,
            "seq 10"
        );
    }

    #[test]
    fn structural_substitution_in_case_subject() {
        let p = parse("case $(git status) in clean) echo ok ;; esac");
        assert_eq!(p.structural_substitutions.len(), 1);
        assert_eq!(
            p.structural_substitutions[0].pipeline.segments[0].command,
            "git status"
        );
    }

    #[test]
    fn for_loop_extracts_body() {
        let p = parse("for i in *; do echo \"$i\"; done");
        assert!(p.segments.iter().all(|s| !s.command.starts_with("for")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn if_statement_extracts_body() {
        let p = parse("if test -f x; then echo yes; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("test")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn while_loop_extracts_body() {
        let p = parse("while true; do sleep 1; done");
        assert!(p.segments.iter().any(|s| s.command.contains("true")));
        assert!(p.segments.iter().any(|s| s.command.contains("sleep")));
    }

    #[test]
    fn case_pattern_not_treated_as_command() {
        let p = parse(r#"case $x in rm) echo hi ;; kubectl) echo bye ;; esac"#);
        assert!(!p.segments.iter().any(|s| s.command.trim() == "rm"));
        assert!(p.segments.iter().any(|s| s.command.contains("echo hi")));
    }

    #[test]
    fn if_test_command_extracted() {
        let p = parse("if [[ -f foo ]]; then git commit; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("[[")));
        assert!(p.segments.iter().any(|s| s.command.contains("git commit")));
    }

    #[test]
    fn if_test_command_substitution_has_segment() {
        let p = parse(r#"if [[ $(git status) == "clean" ]]; then echo ok; fi"#);
        let test_seg = p
            .segments
            .iter()
            .find(|s| s.command.contains("[["))
            .unwrap();
        assert_eq!(test_seg.substitutions.len(), 1);
        assert_eq!(
            test_seg.substitutions[0].pipeline.segments[0].command,
            "git status"
        );
    }

    #[test]
    fn compound_heredoc_pipe_unwraps_body() {
        let cmd = "while true; do shred /dev/sda; done <<EOF | cat\nstuff\nEOF";
        let p = parse(cmd);
        assert!(!p.segments.iter().any(|s| s.command.starts_with("while")));
        assert!(p.segments.iter().any(|s| s.command.contains("shred")));
        assert!(p.segments.iter().any(|s| s.command.trim() == "cat"));
    }

    #[test]
    fn background_operator() {
        let p = parse("sleep 10 & git commit -m test");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "sleep 10");
        assert_eq!(p.segments[1].command, "git commit -m test");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    #[test]
    fn redir_simple_gt() {
        assert!(has_output_redirection("echo hi > file").unwrap().is_some());
    }

    #[test]
    fn redir_append() {
        assert!(has_output_redirection("echo hi >> file").unwrap().is_some());
    }

    #[test]
    fn no_redir_devnull() {
        assert!(has_output_redirection("cmd > /dev/null").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_dup() {
        assert!(has_output_redirection("cmd 2>&1").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_close() {
        assert!(has_output_redirection("cmd >&-").unwrap().is_none());
    }

    #[test]
    fn redir_custom_fd_target() {
        let r = has_output_redirection("cmd >&3").unwrap().unwrap();
        assert_eq!(r.operator, ">&");
        assert_eq!(r.target, "3");
    }

    #[test]
    fn redir_clobber() {
        assert!(has_output_redirection("echo hi >| file.txt")
            .unwrap()
            .is_some());
    }

    #[test]
    fn redir_read_write() {
        let r = has_output_redirection("cat <> file.txt").unwrap();
        assert!(r.is_some());
    }

    #[test]
    fn redirect_list_only_last_segment() {
        let p = parse("export FOO=bar && cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    #[test]
    fn redirect_for_loop_all_segments() {
        let p = parse("for i in *; do echo $i; done > /tmp/out");
        assert!(p.segments.iter().all(|s| s.redirection.is_some()));
    }

    #[test]
    fn redirect_pipeline_only_last() {
        let p = parse("echo hello | cat > /tmp/file");
        assert!(p.segments[0].redirection.is_none());
        assert!(p.segments[1].redirection.is_some());
    }

    #[test]
    fn well_formed_no_errors() {
        assert!(!parse("echo hello").has_parse_errors);
    }

    #[test]
    fn deeply_nested_substitutions_capped() {
        // Wrap `echo x` in 40 layers of `echo $(...)`.
        let mut cmd = "echo x".to_string();
        for _ in 0..40 {
            cmd = format!("echo $({cmd})");
        }
        let p = parse(&cmd);
        assert_eq!(p.segments.len(), 1);
        assert!(p.has_parse_errors_recursive());

        // Descend 33 levels; the pipeline found there is expected to be an empty,
        // error-flagged placeholder rather than a parsed command.
        let mut current = &p;
        for _ in 0..33 {
            let sub = &current.segments[0].substitutions[0];
            current = &sub.pipeline;
        }
        assert!(current.has_parse_errors);
        assert!(current.segments.is_empty());
    }

    #[test]
    fn background_and_disown() {
        let p = parse("waybar & disown");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.segments[0].command, "waybar");
        assert_eq!(p.segments[1].command, "disown");
        assert_eq!(p.operators, vec![Operator::Background]);
    }

    #[test]
    fn structural_substitution_byte_offsets() {
        let cmd = "for i in $(seq 10); do echo $i; done";
        let p = parse(cmd);
        assert_eq!(p.structural_substitutions.len(), 1);
        let sub = &p.structural_substitutions[0];
        assert_eq!(&cmd[sub.start..sub.end], "$(seq 10)");
    }

    #[test]
    fn no_redir_fd_close_input() {
        assert!(has_output_redirection("cmd <&-").unwrap().is_none());
    }

    #[test]
    fn no_redir_fd_close_2() {
        assert!(has_output_redirection("cmd 2>&-").unwrap().is_none());
    }

    #[test]
    fn until_loop_extracts_body() {
        let p = parse("until false; do echo waiting; sleep 1; done");
        assert!(!p.segments.iter().any(|s| s.command.starts_with("until")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
        assert!(p.segments.iter().any(|s| s.command.contains("sleep")));
    }

    #[test]
    fn elif_clause_extracts_all_branches() {
        let p = parse("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi");
        assert!(p.segments.iter().any(|s| s.command.contains("test -f a")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo a")));
        assert!(p.segments.iter().any(|s| s.command.contains("test -f b")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo b")));
        assert!(p.segments.iter().any(|s| s.command.contains("echo c")));
    }

    #[test]
    fn function_definition_body_extracted() {
        let p = parse("foo() { echo hello; ls; }");
        assert!(p.segments.iter().any(|s| s.command.contains("echo hello")));
        assert!(p.segments.iter().any(|s| s.command == "ls"));
        assert!(!p.segments.iter().any(|s| s.command.contains("foo()")));
    }

    #[test]
    fn c_style_for_loop() {
        let p = parse("for ((i=0; i<10; i++)); do echo $i; done");
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn negated_command_extracts_inner() {
        let p = parse("! git status");
        assert!(p.segments.iter().any(|s| s.command.contains("git status")));
    }

    #[test]
    fn pipe_err_operator() {
        let p = parse("cmd1 |& cmd2");
        assert_eq!(p.segments.len(), 2);
        assert_eq!(p.operators, vec![Operator::PipeErr]);
    }

    #[test]
    fn function_with_for_body() {
        let p = parse("f() for i in *; do echo $i; done");
        assert!(p.segments.iter().any(|s| s.command.contains("echo")));
    }

    #[test]
    fn input_length_cap() {
        // 65 KiB of payload exceeds the 64 KiB input limit.
        let input = "echo ".to_string() + &"x".repeat(65 * 1024);
        let p = parse(&input);
        assert!(p.has_parse_errors);
        assert!(p.segments.is_empty());
    }
}