use super::redirect::detect_redirections;
use super::subst::{assign_substitutions, build_segments, collect_substitutions};
use super::types::{ParseError, ParsedPipeline, ShellSegment, Word};
use super::walk::walk_ast;
use std::cell::{Cell, RefCell};
use tree_sitter::{Parser, Tree};
const MAX_TOTAL_PARSES: usize = 512;
const MAX_INPUT_LENGTH: usize = 64 * 1024;
thread_local! {
static TS_PARSER: RefCell<Parser> = RefCell::new({
let mut p = Parser::new();
p.set_language(&tree_sitter_bash::LANGUAGE.into())
.expect("failed to load bash grammar");
p
});
}
fn parse_tree(source: &str, budget: &Cell<usize>) -> Result<Tree, ParseError> {
let count = budget.get();
if count >= MAX_TOTAL_PARSES {
return Err(ParseError);
}
budget.set(count + 1);
TS_PARSER.with(|p| p.borrow_mut().parse(source, None).ok_or(ParseError))
}
pub fn parse_with_substitutions(command: &str) -> Result<ParsedPipeline, ParseError> {
if command.len() > MAX_INPUT_LENGTH {
return Ok(ParsedPipeline::empty_with_error());
}
let budget = Cell::new(0);
parse_with_substitutions_impl(command, 0, &budget)
}
fn parse_with_substitutions_impl(
command: &str,
depth: usize,
budget: &Cell<usize>,
) -> Result<ParsedPipeline, ParseError> {
let tree = parse_tree(command, budget)?;
let root = tree.root_node();
let source = command.as_bytes();
let has_parse_errors = root.has_error();
let mut raw_substs = Vec::new();
collect_substitutions(root, source, &mut raw_substs);
let walk = walk_ast(root, source);
let trimmed = command.trim();
let is_trivial = walk.segments.len() <= 1
&& raw_substs.is_empty()
&& walk
.segments
.first()
.is_none_or(|seg| seg.start == 0 && seg.end >= trimmed.len());
if is_trivial {
let first_seg = walk.segments.first();
let redir = first_seg
.and_then(|seg| seg.redirection.clone())
.or_else(|| detect_redirections(root, source));
let words = first_seg.map(|seg| seg.words.clone()).unwrap_or_else(|| {
shlex::split(trimmed)
.unwrap_or_else(|| trimmed.split_whitespace().map(String::from).collect())
.into_iter()
.map(Word::from)
.collect()
});
return Ok(ParsedPipeline {
segments: vec![ShellSegment {
command: trimmed.to_string(),
words,
redirection: redir,
substitutions: vec![],
}],
operators: vec![],
structural_substitutions: vec![],
has_parse_errors,
});
}
let built = build_segments(&walk, command);
let (per_segment_subs, structural_subs) =
assign_substitutions(&raw_substs, &built, depth, &|inner, d| {
parse_with_substitutions_impl(inner, d, budget)
});
let segments: Vec<ShellSegment> = built
.into_iter()
.zip(per_segment_subs)
.map(|(b, subs)| ShellSegment {
command: b.command,
words: b.words,
redirection: b.redirection,
substitutions: subs,
})
.collect();
Ok(ParsedPipeline {
segments,
operators: walk.operators,
structural_substitutions: structural_subs,
has_parse_errors,
})
}
pub fn has_output_redirection(
command: &str,
) -> Result<Option<super::types::Redirection>, ParseError> {
let budget = Cell::new(0);
let tree = parse_tree(command, &budget)?;
Ok(detect_redirections(tree.root_node(), command.as_bytes()))
}
pub fn dump_ast(command: &str) -> Result<String, ParseError> {
use std::fmt::Write;
let mut out = String::new();
let budget = Cell::new(0);
let tree = parse_tree(command, &budget)?;
let root = tree.root_node();
let source = command.as_bytes();
writeln!(out, "── tree-sitter AST ──").unwrap();
fn print_node(out: &mut String, node: tree_sitter::Node, source: &[u8], indent: usize) {
let text = node.utf8_text(source).unwrap_or("???");
let short: String = text.chars().take(60).collect();
let tag = if node.is_named() { "named" } else { "anon" };
writeln!(
out,
"{}{} [{}] {:?}",
" ".repeat(indent),
node.kind(),
tag,
short
)
.unwrap();
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
print_node(out, child, source, indent + 1);
}
}
print_node(&mut out, root, source, 0);
let pipeline = parse_with_substitutions(command)?;
writeln!(out, "\n── parsed pipeline ──").unwrap();
if pipeline.has_parse_errors {
writeln!(out, " (parse errors detected — best-effort result)").unwrap();
}
fn print_pipeline(out: &mut String, p: &ParsedPipeline, indent: usize) {
let pad = " ".repeat(indent);
for sub in &p.structural_substitutions {
writeln!(
out,
"{pad}structural subst bytes {}..{}:",
sub.start, sub.end
)
.unwrap();
print_pipeline(out, &sub.pipeline, indent + 1);
}
for (i, seg) in p.segments.iter().enumerate() {
let redir = seg
.redirection
.as_ref()
.map(|r| format!(" [{r}]"))
.unwrap_or_default();
writeln!(out, "{pad}segment {i}: {:?}{redir}", seg.command).unwrap();
if !seg.words.is_empty() {
writeln!(out, "{pad} words: {:?}", seg.words).unwrap();
}
for sub in &seg.substitutions {
writeln!(out, "{pad} subst bytes {}..{}:", sub.start, sub.end).unwrap();
print_pipeline(out, &sub.pipeline, indent + 2);
}
if i < p.operators.len() {
writeln!(out, "{pad}operator: {}", p.operators[i]).unwrap();
}
}
}
print_pipeline(&mut out, &pipeline, 1);
let redir = detect_redirections(root, source);
writeln!(out, "\n── output redirection ──").unwrap();
match redir {
Some(r) => writeln!(out, " {r}").unwrap(),
None => writeln!(out, " (none)").unwrap(),
}
Ok(out)
}
#[cfg(test)]
#[path = "shell_inline_tests.rs"]
mod shell_inline_tests;
#[cfg(test)]
#[path = "shell_tests.rs"]
mod shell_tests;