use super::redirect::detect_redirections;
use super::tokenize::shlex_or_whitespace_words;
use super::types::{Operator, Redirection, Word};
use tree_sitter::Node;
/// Removes one layer of surrounding shell quotes from `word`.
///
/// Three quoting forms are recognised, checked in this order: `$'…'`,
/// `'…'` and `"…"`. The first form whose opening delimiter matches decides
/// the outcome: if the matching closing delimiter is also present the inner
/// text is returned, otherwise the word is returned untouched. The inner text
/// is not unescaped or otherwise interpreted.
fn strip_quotes(word: &str) -> Word {
    // (opening delimiter, closing delimiter); `$'` must be tried before `'`.
    const DELIMITERS: [(&str, char); 3] = [("$'", '\''), ("'", '\''), ("\"", '"')];

    for &(open, close) in &DELIMITERS {
        if let Some(rest) = word.strip_prefix(open) {
            // An unterminated quote leaves the word exactly as written.
            return Word::from(rest.strip_suffix(close).unwrap_or(word));
        }
    }
    Word::from(word)
}
/// Flattened outcome of walking a syntax (sub)tree: the command segments that
/// were found plus the operators recorded while joining them.
pub(super) struct WalkResult {
/// Segments in the order they were appended.
pub(super) segments: Vec<SegmentInfo>,
/// Operators pushed by `append` when a non-empty result is joined onto
/// another non-empty result, followed by the operators of the joined result.
pub(super) operators: Vec<Operator>,
}
/// One command-like segment discovered during the walk.
pub(super) struct SegmentInfo {
/// Byte offset in the source where the segment starts.
pub(super) start: usize,
/// Byte offset in the source where the segment ends (some callers clip this
/// so that it stops before a heredoc body or heredoc redirect).
pub(super) end: usize,
/// Redirection attached to this segment, if one was detected or propagated.
pub(super) redirection: Option<Redirection>,
/// Words extracted for this segment.
pub(super) words: Vec<Word>,
}
impl WalkResult {
    /// Creates a result that contains no segments and no operators.
    pub(super) fn empty() -> Self {
        Self {
            segments: Vec::new(),
            operators: Vec::new(),
        }
    }

    /// Creates a result holding exactly one segment built from the given
    /// byte range, optional redirection and words; no operators are recorded.
    pub(super) fn single_with_words(
        start: usize,
        end: usize,
        redir: Option<Redirection>,
        words: Vec<Word>,
    ) -> Self {
        let segment = SegmentInfo {
            start,
            end,
            redirection: redir,
            words,
        };
        Self {
            segments: vec![segment],
            operators: Vec::new(),
        }
    }

    /// Appends the segments and operators of `other` to `self`.
    ///
    /// Nothing happens when `other` has no segments (its operators are then
    /// discarded too). `join_op` is recorded only when `self` already holds at
    /// least one segment, i.e. when there is something to join onto.
    pub(super) fn append(&mut self, other: WalkResult, join_op: Option<Operator>) {
        if other.segments.is_empty() {
            return;
        }
        let has_left_side = !self.segments.is_empty();
        match join_op {
            Some(op) if has_left_side => self.operators.push(op),
            _ => {}
        }
        self.segments.extend(other.segments);
        self.operators.extend(other.operators);
    }
}
/// Copies `redir` onto segments of `result` that have no redirection yet.
///
/// For a `list` or `pipeline` node only the last segment is considered; for
/// any other node kind every segment is. Segments that already carry a
/// redirection are never overwritten.
fn propagate_redirect(result: &mut WalkResult, node_kind: &str, redir: &Redirection) {
    let last_only = matches!(node_kind, "list" | "pipeline");
    // Skipping all but the final element restricts the loop to the last segment.
    let skipped = if last_only {
        result.segments.len().saturating_sub(1)
    } else {
        0
    };
    for segment in result.segments.iter_mut().skip(skipped) {
        if segment.redirection.is_none() {
            segment.redirection = Some(redir.clone());
        }
    }
}
fn extract_command_words(node: Node, source: &[u8]) -> Vec<Word> {
let mut words = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
if matches!(
child.kind(),
"file_redirect" | "herestring_redirect" | "heredoc_redirect" | "heredoc_body"
) {
continue;
}
if let Ok(text) = child.utf8_text(source) {
words.push(strip_quotes(text));
}
}
words
}
/// Collects the words of a `declaration_command` node.
///
/// The declaration keyword itself (`export`, `declare`, `local`, `readonly`,
/// `typeset`) is kept, followed by every named child that is not a redirect
/// or heredoc body. Each kept child contributes its quote-stripped text;
/// children whose text is not valid UTF-8 are skipped.
fn extract_declaration_words(node: Node, source: &[u8]) -> Vec<Word> {
    let mut cursor = node.walk();
    let mut collected = Vec::new();
    for child in node.children(&mut cursor) {
        let kind = child.kind();
        let is_keyword = matches!(
            kind,
            "export" | "declare" | "local" | "readonly" | "typeset"
        );
        let is_redirect = matches!(
            kind,
            "file_redirect" | "herestring_redirect" | "heredoc_redirect" | "heredoc_body"
        );
        // Keywords are always kept; otherwise only named, non-redirect children.
        let wanted = is_keyword || (child.is_named() && !is_redirect);
        if !wanted {
            continue;
        }
        if let Ok(text) = child.utf8_text(source) {
            collected.push(strip_quotes(text));
        }
    }
    collected
}
fn extract_variable_assignments_words(node: Node, source: &[u8]) -> Vec<Word> {
let mut words = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
if let Ok(text) = child.utf8_text(source) {
let trimmed = text.trim();
if !trimmed.is_empty() {
words.push(strip_quotes(trimmed));
}
}
}
words
}
/// Collects the words of an `unset_command` node.
///
/// The `unset` / `unsetenv` keyword and every named child contribute their
/// quote-stripped text; all other tokens, and children whose text is not
/// valid UTF-8, are ignored.
fn extract_unset_words(node: Node, source: &[u8]) -> Vec<Word> {
    let mut cursor = node.walk();
    let mut collected = Vec::new();
    for child in node.children(&mut cursor) {
        let is_keyword = matches!(child.kind(), "unset" | "unsetenv");
        if !(is_keyword || child.is_named()) {
            continue;
        }
        if let Ok(text) = child.utf8_text(source) {
            collected.push(strip_quotes(text));
        }
    }
    collected
}
/// Collects the words of a `test_command` node by delegating to the
/// recursive helper with a fresh accumulator.
fn extract_test_words(node: Node, source: &[u8]) -> Vec<Word> {
    let mut collected: Vec<Word> = Vec::new();
    extract_test_words_recursive(node, source, &mut collected);
    collected
}
/// Appends the words found under `node` (part of a test expression) to `words`.
///
/// * `binary_expression` / `unary_expression` children are descended into.
/// * Bracket tokens (`[[`, `]]`, `[`, `]`) and `test_operator` nodes are
///   pushed verbatim.
/// * Any other named child is pushed with one layer of quotes stripped.
/// * Remaining anonymous tokens are pushed verbatim only when they look like
///   an operator: they start with `-`, contain `=`, or are one of `!`, `>`,
///   `<`, `&&`, `||`. Everything else (parentheses included) is dropped.
///
/// Children whose text is not valid UTF-8 contribute nothing.
fn extract_test_words_recursive(node: Node, source: &[u8], words: &mut Vec<Word>) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        let kind = child.kind();
        if matches!(kind, "binary_expression" | "unary_expression") {
            extract_test_words_recursive(child, source, words);
            continue;
        }

        let text = match child.utf8_text(source) {
            Ok(text) => text,
            Err(_) => continue,
        };

        if matches!(kind, "[[" | "]]" | "[" | "]" | "test_operator") {
            words.push(Word::from(text));
        } else if child.is_named() {
            words.push(strip_quotes(text));
        } else {
            // Empty text and bare parentheses can never satisfy this test,
            // so they are dropped without needing a separate check.
            let operator_like = text.starts_with('-')
                || text.contains('=')
                || matches!(text, "!" | ">" | "<" | "&&" | "||");
            if operator_like {
                words.push(Word::from(text));
            }
        }
    }
}
pub(super) fn walk_ast(node: Node, source: &[u8]) -> WalkResult {
match node.kind() {
"program" => walk_program(node, source),
"list" => walk_list(node, source),
"pipeline" => walk_pipeline(node, source),
"command" => {
let redir = detect_redirections(node, source);
let words = extract_command_words(node, source);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), redir, words)
}
"declaration_command" => {
let redir = detect_redirections(node, source);
let words = extract_declaration_words(node, source);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), redir, words)
}
"unset_command" => {
let redir = detect_redirections(node, source);
let words = extract_unset_words(node, source);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), redir, words)
}
"test_command" => {
let redir = detect_redirections(node, source);
let words = extract_test_words(node, source);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), redir, words)
}
"variable_assignment" => {
let text = node.utf8_text(source).unwrap_or("").trim();
let words: Vec<Word> = if text.is_empty() {
vec![]
} else {
vec![strip_quotes(text)]
};
WalkResult::single_with_words(node.start_byte(), node.end_byte(), None, words)
}
"variable_assignments" => {
let words = extract_variable_assignments_words(node, source);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), None, words)
}
"redirected_statement" => walk_redirected(node, source),
"for_statement" | "while_statement" | "until_statement" | "c_style_for_statement" => {
walk_loop(node, source)
}
"if_statement" => walk_if(node, source),
"case_statement" => walk_case(node, source),
"subshell" | "compound_statement" | "do_group" | "else_clause" | "elif_clause" => {
walk_block(node, source)
}
"case_item" => walk_case_item(node, source),
"negated_command" => walk_negated(node, source),
"function_definition" => walk_function(node, source),
"comment" | "heredoc_body" => WalkResult::empty(),
"ERROR" => WalkResult::empty(),
_ if node.is_named() => {
let text = node.utf8_text(source).unwrap_or("");
let words = shlex_or_whitespace_words(text);
WalkResult::single_with_words(node.start_byte(), node.end_byte(), None, words)
}
_ => WalkResult::empty(),
}
}
/// Walks the top-level `program` node.
///
/// Each named child is walked in order. Consecutive results are joined with
/// `Operator::Background` when an anonymous `&` token appears between the
/// child and the previous named child, and with `Operator::Semi` otherwise.
/// No operator is supplied while the accumulated result is still empty.
fn walk_program(node: Node, source: &[u8]) -> WalkResult {
    let mut result = WalkResult::empty();
    // True when a `&` token has been seen since the previous named child.
    let mut background_seen = false;
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if !child.is_named() {
            if child.kind() == "&" {
                background_seen = true;
            }
            continue;
        }

        let join_op = if result.segments.is_empty() {
            None
        } else if background_seen {
            Some(Operator::Background)
        } else {
            Some(Operator::Semi)
        };
        background_seen = false;
        result.append(walk_ast(child, source), join_op);
    }
    result
}
fn walk_list(node: Node, source: &[u8]) -> WalkResult {
let mut parts: Vec<(Node, Operator)> = Vec::new();
let mut current = node;
loop {
let mut cursor = current.walk();
let named: Vec<Node> = current.named_children(&mut cursor).collect();
if named.len() < 2 {
break;
}
let op = list_operator(current);
parts.push((named[1], op));
if named[0].kind() == "list" {
current = named[0];
} else {
current = named[0];
break;
}
}
let mut result = walk_ast(current, source);
for (right_node, op) in parts.into_iter().rev() {
result.append(walk_ast(right_node, source), Some(op));
}
result
}
/// Returns the operator of a `list` node: the first anonymous `&&` or `||`
/// child decides between `Operator::And` and `Operator::Or`; when neither is
/// present the result is `Operator::Semi`.
fn list_operator(node: Node) -> Operator {
    let mut cursor = node.walk();
    let found = node
        .children(&mut cursor)
        .filter(|child| !child.is_named())
        .find_map(|child| match child.kind() {
            "&&" => Some(Operator::And),
            "||" => Some(Operator::Or),
            _ => None,
        });
    found.unwrap_or(Operator::Semi)
}
/// Walks a `pipeline` node.
///
/// Named children are walked in order; the most recent `|` or `|&` token seen
/// before a named child is used as the join operator for that child
/// (`Operator::Pipe` / `Operator::PipeErr`) and is consumed by it.
fn walk_pipeline(node: Node, source: &[u8]) -> WalkResult {
    let mut result = WalkResult::empty();
    let mut pending: Option<Operator> = None;
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.is_named() {
            result.append(walk_ast(child, source), pending.take());
            continue;
        }
        let pipe = match child.kind() {
            "|" => Some(Operator::Pipe),
            "|&" => Some(Operator::PipeErr),
            _ => None,
        };
        // Other anonymous tokens leave any pending operator untouched.
        if pipe.is_some() {
            pending = pipe;
        }
    }
    result
}
/// Walks a `redirected_statement` node.
///
/// 1. If one of its `heredoc_redirect` children itself yields segments (see
///    `walk_heredoc_redirect`), the result is the statement's body followed by
///    those segments, joined with `heredoc_join_operator`. The body is the
///    first named child that precedes the first `heredoc_redirect` and is not
///    a file/herestring redirect; a leaf body becomes a single segment whose
///    end is clipped to the start of that heredoc redirect (and to any heredoc
///    body), while any other body is walked and has the redirection propagated.
/// 2. Otherwise the first non-redirect named child is the body: a leaf becomes
///    a single segment spanning from the statement start to its effective end;
///    anything else is walked and has the redirection propagated.
/// 3. With no body at all, the statement text (up to its effective end) is
///    split into words and returned as a single segment.
fn walk_redirected(node: Node, source: &[u8]) -> WalkResult {
    let redir = detect_redirections(node, source);

    // Case 1: a heredoc redirect that carries trailing commands of its own.
    let mut heredoc_cursor = node.walk();
    for heredoc in node.named_children(&mut heredoc_cursor) {
        if heredoc.kind() != "heredoc_redirect" {
            continue;
        }
        let inner = walk_heredoc_redirect(heredoc, source);
        if inner.segments.is_empty() {
            continue;
        }

        let mut full = WalkResult::empty();
        let mut sibling_cursor = node.walk();
        let body = node
            .named_children(&mut sibling_cursor)
            .take_while(|sib| sib.kind() != "heredoc_redirect")
            .find(|sib| !matches!(sib.kind(), "file_redirect" | "herestring_redirect"));
        if let Some(sib) = body {
            if is_leaf_command(sib) {
                let end = effective_end(node).min(heredoc.start_byte());
                let words = extract_leaf_words(sib, source);
                let segment =
                    WalkResult::single_with_words(sib.start_byte(), end, redir.clone(), words);
                full.append(segment, None);
            } else {
                let mut walked = walk_ast(sib, source);
                if let Some(ref r) = redir {
                    propagate_redirect(&mut walked, sib.kind(), r);
                }
                full.append(walked, None);
            }
        }

        let join_op = heredoc_join_operator(heredoc);
        full.append(inner, Some(join_op));
        return full;
    }

    // Case 2: plain redirected statement; the first non-redirect child is the body.
    let mut body_cursor = node.walk();
    let body = node.named_children(&mut body_cursor).find(|child| {
        !matches!(
            child.kind(),
            "file_redirect" | "herestring_redirect" | "heredoc_redirect"
        )
    });
    if let Some(child) = body {
        if is_leaf_command(child) {
            let end = effective_end(node);
            let words = extract_leaf_words(child, source);
            return WalkResult::single_with_words(node.start_byte(), end, redir, words);
        }
        let mut walked = walk_ast(child, source);
        if let Some(ref r) = redir {
            propagate_redirect(&mut walked, child.kind(), r);
        }
        return walked;
    }

    // Case 3: nothing but redirects; fall back to splitting the raw text.
    let end = effective_end(node);
    let text = source
        .get(node.start_byte()..end)
        .and_then(|bytes| std::str::from_utf8(bytes).ok())
        .unwrap_or("");
    let words = shlex_or_whitespace_words(text);
    WalkResult::single_with_words(node.start_byte(), end, redir, words)
}
/// Walks the named children of a `heredoc_redirect` node and collects any
/// commands found there.
///
/// * `pipeline`, `list`, `command` and `redirected_statement` children are
///   walked and joined using the operator found by `heredoc_operator_before`.
/// * `word` children are accumulated into a byte span running from the first
///   pending word's start to the latest word's end. The span is flushed as one
///   segment (words obtained by splitting the span's text, joined with
///   `Operator::Semi`) right before the next command-like child and once more
///   after the last child.
/// * Every other child kind is ignored.
fn walk_heredoc_redirect(node: Node, source: &[u8]) -> WalkResult {
    /// Turns the byte span `start..end` into a single segment and appends it.
    fn flush_loose_words(result: &mut WalkResult, source: &[u8], start: usize, end: usize) {
        let text = source
            .get(start..end)
            .and_then(|bytes| std::str::from_utf8(bytes).ok())
            .unwrap_or("");
        let words = shlex_or_whitespace_words(text);
        result.append(
            WalkResult::single_with_words(start, end, None, words),
            Some(Operator::Semi),
        );
    }

    let mut result = WalkResult::empty();
    let mut span_start: Option<usize> = None;
    let mut span_end: usize = 0;
    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        match child.kind() {
            "pipeline" | "list" | "command" | "redirected_statement" => {
                if let Some(start) = span_start.take() {
                    flush_loose_words(&mut result, source, start, span_end);
                }
                let op = heredoc_operator_before(node, child);
                result.append(walk_ast(child, source), Some(op));
            }
            "word" => {
                // Keep the start of the first pending word; always extend the end.
                span_start = span_start.or(Some(child.start_byte()));
                span_end = child.end_byte();
            }
            _ => {}
        }
    }
    if let Some(start) = span_start {
        flush_loose_words(&mut result, source, start, span_end);
    }
    result
}
/// Finds the operator that joins `child` to what precedes it inside
/// `heredoc_node`.
///
/// Among the anonymous children of `heredoc_node` that start before `child`,
/// the last one that is `&&`, `||`, `|&` or `|` decides the operator. When no
/// such token exists the result defaults to `Operator::Pipe`.
fn heredoc_operator_before(heredoc_node: Node, child: Node) -> Operator {
    let limit = child.start_byte();
    let mut cursor = heredoc_node.walk();
    let latest = heredoc_node
        .children(&mut cursor)
        .take_while(|sib| sib.start_byte() < limit)
        .filter(|sib| !sib.is_named())
        .filter_map(|sib| match sib.kind() {
            "&&" => Some(Operator::And),
            "||" => Some(Operator::Or),
            "|&" => Some(Operator::PipeErr),
            "|" => Some(Operator::Pipe),
            _ => None,
        })
        .last();
    latest.unwrap_or(Operator::Pipe)
}
/// Determines the operator that joins a redirected statement's body to the
/// commands embedded in `heredoc_node`.
///
/// Children are scanned in order: an anonymous `&&`, `||` or `|&` token
/// returns the matching operator immediately; a named `pipeline` returns
/// `Operator::Pipe`; a named `command`, `list` or `redirected_statement`
/// stops the scan. If the scan stops or runs out, the result is
/// `Operator::Pipe`.
fn heredoc_join_operator(heredoc_node: Node) -> Operator {
    let mut cursor = heredoc_node.walk();
    for child in heredoc_node.children(&mut cursor) {
        match (child.is_named(), child.kind()) {
            (false, "&&") => return Operator::And,
            (false, "||") => return Operator::Or,
            (false, "|&") => return Operator::PipeErr,
            (true, "pipeline") => return Operator::Pipe,
            (true, "command") | (true, "list") | (true, "redirected_statement") => break,
            _ => {}
        }
    }
    Operator::Pipe
}
/// Walks a loop statement (`for`, `while`, `until`, C-style `for`).
///
/// `do_group` children are always walked as blocks. For `while_statement` and
/// `until_statement` every other named child is walked too; for the remaining
/// loop kinds other named children are ignored. All results are joined with
/// `Operator::Semi`.
///
/// NOTE(review): for the `for` variants a body that is not a `do_group` would
/// be skipped entirely — confirm whether the grammar can produce one.
fn walk_loop(node: Node, source: &[u8]) -> WalkResult {
    let walks_other_children = matches!(node.kind(), "while_statement" | "until_statement");
    let mut result = WalkResult::empty();
    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        let walked = if child.kind() == "do_group" {
            walk_block(child, source)
        } else if walks_other_children {
            walk_ast(child, source)
        } else {
            continue;
        };
        result.append(walked, Some(Operator::Semi));
    }
    result
}
/// Walks an `if_statement` node.
///
/// Named children of the listed statement kinds, plus `elif_clause` and
/// `else_clause`, are walked in order and joined with `Operator::Semi`.
/// Named children of any other kind are ignored.
///
/// NOTE(review): kinds such as a nested `if_statement`, loop statements,
/// `case_statement` or `unset_command` are not in the accepted set and so are
/// skipped when they are direct children — confirm this is intended.
fn walk_if(node: Node, source: &[u8]) -> WalkResult {
    let mut result = WalkResult::empty();
    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        let walkable = matches!(
            child.kind(),
            "command"
                | "declaration_command"
                | "test_command"
                | "pipeline"
                | "list"
                | "redirected_statement"
                | "compound_statement"
                | "subshell"
                | "negated_command"
                | "else_clause"
                | "elif_clause"
        );
        if walkable {
            result.append(walk_ast(child, source), Some(Operator::Semi));
        }
    }
    result
}
/// Walks a `case_statement` node: every `case_item` child is walked and the
/// results are joined with `Operator::Semi`; other children are ignored.
fn walk_case(node: Node, source: &[u8]) -> WalkResult {
    let mut cursor = node.walk();
    let mut result = WalkResult::empty();
    let items = node
        .named_children(&mut cursor)
        .filter(|child| child.kind() == "case_item");
    for item in items {
        result.append(walk_case_item(item, source), Some(Operator::Semi));
    }
    result
}
/// Walks the body of a `case_item` node.
///
/// Everything up to the first anonymous `)` token is skipped; every named
/// child after that token is walked, joined with `Operator::Semi`.
fn walk_case_item(node: Node, source: &[u8]) -> WalkResult {
    let mut result = WalkResult::empty();
    let mut cursor = node.walk();
    let body = node
        .children(&mut cursor)
        // Drop children until the first anonymous `)` is reached.
        .skip_while(|child| child.is_named() || child.kind() != ")")
        // The `)` itself and any later anonymous tokens are not walked.
        .filter(|child| child.is_named());
    for child in body {
        result.append(walk_ast(child, source), Some(Operator::Semi));
    }
    result
}
/// Walks every named child of a block-like node in order and joins the
/// results with `Operator::Semi`.
fn walk_block(node: Node, source: &[u8]) -> WalkResult {
    let mut cursor = node.walk();
    let combined = node
        .named_children(&mut cursor)
        .fold(WalkResult::empty(), |mut acc, child| {
            acc.append(walk_ast(child, source), Some(Operator::Semi));
            acc
        });
    combined
}
/// Walks a `negated_command` node by walking its first named child; yields an
/// empty result when there is none.
fn walk_negated(node: Node, source: &[u8]) -> WalkResult {
    let mut cursor = node.walk();
    let first = node.named_children(&mut cursor).next();
    match first {
        Some(inner) => walk_ast(inner, source),
        None => WalkResult::empty(),
    }
}
/// Walks a `function_definition` node by walking its first named child that
/// is not a `word`; yields an empty result when no such child exists.
fn walk_function(node: Node, source: &[u8]) -> WalkResult {
    let mut cursor = node.walk();
    let body = node
        .named_children(&mut cursor)
        .find(|child| child.kind() != "word");
    match body {
        Some(inner) => walk_ast(inner, source),
        None => WalkResult::empty(),
    }
}
/// Reports whether `node` is one of the kinds treated as a leaf command:
/// plain, declaration, test and unset commands, plus variable assignments.
fn is_leaf_command(node: Node) -> bool {
    match node.kind() {
        "command" | "declaration_command" | "test_command" | "unset_command" => true,
        "variable_assignment" | "variable_assignments" => true,
        _ => false,
    }
}
fn extract_leaf_words(node: Node, source: &[u8]) -> Vec<Word> {
match node.kind() {
"command" => extract_command_words(node, source),
"declaration_command" => extract_declaration_words(node, source),
"unset_command" => extract_unset_words(node, source),
"test_command" => extract_test_words(node, source),
"variable_assignment" => {
let text = node.utf8_text(source).unwrap_or("").trim();
if text.is_empty() {
vec![]
} else {
vec![strip_quotes(text)]
}
}
"variable_assignments" => extract_variable_assignments_words(node, source),
_ => {
let text = node.utf8_text(source).unwrap_or("");
shlex_or_whitespace_words(text)
}
}
}
/// Returns the end byte of `node`, clipped so that it does not extend past
/// the start of a `heredoc_body` found beneath it.
fn effective_end(node: Node) -> usize {
    let mut clipped = node.end_byte();
    trim_at_heredoc_body(node, &mut clipped);
    clipped
}
/// Lowers `*end` to the start of a `heredoc_body` found under `node`.
///
/// Children are visited in order and descended into recursively. When a
/// direct child is a `heredoc_body`, `*end` is reduced to that child's start
/// (if smaller) and the scan of this node stops.
fn trim_at_heredoc_body(node: Node, end: &mut usize) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() != "heredoc_body" {
            trim_at_heredoc_body(child, end);
            continue;
        }
        let body_start = child.start_byte();
        if body_start < *end {
            *end = body_start;
        }
        return;
    }
}
#[cfg(test)]
mod walk_tests {
    use super::strip_quotes;

    // An empty word has no quotes to strip.
    #[test]
    fn strip_quotes_empty_string() {
        let stripped = strip_quotes("");
        assert_eq!(stripped, "");
    }

    // A pair of single quotes with nothing inside yields the empty word.
    #[test]
    fn strip_quotes_empty_single_quotes() {
        assert_eq!(strip_quotes("''"), "");
    }

    // A pair of double quotes with nothing inside yields the empty word.
    #[test]
    fn strip_quotes_empty_double_quotes() {
        assert_eq!(strip_quotes("\"\""), "");
    }

    // The `$'…'` form loses both the `$'` prefix and the closing quote.
    #[test]
    fn strip_quotes_ansi_c_quotes() {
        assert_eq!(strip_quotes("$'hello'"), "hello");
    }

    // Without a closing double quote the word is returned unchanged.
    #[test]
    fn strip_quotes_unclosed_double_quote() {
        assert_eq!(strip_quotes("\"unclosed"), "\"unclosed");
    }

    // Without a closing single quote the word is returned unchanged.
    #[test]
    fn strip_quotes_unmatched_single_quote() {
        assert_eq!(strip_quotes("'unmatched"), "'unmatched");
    }
}