use tree_sitter::Node;
use super::{ArgMeta, Assignment, Redirect, RedirectOp};
pub fn node_text<'a>(node: Node, source: &'a str) -> &'a str {
&source[node.byte_range()]
}
pub fn resolve_node_text(node: Node, source: &str) -> String {
match node.kind() {
"raw_string" => {
let text = node_text(node, source);
if text.starts_with('\'') && text.ends_with('\'') && text.len() >= 2 {
text[1..text.len() - 1].to_string()
} else {
text.to_string()
}
}
"string" => {
let text = node_text(node, source);
if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
text[1..text.len() - 1].to_string()
} else {
text.to_string()
}
}
"command_name" => {
let mut cursor = node.walk();
let result = node
.named_children(&mut cursor)
.next()
.map(|child| resolve_node_text(child, source))
.unwrap_or_else(|| node_text(node, source).to_string());
result
}
_ => node_text(node, source).to_string(),
}
}
pub fn parse_assignment(node: Node, source: &str) -> (Assignment, Vec<super::Statement>) {
let mut name = String::new();
let mut value = String::new();
let mut substitutions = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
match child.kind() {
"variable_name" => {
name = node_text(child, source).to_string();
}
_ => {
value = resolve_node_text(child, source);
super::convert::collect_descendant_substitutions_pub(
child,
source,
&mut substitutions,
);
}
}
}
(Assignment { name, value }, substitutions)
}
pub fn parse_redirect(node: Node, source: &str) -> (Redirect, Vec<super::Statement>) {
let mut fd: Option<u32> = None;
let mut op = RedirectOp::Write;
let mut target = String::new();
let mut substitutions = Vec::new();
let child_count = node.child_count();
for i in 0..child_count as u32 {
let child = node.child(i).unwrap();
match child.kind() {
"file_descriptor" => {
fd = node_text(child, source).parse::<u32>().ok();
}
">" => op = RedirectOp::Write,
">>" => op = RedirectOp::Append,
"<" => op = RedirectOp::Read,
"<>" => op = RedirectOp::ReadWrite,
">&" => op = RedirectOp::DupOutput,
"<&" => op = RedirectOp::DupInput,
">|" => op = RedirectOp::Clobber,
"word" | "string" | "raw_string" | "number" | "concatenation" => {
target = resolve_node_text(child, source);
super::convert::collect_descendant_substitutions_pub(
child,
source,
&mut substitutions,
);
}
_ => {
if child.is_named() && target.is_empty() {
target = resolve_node_text(child, source);
super::convert::collect_descendant_substitutions_pub(
child,
source,
&mut substitutions,
);
}
}
}
}
(Redirect { fd, op, target }, substitutions)
}
pub fn classify_arg_node(node: Node, source: &str) -> ArgMeta {
match node.kind() {
"word" | "number" | "variable_name" => ArgMeta::PlainWord,
"raw_string" => ArgMeta::RawString,
"string" => classify_string_node(node, source),
"ansi_c_string"
| "translated_string"
| "simple_expansion"
| "expansion"
| "command_substitution"
| "process_substitution"
| "arithmetic_expansion"
| "brace_expression"
| "concatenation"
| "$"
| "ERROR" => ArgMeta::UnsafeString,
_ => ArgMeta::UnsafeString,
}
}
fn classify_string_node(node: Node, source: &str) -> ArgMeta {
let mut cursor = node.walk();
let mut children = node.named_children(&mut cursor).peekable();
if children.peek().is_none() {
return ArgMeta::SafeString;
}
for child in children {
match child.kind() {
"string_content" => {
if node_text(child, source).contains('\\') {
return ArgMeta::UnsafeString;
}
}
"simple_expansion" | "expansion" | "command_substitution" => {
return ArgMeta::UnsafeString;
}
_ => {
return ArgMeta::UnsafeString;
}
}
}
ArgMeta::SafeString
}
#[cfg(test)]
mod classification_tests {
use crate::parser::{Arg, ArgMeta};
use tree_sitter::Parser as TsParser;
fn classify_first_arg(input: &str) -> Arg {
let mut parser = TsParser::new();
parser
.set_language(&tree_sitter_bash::LANGUAGE.into())
.expect("load bash grammar");
let source = format!("cmd {input}");
let tree = parser.parse(&source, None).expect("parse");
let root = tree.root_node();
fn find_command<'a>(n: tree_sitter::Node<'a>) -> Option<tree_sitter::Node<'a>> {
if n.kind() == "command" {
return Some(n);
}
let mut cur = n.walk();
for child in n.named_children(&mut cur) {
if let Some(found) = find_command(child) {
return Some(found);
}
}
None
}
let cmd_node = find_command(root).expect("command node");
let mut saw_name = false;
let mut cursor = cmd_node.walk();
for child in cmd_node.named_children(&mut cursor) {
if child.kind() == "command_name" {
saw_name = true;
continue;
}
if saw_name {
return crate::parser::convert::convert_arg_node(child, &source);
}
}
panic!("no argv element found in: {input}");
}
#[test]
fn plain_word_bareword() {
let arg = classify_first_arg("ls");
assert_eq!(arg.text, "ls");
assert_eq!(arg.meta, ArgMeta::PlainWord);
}
#[test]
fn plain_word_flag() {
assert_eq!(classify_first_arg("--flag").meta, ArgMeta::PlainWord);
}
#[test]
fn plain_word_assignment_shape() {
assert_eq!(classify_first_arg("FOO=bar").meta, ArgMeta::PlainWord);
}
#[test]
fn plain_word_absolute_path() {
assert_eq!(classify_first_arg("/usr/bin/ls").meta, ArgMeta::PlainWord);
}
#[test]
fn plain_word_number() {
assert_eq!(classify_first_arg("42").meta, ArgMeta::PlainWord);
}
#[test]
fn raw_string_single_quoted() {
let arg = classify_first_arg("'docker ps'");
assert_eq!(arg.text, "docker ps");
assert_eq!(arg.meta, ArgMeta::RawString);
}
#[test]
fn raw_string_empty() {
assert_eq!(classify_first_arg("''").meta, ArgMeta::RawString);
}
#[test]
fn raw_string_with_double_quote_inside() {
let arg = classify_first_arg("'echo \"hi\"'");
assert_eq!(arg.text, "echo \"hi\"");
assert_eq!(arg.meta, ArgMeta::RawString);
}
#[test]
fn safe_string_double_quoted() {
let arg = classify_first_arg("\"docker ps\"");
assert_eq!(arg.text, "docker ps");
assert_eq!(arg.meta, ArgMeta::SafeString);
}
#[test]
fn safe_string_empty() {
let arg = classify_first_arg("\"\"");
assert_eq!(arg.meta, ArgMeta::SafeString);
}
#[test]
fn safe_string_single_space() {
assert_eq!(classify_first_arg("\" \"").meta, ArgMeta::SafeString);
}
#[test]
fn unsafe_escaped_double_quote() {
assert_eq!(
classify_first_arg("\"echo \\\"hi\\\"\"").meta,
ArgMeta::UnsafeString
);
}
#[test]
fn unsafe_escape_sequence() {
assert_eq!(classify_first_arg("\"a\\nb\"").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_simple_expansion_inside_string() {
assert_eq!(classify_first_arg("\"$HOME\"").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_expansion_inside_string() {
assert_eq!(
classify_first_arg("\"${VAR:-default}\"").meta,
ArgMeta::UnsafeString
);
}
#[test]
fn unsafe_bare_simple_expansion() {
assert_eq!(classify_first_arg("$HOME").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_command_substitution_inside_string() {
assert_eq!(
classify_first_arg("\"$(date)\"").meta,
ArgMeta::UnsafeString
);
}
#[test]
fn unsafe_bare_command_substitution() {
assert_eq!(classify_first_arg("$(date)").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_backtick_substitution() {
assert_eq!(classify_first_arg("\"`date`\"").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_process_substitution_input() {
assert_eq!(classify_first_arg("<(date)").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_process_substitution_output() {
assert_eq!(classify_first_arg(">(cat)").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_arithmetic_expansion() {
assert_eq!(classify_first_arg("$((1+2))").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_ansi_c_string() {
assert_eq!(classify_first_arg("$'ansi\\n'").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_brace_expression() {
assert_eq!(classify_first_arg("{a,b,c}").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_concat_word_string() {
assert_eq!(classify_first_arg("foo\"bar\"").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_concat_two_strings() {
assert_eq!(classify_first_arg("\"a\"\"b\"").meta, ArgMeta::UnsafeString);
}
#[test]
fn unsafe_concat_word_raw_string() {
assert_eq!(classify_first_arg("foo'bar'").meta, ArgMeta::UnsafeString);
}
}