use brush_parser::ast;
/// One simple command discovered while walking a parsed script.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleCmd {
/// The command word, copied verbatim from the parser's word value
/// (no basename or quote normalisation is applied when it is stored).
pub program: String,
/// The plain word arguments that followed the command word, in source order,
/// copied verbatim from the parser's word values.
pub args: Vec<String>,
}
/// Everything gathered from one call to [`analyze`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Analysis {
/// Every simple command that was reached during the walk, including those
/// found inside compounds, function bodies and substitutions.
pub commands: Vec<SimpleCmd>,
/// The inner text of each `$(...)` / backtick substitution found in the
/// scanned words (stored whether or not that text could itself be parsed).
pub substitutions: Vec<String>,
/// Set to `true` when the walk stopped descending because the recursion
/// depth limit was exceeded; the result may then be incomplete.
pub truncated: bool,
}
/// Parse `raw` as a shell script and collect the commands and command
/// substitutions it contains.
///
/// Returns `None` when the input is refused by the nesting pre-check or cannot
/// be tokenized/parsed; otherwise returns the populated [`Analysis`].
pub fn analyze(raw: &str) -> Option<Analysis> {
    parse_program(raw).map(|parsed| {
        let mut analysis = Analysis::default();
        // The top-level program is walked at depth 0.
        collect_program(&parsed, &mut analysis, 0);
        analysis
    })
}
pub fn ast_commands(raw: &str) -> Option<Vec<SimpleCmd>> {
analyze(raw).map(|a| a.commands)
}
/// Return the final path component of `arg0`, with a trailing `.exe` removed.
///
/// Both `/` and `\` are treated as path separators. The `.exe` match is
/// case-sensitive.
fn basename(arg0: &str) -> &str {
    let leaf = match arg0.rfind(['/', '\\']) {
        // Both separators are one byte long, so `sep + 1` is a char boundary.
        Some(sep) => &arg0[sep + 1..],
        None => arg0,
    };
    match leaf.strip_suffix(".exe") {
        Some(stem) => stem,
        None => leaf,
    }
}
/// Tokenize and parse `raw` into a program AST.
///
/// Returns `None` when the text trips the nesting pre-check, or when either
/// tokenizing or parsing fails. Here-document / here-string operators are
/// blanked out before tokenizing, so the parser never sees `<<`.
fn parse_program(raw: &str) -> Option<ast::Program> {
    if exceeds_nesting(raw) {
        // Refuse suspiciously deep input before handing it to the parser.
        return None;
    }
    let source = neutralize_here_operators(raw);
    match brush_parser::tokenize_str(&source) {
        Ok(tokens) => {
            brush_parser::parse_tokens(&tokens, &brush_parser::ParserOptions::default()).ok()
        }
        Err(_) => None,
    }
}
/// Replace every run of two or more consecutive `<` characters with a single
/// space, leaving lone `<` characters untouched.
///
/// Input without any `<<` is returned borrowed; otherwise a rewritten owned
/// copy is returned. The replacement is purely textual: quoting is not
/// considered.
fn neutralize_here_operators(raw: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    if !raw.contains("<<") {
        return Cow::Borrowed(raw);
    }
    let mut rewritten = String::with_capacity(raw.len());
    let mut rest = raw;
    // `rest` never starts in the middle of a `<` run, so the first "<<" found
    // is always the beginning of a maximal run.
    while let Some(run_start) = rest.find("<<") {
        rewritten.push_str(&rest[..run_start]);
        rewritten.push(' ');
        rest = rest[run_start..].trim_start_matches('<');
    }
    rewritten.push_str(rest);
    Cow::Owned(rewritten)
}
/// Threshold shared by all three heuristics in [`exceeds_nesting`].
const MAX_NESTING: usize = 48;

/// Cheap textual pre-check that flags input which looks too deeply nested.
///
/// Returns `true` when any of the following is greater than [`MAX_NESTING`]:
/// * the deepest level of unclosed `(` / `{` reached while scanning the bytes
///   (closers reduce the level but never below zero),
/// * the total number of backtick bytes,
/// * the total number of whitespace-separated tokens that are exactly one of
///   `if`, `for`, `while`, `until`, `case`, `select`, `do`, `then`.
///
/// Quoting is ignored; the counts are over the raw text.
fn exceeds_nesting(raw: &str) -> bool {
    const NESTING_KEYWORDS: [&str; 8] = [
        "if", "for", "while", "until", "case", "select", "do", "then",
    ];

    let mut open: i32 = 0;
    let mut deepest: i32 = 0;
    let mut tick_count = 0usize;
    for byte in raw.bytes() {
        if byte == b'(' || byte == b'{' {
            open += 1;
            if open > deepest {
                deepest = open;
            }
        } else if byte == b')' || byte == b'}' {
            // Unbalanced closers are clamped rather than going negative.
            if open > 0 {
                open -= 1;
            }
        } else if byte == b'`' {
            tick_count += 1;
        }
    }
    if deepest as usize > MAX_NESTING || tick_count > MAX_NESTING {
        return true;
    }

    let keyword_count = raw
        .split_whitespace()
        .filter(|token| NESTING_KEYWORDS.contains(token))
        .count();
    keyword_count > MAX_NESTING
}
/// Deepest recursion level the AST walk will descend to before giving up and
/// marking the analysis as truncated.
const MAX_DEPTH: u8 = 64;

/// Walk every complete command of `program`, recording results in `a`.
///
/// When `depth` is already past [`MAX_DEPTH`] nothing is walked and
/// `a.truncated` is set instead.
fn collect_program(program: &ast::Program, a: &mut Analysis, depth: u8) {
    if depth <= MAX_DEPTH {
        program
            .complete_commands
            .iter()
            .for_each(|list| collect_compound_list(list, a, depth));
    } else {
        a.truncated = true;
    }
}
/// Walk each and/or list contained in a compound list.
///
/// When `depth` is past `MAX_DEPTH` the list is skipped and `a.truncated` is
/// set.
fn collect_compound_list(list: &ast::CompoundList, a: &mut Analysis, depth: u8) {
    if depth <= MAX_DEPTH {
        // Only the and/or list of each entry is used; the entry's second
        // field is not consulted.
        list.0
            .iter()
            .for_each(|entry| collect_and_or(&entry.0, a, depth));
    } else {
        a.truncated = true;
    }
}
/// Walk every pipeline of an `&&` / `||` chain, first pipeline first.
///
/// The joining operator is irrelevant here: both sides are always walked.
fn collect_and_or(and_or: &ast::AndOrList, a: &mut Analysis, depth: u8) {
    let chained = and_or.additional.iter().map(|link| match link {
        ast::AndOr::And(pipeline) | ast::AndOr::Or(pipeline) => pipeline,
    });
    for pipeline in std::iter::once(&and_or.first).chain(chained) {
        collect_pipeline(pipeline, a, depth);
    }
}
/// Walk every command of a pipeline, in order.
fn collect_pipeline(pipeline: &ast::Pipeline, a: &mut Analysis, depth: u8) {
    pipeline
        .seq
        .iter()
        .for_each(|stage| collect_command(stage, a, depth));
}
/// Dispatch on the kind of command.
///
/// * Simple commands are handed to `collect_simple` at the current depth.
/// * Compound commands and function bodies are walked one level deeper.
/// * Extended tests are ignored.
///
/// NOTE(review): redirections attached to a compound command and the contents
/// of extended tests are not inspected here — confirm whether substitutions
/// in those positions need to be surfaced.
fn collect_command(cmd: &ast::Command, a: &mut Analysis, depth: u8) {
    let body = match cmd {
        ast::Command::Simple(simple) => return collect_simple(simple, a, depth),
        ast::Command::ExtendedTest(_, _) => return,
        ast::Command::Compound(body, _redirects) => body,
        ast::Command::Function(def) => &def.body.0,
    };
    collect_compound(body, a, depth + 1);
}
fn collect_compound(compound: &ast::CompoundCommand, a: &mut Analysis, depth: u8) {
if depth > MAX_DEPTH {
a.truncated = true;
return;
}
use ast::CompoundCommand::*;
match compound {
BraceGroup(g) => collect_compound_list(&g.list, a, depth),
Subshell(s) => collect_compound_list(&s.list, a, depth),
ForClause(f) => collect_compound_list(&f.body.list, a, depth),
WhileClause(w) | UntilClause(w) => {
collect_compound_list(&w.0, a, depth);
collect_compound_list(&w.1.list, a, depth);
}
IfClause(i) => {
collect_compound_list(&i.condition, a, depth);
collect_compound_list(&i.then, a, depth);
if let Some(elses) = &i.elses {
for e in elses {
if let Some(cond) = &e.condition {
collect_compound_list(cond, a, depth);
}
collect_compound_list(&e.body, a, depth);
}
}
}
CaseClause(c) => {
for item in &c.cases {
if let Some(cmds) = &item.cmd {
collect_compound_list(cmds, a, depth);
}
}
}
Arithmetic(_) | ArithmeticForClause(_) | Coprocess(_) => {}
}
}
/// Record a simple command and everything reachable from its words.
///
/// Steps, in order:
/// 1. Prefix items: assignment/plain words are queued for substitution
///    scanning; process substitutions and redirections are walked.
/// 2. Suffix items: plain words **and assignment-shaped words** (for example
///    `of=/dev/sda` or `X=$(cmd)` after `export`) become arguments; process
///    substitutions and redirections are walked.
/// 3. Every queued word, the command word and every argument is scanned for
///    `$(...)` / backtick substitutions; each substitution's text is stored in
///    `a.substitutions` and, when it parses, walked one level deeper.
/// 4. If the command has a command word, a [`SimpleCmd`] is appended to
///    `a.commands` (after anything discovered inside its words).
///
/// Previously, assignment-shaped suffix words and prefix redirections fell
/// into the catch-all arm, so they were neither reported as arguments nor
/// scanned.
///
/// NOTE(review): words used as redirection *file targets* (e.g.
/// `> "$(cmd)"`) are still not scanned for substitutions — confirm against
/// the parser's redirect-target variants and extend if required.
fn collect_simple(sc: &ast::SimpleCommand, a: &mut Analysis, depth: u8) {
    let name = sc.word_or_name.as_ref().map(|n| n.value.as_str());
    // Whether the command word names a shell interpreter; only then are
    // here-document / here-string bodies treated as scripts.
    let is_shell = name
        .map(|n| matches!(basename(n), "sh" | "bash" | "zsh" | "dash" | "ash" | "ksh"))
        .unwrap_or(false);

    let mut prefix_words: Vec<&str> = Vec::new();
    if let Some(prefix) = &sc.prefix {
        for item in &prefix.0 {
            match item {
                ast::CommandPrefixOrSuffixItem::AssignmentWord(_, w)
                | ast::CommandPrefixOrSuffixItem::Word(w) => prefix_words.push(&w.value),
                ast::CommandPrefixOrSuffixItem::ProcessSubstitution(_, sub) => {
                    collect_compound_list(&sub.list, a, depth + 1)
                }
                ast::CommandPrefixOrSuffixItem::IoRedirect(io) => {
                    collect_redirect(io, is_shell, a, depth)
                }
                // Kept so the match still compiles if the parser's item enum
                // has (or later gains) variants not listed above; unreachable
                // today if the four arms above are exhaustive.
                #[allow(unreachable_patterns)]
                _ => {}
            }
        }
    }

    let mut args: Vec<String> = Vec::new();
    if let Some(suffix) = &sc.suffix {
        for item in &suffix.0 {
            match item {
                // An assignment-shaped word after the command word is still an
                // argument of that command; keep its full text.
                ast::CommandPrefixOrSuffixItem::Word(w)
                | ast::CommandPrefixOrSuffixItem::AssignmentWord(_, w) => {
                    args.push(w.value.clone())
                }
                ast::CommandPrefixOrSuffixItem::ProcessSubstitution(_, sub) => {
                    collect_compound_list(&sub.list, a, depth + 1)
                }
                ast::CommandPrefixOrSuffixItem::IoRedirect(io) => {
                    collect_redirect(io, is_shell, a, depth)
                }
                // See the matching comment in the prefix loop.
                #[allow(unreachable_patterns)]
                _ => {}
            }
        }
    }

    // Scan order: prefix words, command word, arguments.
    let words = prefix_words
        .iter()
        .copied()
        .chain(name)
        .chain(args.iter().map(String::as_str));
    for word in words {
        for sub in command_substitutions(word) {
            if let Some(inner) = parse_program(&sub) {
                collect_program(&inner, a, depth + 1);
            }
            // The raw text is kept even when it could not be parsed.
            a.substitutions.push(sub);
        }
    }

    if let Some(name) = name {
        a.commands.push(SimpleCmd {
            program: name.to_string(),
            args,
        });
    }
}

/// Walk whatever a single redirection can execute.
///
/// A process substitution used as the redirect target is always walked. When
/// `is_shell` is true, the body of a here-document, or of a here-string with
/// surrounding quote characters trimmed, is additionally parsed as a script
/// and walked.
fn collect_redirect(io: &ast::IoRedirect, is_shell: bool, a: &mut Analysis, depth: u8) {
    if let ast::IoRedirect::File(_, _, ast::IoFileRedirectTarget::ProcessSubstitution(_, sub)) = io
    {
        collect_compound_list(&sub.list, a, depth + 1);
    }
    if !is_shell {
        return;
    }
    let body: &str = match io {
        ast::IoRedirect::HereDocument(_, hd) => hd.doc.value.as_str(),
        ast::IoRedirect::HereString(_, w) => w.value.trim_matches(['"', '\'']),
        _ => return,
    };
    if let Some(inner) = parse_program(body) {
        collect_program(&inner, a, depth + 1);
    }
}
fn command_substitutions(word: &str) -> Vec<String> {
let mut subs = Vec::new();
let bytes = word.as_bytes();
let mut i = 0;
let mut in_single = false;
while i < bytes.len() {
let c = bytes[i] as char;
if c == '\'' {
in_single = !in_single;
i += 1;
continue;
}
if in_single {
i += 1;
continue;
}
if c == '$' && i + 1 < bytes.len() && bytes[i + 1] == b'(' {
let start = i + 2;
let mut depth = 1;
let mut j = start;
while j < bytes.len() && depth > 0 {
match bytes[j] {
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
j += 1;
}
if depth == 0 {
subs.push(word[start..j - 1].to_string());
i = j;
continue;
}
}
if c == '`' {
if let Some(end) = word[i + 1..].find('`') {
subs.push(word[i + 1..i + 1 + end].to_string());
i = i + 1 + end + 1;
continue;
}
}
i += 1;
}
subs
}
// Unit tests for the analyzer. Each test feeds a shell snippet through the
// public entry points and checks which program names are surfaced.
#[cfg(test)]
mod tests {
use super::*;
// Helper: the program names of every collected command, or an empty list
// when the input could not be analyzed.
fn progs(raw: &str) -> Vec<String> {
ast_commands(raw)
.unwrap_or_default()
.into_iter()
.map(|c| c.program)
.collect()
}
// `&&`, `;` and `|` all contribute their commands to one flat list.
#[test]
fn flattens_pipelines_lists_and_separators() {
let p = progs("cd build && rm -rf ../dist; echo a | sh");
assert!(p.contains(&"cd".to_string()));
assert!(p.contains(&"rm".to_string()));
assert!(p.contains(&"echo".to_string()));
assert!(p.contains(&"sh".to_string()));
}
// Commands inside `$(...)` and backticks are surfaced, including nested ones.
#[test]
fn recurses_command_substitution_and_backticks() {
assert!(progs("echo \"$(rm -rf /)\"").contains(&"rm".to_string()));
assert!(progs("x=`git push --force`").contains(&"git".to_string()));
assert!(progs("echo $( echo $(terraform destroy) )").contains(&"terraform".to_string()));
}
// Single-quoted text is literal, so nothing inside it is treated as a command.
#[test]
fn single_quotes_are_not_substitutions() {
let p = progs("echo '$(rm -rf /)'");
assert!(p.contains(&"echo".to_string()));
assert!(
!p.contains(&"rm".to_string()),
"single-quoted is literal: {p:?}"
);
}
// Bodies of `if` and subshells are walked.
#[test]
fn descends_into_compounds() {
assert!(progs("if true; then rm -rf /; fi").contains(&"rm".to_string()));
assert!(progs("( cd x && git push --force )").contains(&"git".to_string()));
}
// `<(...)` and `>(...)` are walked, both as arguments and as redirect targets.
#[test]
fn descends_into_process_substitution() {
assert!(progs("grep x <(rm -rf /)").contains(&"rm".to_string()));
assert!(progs("diff <(git push --force) /dev/null").contains(&"git".to_string()));
assert!(progs("echo hi > >(rm -rf /)").contains(&"rm".to_string()));
}
// Both function-definition syntaxes have their bodies walked.
#[test]
fn descends_into_function_bodies() {
assert!(progs("f(){ rm -rf /; }; f").contains(&"rm".to_string()));
assert!(progs("function g { git push --force; }; g").contains(&"git".to_string()));
}
// Pathologically deep input yields `None` rather than crashing the process.
#[test]
fn deep_nesting_is_refused_not_aborted() {
let bomb = format!("echo {}rm -rf /{}", "$(".repeat(300), ")".repeat(300));
assert!(analyze(&bomb).is_none(), "deep nesting must be refused");
}
// Nesting below the limits is still analyzed all the way down.
#[test]
fn moderate_nesting_is_fully_walked() {
let nested = format!("echo {}rm -rf /{}", "$(".repeat(12), ")".repeat(12));
assert!(progs(&nested).contains(&"rm".to_string()));
}
// The backtick and keyword counters each trigger refusal on their own.
#[test]
fn backtick_and_keyword_bombs_are_refused() {
let backticks: String = "`".repeat(MAX_NESTING + 5);
assert!(analyze(&backticks).is_none());
let keywords = "if true; then ".repeat(MAX_NESTING + 5);
assert!(analyze(&keywords).is_none());
}
// With the `<<` operator blanked out, here-document lines are parsed as
// ordinary commands, so their contents show up in the result.
#[test]
fn heredoc_bodies_are_conservatively_surfaced() {
let p = progs("cat <<EOF\nrm -rf /\nEOF\n");
assert!(
p.contains(&"rm".to_string()),
"body must be surfaced: {p:?}"
);
}
// Input that cannot be tokenized/parsed produces `None`.
#[test]
fn unparseable_is_none() {
assert!(ast_commands("echo 'unterminated").is_none());
}
// Word arguments are recorded alongside the program name.
#[test]
fn args_are_captured() {
let cmds = ast_commands("rm -rf build").unwrap();
let rm = cmds.iter().find(|c| c.program == "rm").unwrap();
assert!(rm.args.iter().any(|a| a == "-rf"));
assert!(rm.args.iter().any(|a| a == "build"));
}
}