1use brush_parser::ast;
22
/// One simple command found while walking the parsed script.
///
/// Both fields hold the raw word text taken from the parser's word values;
/// nothing is expanded or unquoted here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleCmd {
    /// The command word exactly as written (may include a path).
    pub program: String,
    /// The words that followed the command name, in source order.
    pub args: Vec<String>,
}
31
/// Everything the walker collected from one input string.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Analysis {
    /// Every simple command that was reached, including those found inside
    /// compound commands, function bodies, process substitutions and
    /// command substitutions.
    pub commands: Vec<SimpleCmd>,
    /// The text found between the delimiters of each command substitution
    /// (`$(...)` or backticks) that was discovered.
    pub substitutions: Vec<String>,
    /// Set when the walk stopped descending because the depth limit was
    /// exceeded; `commands` may be incomplete in that case.
    pub truncated: bool,
}
45
46pub fn analyze(raw: &str) -> Option<Analysis> {
49 let program = parse_program(raw)?;
50 let mut a = Analysis::default();
51 collect_program(&program, &mut a, 0);
52 Some(a)
53}
54
55pub fn ast_commands(raw: &str) -> Option<Vec<SimpleCmd>> {
57 analyze(raw).map(|a| a.commands)
58}
59
/// Reduces a command word to its bare program name.
///
/// Everything up to and including the last `/` or `\` is dropped, and a
/// trailing `.exe` (exact, lowercase) is removed from what remains.
fn basename(arg0: &str) -> &str {
    let tail = match arg0.rfind(|c: char| c == '/' || c == '\\') {
        // Both separators are one byte wide, so `sep + 1` is a char boundary.
        Some(sep) => &arg0[sep + 1..],
        None => arg0,
    };
    match tail.strip_suffix(".exe") {
        Some(stem) => stem,
        None => tail,
    }
}
65
66fn parse_program(raw: &str) -> Option<ast::Program> {
67 if exceeds_nesting(raw) {
71 return None;
72 }
73 let prepared = neutralize_here_operators(raw);
80 let tokens = brush_parser::tokenize_str(&prepared).ok()?;
81 let opts = brush_parser::ParserOptions::default();
82 brush_parser::parse_tokens(&tokens, &opts).ok()
83}
84
/// Replaces every run of two or more `<` characters with a single space.
///
/// A lone `<` is left untouched. When the input contains no `<<` at all it
/// is returned borrowed, without allocating. The replacement is purely
/// textual: runs inside quotes are rewritten as well.
fn neutralize_here_operators(raw: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    if raw.find("<<").is_none() {
        return Cow::Borrowed(raw);
    }

    let mut cleaned = String::with_capacity(raw.len());
    let mut rest = raw;
    // `find("<<")` always lands on the first `<` of a run, so trimming the
    // leading `<`s afterwards swallows the whole run in one step.
    while let Some(run_start) = rest.find("<<") {
        cleaned.push_str(&rest[..run_start]);
        cleaned.push(' ');
        rest = rest[run_start..].trim_start_matches('<');
    }
    cleaned.push_str(rest);
    Cow::Owned(cleaned)
}
111
/// Upper bound applied independently to bracket depth, backtick count and
/// block-keyword count before any text is handed to the parser.
const MAX_NESTING: usize = 48;

/// Cheap textual pre-check that refuses suspiciously deep input.
///
/// Three independent measurements are taken over the raw text, with no
/// awareness of quoting:
/// * the deepest `(`/`{` nesting reached (closers never take the running
///   depth below zero),
/// * the total number of backticks,
/// * the number of whitespace-separated tokens that are exactly one of the
///   block keywords listed below.
///
/// Returns `true` if any of the three is strictly greater than
/// [`MAX_NESTING`].
fn exceeds_nesting(raw: &str) -> bool {
    const BLOCK_KEYWORDS: [&str; 8] = [
        "if", "for", "while", "until", "case", "select", "do", "then",
    ];

    let mut open: i32 = 0;
    let mut deepest: i32 = 0;
    let mut ticks = 0usize;
    for byte in raw.bytes() {
        if byte == b'(' || byte == b'{' {
            open += 1;
            if open > deepest {
                deepest = open;
            }
        } else if byte == b')' || byte == b'}' {
            if open > 0 {
                open -= 1;
            }
        } else if byte == b'`' {
            ticks += 1;
        }
    }

    let keyword_hits = raw
        .split_whitespace()
        .filter(|token| BLOCK_KEYWORDS.contains(token))
        .count();

    [deepest as usize, ticks, keyword_hits]
        .iter()
        .any(|&measure| measure > MAX_NESTING)
}
145
146const MAX_DEPTH: u8 = 64;
151
152fn collect_program(program: &ast::Program, a: &mut Analysis, depth: u8) {
153 if depth > MAX_DEPTH {
154 a.truncated = true;
155 return;
156 }
157 for complete in &program.complete_commands {
158 collect_compound_list(complete, a, depth);
159 }
160}
161
162fn collect_compound_list(list: &ast::CompoundList, a: &mut Analysis, depth: u8) {
163 if depth > MAX_DEPTH {
164 a.truncated = true;
165 return;
166 }
167 for item in &list.0 {
168 collect_and_or(&item.0, a, depth);
169 }
170}
171
172fn collect_and_or(and_or: &ast::AndOrList, a: &mut Analysis, depth: u8) {
173 collect_pipeline(&and_or.first, a, depth);
174 for extra in &and_or.additional {
175 let pipeline = match extra {
176 ast::AndOr::And(p) | ast::AndOr::Or(p) => p,
177 };
178 collect_pipeline(pipeline, a, depth);
179 }
180}
181
182fn collect_pipeline(pipeline: &ast::Pipeline, a: &mut Analysis, depth: u8) {
183 for cmd in &pipeline.seq {
184 collect_command(cmd, a, depth);
185 }
186}
187
188fn collect_command(cmd: &ast::Command, a: &mut Analysis, depth: u8) {
189 match cmd {
190 ast::Command::Simple(sc) => collect_simple(sc, a, depth),
191 ast::Command::Compound(compound, _redirects) => collect_compound(compound, a, depth + 1),
192 ast::Command::Function(func) => collect_compound(&func.body.0, a, depth + 1),
197 ast::Command::ExtendedTest(_, _) => {}
199 }
200}
201
202fn collect_compound(compound: &ast::CompoundCommand, a: &mut Analysis, depth: u8) {
203 if depth > MAX_DEPTH {
204 a.truncated = true;
205 return;
206 }
207 use ast::CompoundCommand::*;
208 match compound {
209 BraceGroup(g) => collect_compound_list(&g.list, a, depth),
210 Subshell(s) => collect_compound_list(&s.list, a, depth),
211 ForClause(f) => collect_compound_list(&f.body.list, a, depth),
212 WhileClause(w) | UntilClause(w) => {
214 collect_compound_list(&w.0, a, depth);
215 collect_compound_list(&w.1.list, a, depth);
216 }
217 IfClause(i) => {
218 collect_compound_list(&i.condition, a, depth);
219 collect_compound_list(&i.then, a, depth);
220 if let Some(elses) = &i.elses {
221 for e in elses {
222 if let Some(cond) = &e.condition {
223 collect_compound_list(cond, a, depth);
224 }
225 collect_compound_list(&e.body, a, depth);
226 }
227 }
228 }
229 CaseClause(c) => {
230 for item in &c.cases {
231 if let Some(cmds) = &item.cmd {
232 collect_compound_list(cmds, a, depth);
233 }
234 }
235 }
236 Arithmetic(_) | ArithmeticForClause(_) | Coprocess(_) => {}
237 }
238}
239
240fn collect_simple(sc: &ast::SimpleCommand, a: &mut Analysis, depth: u8) {
241 let mut scan_words: Vec<String> = Vec::new();
244
245 if let Some(prefix) = &sc.prefix {
246 for item in &prefix.0 {
247 match item {
248 ast::CommandPrefixOrSuffixItem::AssignmentWord(_, w) => {
249 scan_words.push(w.value.clone())
250 }
251 ast::CommandPrefixOrSuffixItem::Word(w) => scan_words.push(w.value.clone()),
252 ast::CommandPrefixOrSuffixItem::ProcessSubstitution(_, sub) => {
254 collect_compound_list(&sub.list, a, depth + 1)
255 }
256 _ => {}
257 }
258 }
259 }
260
261 let is_shell = sc
263 .word_or_name
264 .as_ref()
265 .map(|n| {
266 matches!(
267 basename(&n.value),
268 "sh" | "bash" | "zsh" | "dash" | "ash" | "ksh"
269 )
270 })
271 .unwrap_or(false);
272
273 let mut args = Vec::new();
274 if let Some(suffix) = &sc.suffix {
275 for item in &suffix.0 {
276 match item {
277 ast::CommandPrefixOrSuffixItem::Word(w) => args.push(w.value.clone()),
278 ast::CommandPrefixOrSuffixItem::ProcessSubstitution(_, sub) => {
281 collect_compound_list(&sub.list, a, depth + 1)
282 }
283 ast::CommandPrefixOrSuffixItem::IoRedirect(io) => {
284 if let ast::IoRedirect::File(
287 _,
288 _,
289 ast::IoFileRedirectTarget::ProcessSubstitution(_, sub),
290 ) = io
291 {
292 collect_compound_list(&sub.list, a, depth + 1);
293 }
294 if is_shell {
296 let body = match io {
297 ast::IoRedirect::HereDocument(_, hd) => Some(hd.doc.value.clone()),
298 ast::IoRedirect::HereString(_, w) => {
301 Some(w.value.trim_matches(['"', '\'']).to_string())
302 }
303 _ => None,
304 };
305 if let Some(body) = body {
306 if let Some(inner) = parse_program(&body) {
307 collect_program(&inner, a, depth + 1);
308 }
309 }
310 }
311 }
312 _ => {}
313 }
314 }
316 }
317
318 if let Some(name) = &sc.word_or_name {
319 scan_words.push(name.value.clone());
320 }
321 for arg in &args {
322 scan_words.push(arg.clone());
323 }
324
325 for word in &scan_words {
329 for sub in command_substitutions(word) {
330 if let Some(inner) = parse_program(&sub) {
331 collect_program(&inner, a, depth + 1);
332 }
333 a.substitutions.push(sub);
334 }
335 }
336
337 if let Some(name) = &sc.word_or_name {
340 a.commands.push(SimpleCmd {
341 program: name.value.clone(),
342 args,
343 });
344 }
345}
346
/// Extracts the text of every top-level command substitution in `word`.
///
/// Recognises `$( ... )` (with balanced nested parentheses) and
/// `` ` ... ` `` and returns the text between the delimiters, in order of
/// appearance. Nested substitutions are not split out; they stay inside
/// the returned text.
///
/// Quoting rules applied while scanning:
/// * Inside single quotes nothing is a substitution.
/// * Inside double quotes substitutions are still active, and a `'` is an
///   ordinary character. It does not open a single-quoted region, so
///   `"it's $(cmd)"` still yields `cmd`.
/// * Outside single quotes a backslash consumes the following character,
///   so `\'` and `\"` do not change the quote state. An escaped `$` or
///   backtick is deliberately still examined: over-reporting is preferred
///   to missing a substitution.
///
/// Parenthesis matching inside `$( ... )` is purely textual and ignores
/// quotes. An unterminated `$(` or backtick yields nothing.
fn command_substitutions(word: &str) -> Vec<String> {
    let mut subs = Vec::new();
    let bytes = word.as_bytes();
    let mut i = 0;
    let mut in_single = false;
    let mut in_double = false;
    while i < bytes.len() {
        let b = bytes[i];

        // Inside single quotes only the closing quote matters.
        if in_single {
            if b == b'\'' {
                in_single = false;
            }
            i += 1;
            continue;
        }

        match b {
            b'\\' => {
                // Skip the escaped character so an escaped quote cannot
                // flip the quote state, but keep `\$` / `` \` `` visible to
                // the scanner. All slicing below starts at ASCII
                // delimiters, so landing mid-way through a multi-byte
                // character here is harmless.
                let step = match bytes.get(i + 1).copied() {
                    Some(b'$') | Some(b'`') | None => 1,
                    Some(_) => 2,
                };
                i += step;
                continue;
            }
            b'\'' if !in_double => in_single = true,
            b'"' => in_double = !in_double,
            b'$' if bytes.get(i + 1).copied() == Some(b'(') => {
                let start = i + 2;
                let mut depth = 1usize;
                let mut j = start;
                while j < bytes.len() && depth > 0 {
                    match bytes[j] {
                        b'(' => depth += 1,
                        b')' => depth -= 1,
                        _ => {}
                    }
                    j += 1;
                }
                if depth == 0 {
                    // `j` is one past the matching `)`.
                    subs.push(word[start..j - 1].to_string());
                    i = j;
                    continue;
                }
            }
            b'`' => {
                if let Some(end) = word[i + 1..].find('`') {
                    subs.push(word[i + 1..i + 1 + end].to_string());
                    i += end + 2;
                    continue;
                }
            }
            _ => {}
        }
        i += 1;
    }
    subs
}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Program names of every command found in `raw` (empty if unparseable).
    fn progs(raw: &str) -> Vec<String> {
        match ast_commands(raw) {
            Some(cmds) => cmds.into_iter().map(|cmd| cmd.program).collect(),
            None => Vec::new(),
        }
    }

    /// Whether `program` is among the command names found in `raw`.
    fn runs(raw: &str, program: &str) -> bool {
        progs(raw).contains(&program.to_string())
    }

    // `&&`, `;` and `|` all contribute their commands to one flat list.
    #[test]
    fn flattens_pipelines_lists_and_separators() {
        let p = progs("cd build && rm -rf ../dist; echo a | sh");
        let found = |name: &str| p.contains(&name.to_string());
        assert!(found("cd"));
        assert!(found("rm"));
        assert!(found("echo"));
        assert!(found("sh"));
    }

    // `$(...)`, backticks and nested `$(...)` are all descended into.
    #[test]
    fn recurses_command_substitution_and_backticks() {
        assert!(runs("echo \"$(rm -rf /)\"", "rm"));
        assert!(runs("x=`git push --force`", "git"));
        assert!(runs("echo $( echo $(terraform destroy) )", "terraform"));
    }

    // Text inside single quotes is never treated as a substitution.
    #[test]
    fn single_quotes_are_not_substitutions() {
        let p = progs("echo '$(rm -rf /)'");
        assert!(p.contains(&"echo".to_string()));
        assert!(
            !p.contains(&"rm".to_string()),
            "single-quoted is literal: {p:?}"
        );
    }

    // Branch bodies and subshells are walked.
    #[test]
    fn descends_into_compounds() {
        assert!(runs("if true; then rm -rf /; fi", "rm"));
        assert!(runs("( cd x && git push --force )", "git"));
    }

    // `<(...)` as an argument and `>(...)` as a redirect target.
    #[test]
    fn descends_into_process_substitution() {
        assert!(runs("grep x <(rm -rf /)", "rm"));
        assert!(runs("diff <(git push --force) /dev/null", "git"));
        assert!(runs("echo hi > >(rm -rf /)", "rm"));
    }

    // Both function-definition syntaxes expose their bodies.
    #[test]
    fn descends_into_function_bodies() {
        assert!(runs("f(){ rm -rf /; }; f", "rm"));
        assert!(runs("function g { git push --force; }; g", "git"));
    }

    // Pathological nesting yields `None` instead of blowing the stack.
    #[test]
    fn deep_nesting_is_refused_not_aborted() {
        let opens = "$(".repeat(300);
        let closes = ")".repeat(300);
        let bomb = format!("echo {}rm -rf /{}", opens, closes);
        assert!(analyze(&bomb).is_none(), "deep nesting must be refused");
    }

    // Nesting below the limit is walked all the way down.
    #[test]
    fn moderate_nesting_is_fully_walked() {
        let opens = "$(".repeat(12);
        let closes = ")".repeat(12);
        let nested = format!("echo {}rm -rf /{}", opens, closes);
        assert!(runs(&nested, "rm"));
    }

    // The pre-parse guard also counts backticks and block keywords.
    #[test]
    fn backtick_and_keyword_bombs_are_refused() {
        let over_limit = MAX_NESTING + 5;
        let backticks: String = "`".repeat(over_limit);
        assert!(analyze(&backticks).is_none());
        let keywords = "if true; then ".repeat(over_limit);
        assert!(analyze(&keywords).is_none());
    }

    // Here-document text ends up parsed as commands.
    #[test]
    fn heredoc_bodies_are_conservatively_surfaced() {
        let p = progs("cat <<EOF\nrm -rf /\nEOF\n");
        assert!(
            p.contains(&"rm".to_string()),
            "body must be surfaced: {p:?}"
        );
    }

    // A parse failure is reported as `None`, not as an empty list.
    #[test]
    fn unparseable_is_none() {
        assert!(ast_commands("echo 'unterminated").is_none());
    }

    // Argument words are kept alongside the program name.
    #[test]
    fn args_are_captured() {
        let cmds = ast_commands("rm -rf build").unwrap();
        let rm = cmds.iter().find(|c| c.program == "rm").unwrap();
        for expected in ["-rf", "build"] {
            assert!(rm.args.iter().any(|a| a == expected));
        }
    }
}