#[cfg(all(test, feature = "semantic-bash"))]
mod tests {
    //! Tests for the bash adapter: segment splitting, detection of complex
    //! constructs, recursion into compound statements and redirect-target
    //! extraction.

    use crate::semantic::adapters::bash::{
        extract_redirect_targets, parse_bash_segments, parse_bash_segments_full,
    };

    /// Fails unless at least one of `segments` begins with `prefix`.
    #[track_caller]
    fn assert_some_segment_starts_with(segments: &[String], prefix: &str) {
        assert!(
            segments.iter().any(|s| s.starts_with(prefix)),
            "got: {segments:?}"
        );
    }

    // ---- segment splitting -------------------------------------------------

    #[test]
    fn test_simple_command() {
        assert_eq!(parse_bash_segments("cargo test --all"), ["cargo test --all"]);
    }

    #[test]
    fn test_double_ampersand_splits() {
        assert_eq!(
            parse_bash_segments("cargo test && echo done"),
            ["cargo test", "echo done"]
        );
    }

    #[test]
    fn test_semicolon_splits() {
        assert_eq!(parse_bash_segments("echo a; echo b"), ["echo a", "echo b"]);
    }

    #[test]
    fn test_pipe_splits() {
        assert_eq!(
            parse_bash_segments("cat file | grep foo | wc -l"),
            ["cat file", "grep foo", "wc -l"]
        );
    }

    #[test]
    fn test_mixed_separators() {
        assert_eq!(parse_bash_segments("a && b | c"), ["a", "b", "c"]);
    }

    // ---- complexity detection ----------------------------------------------

    #[test]
    fn test_command_substitution_is_complex() {
        let (parts, is_complex) = parse_bash_segments_full("echo $(rm -rf /)").unwrap();
        assert!(is_complex, "command substitution should be marked complex");
        assert_eq!(parts.len(), 1);
    }

    #[test]
    fn test_single_quotes_are_safe() {
        let (parts, is_complex) =
            parse_bash_segments_full("echo 'safe $(not expanded)'").unwrap();
        assert!(!is_complex, "single quotes should not trigger complex");
        assert_eq!(parts.len(), 1);
    }

    #[test]
    fn test_double_quotes_are_complex() {
        let (_, is_complex) =
            parse_bash_segments_full(r#"echo "dangerous $(expanded)""#).unwrap();
        assert!(
            is_complex,
            "double quotes with substitution should be complex"
        );
    }

    #[test]
    fn test_git_commands_parse() {
        assert_eq!(
            parse_bash_segments("git diff --staged && git status"),
            ["git diff --staged", "git status"]
        );
    }

    /// An unparsable command comes back as one segment holding the raw input.
    #[test]
    fn test_parse_error_fallback() {
        assert_eq!(parse_bash_segments("for i in"), ["for i in"]);
    }

    // ---- recursion into compound statements ---------------------------------

    #[test]
    fn test_brace_group_recurses_into_commands() {
        let parts = parse_bash_segments("{ echo a; rm -rf /tmp/x; }");
        assert_some_segment_starts_with(&parts, "echo");
        assert_some_segment_starts_with(&parts, "rm");
        // The brace group itself must not leak through as a segment.
        assert!(
            parts.iter().all(|s| !s.contains("{ echo")),
            "got: {parts:?}"
        );
    }

    #[test]
    fn test_if_statement_recurses_into_body() {
        let parts = parse_bash_segments("if true; then rm /tmp/x; echo done; fi");
        assert_some_segment_starts_with(&parts, "rm");
        assert_some_segment_starts_with(&parts, "echo");
    }

    #[test]
    fn test_while_loop_recurses() {
        let parts = parse_bash_segments("while true; do rm -rf /tmp/x; done");
        assert_some_segment_starts_with(&parts, "rm");
    }

    #[test]
    fn test_for_loop_recurses() {
        let parts = parse_bash_segments("for f in a b c; do rm $f; done");
        assert_some_segment_starts_with(&parts, "rm");
    }

    #[test]
    fn test_case_statement_recurses() {
        let parts = parse_bash_segments("case $x in foo) rm /tmp/x;; bar) echo b;; esac");
        assert_some_segment_starts_with(&parts, "rm");
        assert_some_segment_starts_with(&parts, "echo");
    }

    // ---- redirect targets ----------------------------------------------------

    #[test]
    fn extract_redirect_targets_output_redirect() {
        assert_eq!(
            extract_redirect_targets("echo pwned > /etc/something"),
            ["/etc/something"]
        );
    }

    #[test]
    fn extract_redirect_targets_append() {
        assert_eq!(
            extract_redirect_targets("echo line >> /var/log/foo"),
            ["/var/log/foo"]
        );
    }

    #[test]
    fn extract_redirect_targets_multiple() {
        let targets = extract_redirect_targets("rustc src.rs > out.log 2> err.log");
        assert!(targets.iter().any(|p| p == "out.log"), "got: {targets:?}");
        assert!(targets.iter().any(|p| p == "err.log"), "got: {targets:?}");
    }

    #[test]
    fn extract_redirect_targets_strips_quotes() {
        assert_eq!(
            extract_redirect_targets(r#"echo x > "/tmp/with spaces""#),
            ["/tmp/with spaces"]
        );
    }

    #[test]
    fn extract_redirect_targets_no_redirects() {
        for command in ["echo hello", "cargo test --all"] {
            assert!(extract_redirect_targets(command).is_empty());
        }
    }

    #[test]
    fn extract_redirect_targets_heredoc_skipped() {
        let targets = extract_redirect_targets("cat <<EOF\nhi\nEOF");
        assert!(targets.is_empty(), "got: {targets:?}");
    }

    #[test]
    fn extract_redirect_targets_skips_fd_duplication() {
        for command in ["cargo test 2>&1", "cmd >&2", "cmd 1>&2"] {
            assert!(extract_redirect_targets(command).is_empty());
        }
    }

    #[test]
    fn test_redirected_statement_recurses_to_inner_command() {
        let segments = parse_bash_segments("echo pwned > /etc/something");
        assert_some_segment_starts_with(&segments, "echo");
        assert!(
            segments.iter().all(|s| !s.contains("/etc/something")),
            "segment should NOT include the redirect target; got: {segments:?}"
        );
    }
}
/// Returns the file targets of the redirections found in `command`.
///
/// With the `semantic-bash` feature enabled the command is parsed with the
/// tree-sitter bash grammar and the targets are gathered by
/// `collect_redirect_targets`. An empty vector is returned when the grammar
/// cannot be loaded, when parsing yields no tree, or when the tree contains a
/// syntax error. Without the feature the result is always empty.
// NOTE(review): `dead_code` is presumably allowed because not every build
// configuration calls this function; confirm against the callers.
#[allow(dead_code)]
pub fn extract_redirect_targets(command: &str) -> Vec<String> {
    #[cfg(feature = "semantic-bash")]
    {
        let language: tree_sitter::Language = tree_sitter_bash::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        if parser.set_language(&language).is_err() {
            return Vec::new();
        }

        // Only a tree that parsed without errors is inspected.
        let tree = match parser.parse(command, None) {
            Some(tree) if !tree.root_node().has_error() => tree,
            _ => return Vec::new(),
        };

        let mut found = Vec::new();
        collect_redirect_targets(tree.root_node(), command.as_bytes(), &mut found);
        found
    }
    #[cfg(not(feature = "semantic-bash"))]
    {
        // Nothing can be analysed without the parser; keep the argument "used".
        let _ = command;
        Vec::new()
    }
}
/// Returns the path operands of file-mutating commands found in `command`.
///
/// The command is parsed with the tree-sitter bash grammar and walked by
/// `collect_mutation_paths`, which is given the fixed list of program names
/// in `FILE_MUTATORS`. An empty vector is returned when the grammar cannot be
/// loaded, when parsing yields no tree, or when the tree contains a syntax
/// error.
#[cfg(feature = "semantic-bash")]
pub fn extract_mutation_paths(command: &str) -> Vec<String> {
    /// Program names whose operands are reported as mutated paths.
    const FILE_MUTATORS: &[&str] = &[
        "rm", "cp", "mv", "mkdir", "rmdir", "touch", "chmod", "chown", "ln", "tee", "dd",
    ];

    let language: tree_sitter::Language = tree_sitter_bash::LANGUAGE.into();
    let mut parser = tree_sitter::Parser::new();
    if parser.set_language(&language).is_err() {
        return Vec::new();
    }

    // Only a tree that parsed without errors is inspected.
    let tree = match parser.parse(command, None) {
        Some(tree) if !tree.root_node().has_error() => tree,
        _ => return Vec::new(),
    };

    let mut found = Vec::new();
    collect_mutation_paths(
        tree.root_node(),
        command.as_bytes(),
        FILE_MUTATORS,
        &mut found,
    );
    found
}
/// Walks `node` recursively and appends to `out` the path operands of every
/// `command` node whose program name is listed in `mutators`.
///
/// * `node` - syntax node to inspect; non-`command` nodes are only descended into.
/// * `source` - the bytes the tree was parsed from, used to read node text.
/// * `mutators` - program basenames (e.g. `rm`) considered file-mutating.
/// * `out` - receives the unquoted, non-empty path operands.
///
/// For a `command` node:
/// * redirections and leading `NAME=value` assignments are ignored, so the
///   first remaining word is the program name (`FOO=1 rm x` is seen as `rm`);
/// * the program is matched by basename, so `/bin/rm` counts as `rm`;
/// * words starting with `-` are treated as options and dropped;
/// * for `chmod` and `chown` the first remaining operand (mode / owner) is
///   dropped, and what follows is reported.
///
/// Children of a `command` node are not descended into.
#[cfg(feature = "semantic-bash")]
fn collect_mutation_paths(
    node: tree_sitter::Node,
    source: &[u8],
    mutators: &[&str],
    out: &mut Vec<String>,
) {
    if node.kind() != "command" {
        for i in 0..node.named_child_count() {
            if let Some(child) = node.named_child(i) {
                collect_mutation_paths(child, source, mutators, out);
            }
        }
        return;
    }

    // Words of the command in order, without redirections and without the
    // environment assignments that may precede the program name. Children
    // whose text is not valid UTF-8 are skipped.
    let mut words = (0..node.named_child_count())
        .filter_map(|i| node.named_child(i))
        .filter(|child| {
            !matches!(
                child.kind(),
                "file_redirect"
                    | "heredoc_redirect"
                    | "herestring_redirect"
                    | "variable_assignment"
            )
        })
        .filter_map(|child| child.utf8_text(source).ok())
        .map(|text| text.trim());

    let Some(head) = words.next() else {
        return;
    };
    let basename = std::path::Path::new(head)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(head);
    if !mutators.contains(&basename) {
        return;
    }

    let is_chmod = basename == "chmod";
    // chmod and chown take a mode / owner operand before the paths.
    let leading_operands = usize::from(matches!(basename, "chmod" | "chown"));

    // `chmod -x file` uses a symbolic mode that looks like an option. It must
    // be counted as the mode operand, otherwise the real path would be
    // consumed in its place. chmod's own short options (-c, -f, -v, -R) use
    // none of the permission letters, so the two cannot be confused.
    let is_minus_mode = |word: &str| {
        matches!(
            word.strip_prefix('-'),
            Some(rest) if !rest.is_empty() && rest.chars().all(|c| "rwxXst".contains(c))
        )
    };

    // A `+x` style mode is kept as an ordinary operand here so that it, not
    // the path after it, is what `skip` removes.
    let paths = words
        .filter(|&word| !word.starts_with('-') || (is_chmod && is_minus_mode(word)))
        .skip(leading_operands)
        .map(|word| unquote_simple(word))
        .filter(|path| !path.is_empty());
    out.extend(paths);
}
/// Walks `node` recursively and appends the target of every `file_redirect`
/// node to `out`.
///
/// The target is the text of the redirect's last named child, trimmed and
/// with one pair of surrounding quotes removed. It is ignored when empty,
/// when it starts with `&`, or when it consists only of ASCII digits (as in
/// `2>&1`). Heredoc and herestring redirects are skipped entirely.
#[cfg(feature = "semantic-bash")]
fn collect_redirect_targets(node: tree_sitter::Node, source: &[u8], out: &mut Vec<String>) {
    let kind = node.kind();
    if kind == "heredoc_redirect" || kind == "herestring_redirect" {
        return;
    }

    if kind != "file_redirect" {
        let children = (0..node.named_child_count()).filter_map(|i| node.named_child(i));
        for child in children {
            collect_redirect_targets(child, source, out);
        }
        return;
    }

    // The destination is the last named child of the redirect.
    let destination = (0..node.named_child_count())
        .rev()
        .find_map(|i| node.named_child(i));
    let Some(destination) = destination else {
        return;
    };
    let Ok(raw) = destination.utf8_text(source) else {
        return;
    };

    let target = unquote_simple(raw.trim());
    let looks_like_fd = target.starts_with("&") || target.chars().all(|c| c.is_ascii_digit());
    if !target.is_empty() && !looks_like_fd {
        out.push(target);
    }
}
#[cfg(feature = "semantic-bash")]
/// Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from `s`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair,
/// including a lone quote character. Nothing inside the quotes is unescaped.
fn unquote_simple(s: &str) -> String {
    let double_quoted = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    let single_quoted = s
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''));
    double_quoted.or(single_quoted).unwrap_or(s).to_string()
}
/// Splits `command` into its individual command segments.
///
/// Thin wrapper around `parse_bash_segments_full` that discards the
/// "complex" flag. If that function reports an error, the whole command is
/// returned as a single segment.
// NOTE(review): `dead_code` is presumably allowed because not every build
// configuration calls this function; confirm against the callers.
#[allow(dead_code)]
pub fn parse_bash_segments(command: &str) -> Vec<String> {
    match parse_bash_segments_full(command) {
        Ok((segments, _is_complex)) => segments,
        Err(_) => vec![command.to_string()],
    }
}
/// Splits `command` into command segments and reports whether it contains
/// constructs that are considered too complex to split.
///
/// Returns `Ok((segments, is_complex))`:
/// * if `has_complex_constructs` flags the parse tree, the whole command is
///   the only segment and `is_complex` is `true`;
/// * if the tree contains a syntax error, or no segment could be collected,
///   the whole command is the only segment and `is_complex` is `false`;
/// * otherwise the segments gathered by `collect_segments` are returned.
///
/// Without the `semantic-bash` feature the command is always returned as a
/// single, non-complex segment.
///
/// # Errors
///
/// Returns a message when the bash grammar cannot be loaded into the parser
/// or when the parser produces no tree.
pub fn parse_bash_segments_full(command: &str) -> Result<(Vec<String>, bool), String> {
    #[cfg(feature = "semantic-bash")]
    {
        let language: tree_sitter::Language = tree_sitter_bash::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        if let Err(e) = parser.set_language(&language) {
            return Err(format!("Failed to set bash language: {e}"));
        }
        let Some(tree) = parser.parse(command, None) else {
            return Err("Failed to parse bash command".to_string());
        };

        let root = tree.root_node();
        let whole_command = || vec![command.to_string()];

        // The complexity check runs before the error check, so a command
        // that is both is still reported as complex.
        if has_complex_constructs(root) {
            return Ok((whole_command(), true));
        }
        if root.has_error() {
            return Ok((whole_command(), false));
        }

        let mut segments = Vec::new();
        collect_segments(root, command.as_bytes(), &mut segments);
        if segments.is_empty() {
            segments.push(command.to_string());
        }
        Ok((segments, false))
    }
    #[cfg(not(feature = "semantic-bash"))]
    {
        Ok((vec![command.to_string()], false))
    }
}
/// Reports whether any descendant of `node` is a command substitution,
/// process substitution, subshell or arithmetic expansion.
///
/// All children are visited, named and anonymous alike. `node` itself is not
/// tested, only the nodes below it. The search stops at the first match.
#[cfg(feature = "semantic-bash")]
fn has_complex_constructs(node: tree_sitter::Node) -> bool {
    (0..node.child_count())
        .filter_map(|i| node.child(i))
        .any(|child| {
            matches!(
                child.kind(),
                "command_substitution"
                    | "process_substitution"
                    | "subshell"
                    | "arithmetic_expansion"
            ) || has_complex_constructs(child)
        })
}
/// Walks `node` recursively and appends the text of each simple command found
/// beneath it to `out`, in source order.
///
/// * `node` - syntax node to inspect.
/// * `source` - the bytes the tree was parsed from, used to read node text.
/// * `out` - receives the trimmed, non-empty command texts.
///
/// Handling by node kind:
/// * `command` - its own text is one segment.
/// * `pipeline` - each stage is one segment, except stages that are
///   themselves statements (redirected, compound, conditional, loop, case,
///   function definition), which are descended into.
/// * `redirected_statement` - the redirect nodes are ignored so that redirect
///   targets never appear in a segment; whatever statement is being
///   redirected is descended into. This includes loops and conditionals such
///   as `while …; do …; done < file`.
/// * anything else (program, lists, compound statements and their clauses)
///   is a container whose named children are descended into.
#[cfg(feature = "semantic-bash")]
fn collect_segments(node: tree_sitter::Node, source: &[u8], out: &mut Vec<String>) {
    match node.kind() {
        "command" => {
            let text = node.utf8_text(source).unwrap_or("").trim();
            if !text.is_empty() {
                out.push(text.to_string());
            }
        }
        "pipeline" => {
            for i in 0..node.named_child_count() {
                let Some(stage) = node.named_child(i) else {
                    continue;
                };
                let is_nested_statement = matches!(
                    stage.kind(),
                    "redirected_statement"
                        | "compound_statement"
                        | "if_statement"
                        | "while_statement"
                        | "for_statement"
                        | "case_statement"
                        | "function_definition"
                        | "c_style_for_statement"
                );
                if is_nested_statement {
                    collect_segments(stage, source, out);
                } else {
                    let text = stage.utf8_text(source).unwrap_or("").trim();
                    if !text.is_empty() {
                        out.push(text.to_string());
                    }
                }
            }
        }
        "redirected_statement" => {
            for i in 0..node.named_child_count() {
                let Some(child) = node.named_child(i) else {
                    continue;
                };
                // Skip only the redirections themselves. The redirected body
                // may be any statement, and the commands inside it must not
                // be lost.
                if matches!(
                    child.kind(),
                    "file_redirect" | "heredoc_redirect" | "herestring_redirect"
                ) {
                    continue;
                }
                collect_segments(child, source, out);
            }
        }
        // Containers: program, list, compound/if/while/for/case statements,
        // function definitions, case items, elif/else clauses, do groups and
        // any other node kind.
        _ => {
            for i in 0..node.named_child_count() {
                if let Some(child) = node.named_child(i) {
                    collect_segments(child, source, out);
                }
            }
        }
    }
}