use ai_sandbox::{
execpolicy::{Decision, Policy, RuleType},
sandboxing::{
NetworkSandboxPolicy, SandboxCommand, SandboxManager, SandboxPolicy, SandboxPolicyExt,
},
};
use std::collections::HashMap;
use std::ffi::OsString;
use std::path::PathBuf;
#[derive(Debug)]
struct TestResult {
name: String,
passed: bool,
details: String,
}
impl TestResult {
fn new(name: &str, passed: bool, details: String) -> Self {
Self {
name: name.to_string(),
passed,
details: details.to_string(),
}
}
}
fn is_attack_blocked(result: &Option<ai_sandbox::execpolicy::RuleMatch>) -> bool {
match result {
None => false, Some(rm) => rm.decision == Decision::Deny, }
}
fn main() {
println!("=== Black-box Destructive Testing for AI Sandbox ===\n");
let mut results: Vec<TestResult> = Vec::new();
println!("[Category 1] Execution Policy Bypass Tests\n");
results.push(test_case_sensitivity_bypass());
results.push(test_whitespace_bypass());
results.push(test_empty_command_bypass());
results.push(test_very_long_command_bypass());
results.push(test_multiple_spaces_bypass());
results.push(test_null_byte_bypass());
results.push(test_unicode_homoglyph_bypass());
results.push(test_alternative_separator_bypass());
results.push(test_newline_injection_bypass());
results.push(test_tab_injection_bypass());
println!("\n[Category 2] Path Traversal Bypass Tests\n");
results.push(test_double_url_encoding_bypass());
results.push(test_unicode_fullwidth_bypass());
results.push(test_mixed_case_path_bypass());
results.push(test_symlink_traversal_bypass());
results.push(test_null_terminated_path_bypass());
println!("\n[Category 3] Environment Variable Bypass Tests\n");
results.push(test_lowercase_export_bypass());
results.push(test_env_command_bypass());
results.push(test_quoted_env_bypass());
results.push(test_multiple_env_bypass());
println!("\n[Category 4] Sandbox Policy Bypass Tests\n");
results.push(test_default_policy_danger());
results.push(test_empty_workspace_paths());
results.push(test_long_path_policy_bypass());
results.push(test_special_char_path_bypass());
println!("\n[Category 5] Logic Flaw Tests\n");
results.push(test_wildcard_exploitation());
results.push(test_priority_inversion());
results.push(test_regex_pattern_bypass());
results.push(test_empty_arglist_bypass());
println!("\n[Category 6] Advanced Attack Vectors\n");
results.push(test_whitelist_mode_bypass());
results.push(test_directory_restriction_bypass());
results.push(test_shell_escape_allowed_command());
results.push(test_path_normalization_bypass());
results.push(test_set_command_bypass());
results.push(test_indirect_command_bypass());
results.push(test_alias_expansion_bypass());
results.push(test_subshell_bypass());
results.push(test_heredoc_bypass());
results.push(test_process_substitution_bypass());
println!("\n=== TEST SUMMARY ===\n");
let total = results.len();
let passed = results.iter().filter(|r| r.passed).count();
let failed = total - passed;
println!("Total tests: {}", total);
println!("Passed (vulnerability found): {}", passed);
println!("Passed (secure): {}", failed);
println!("\n=== VULNERABILITIES FOUND ===\n");
for r in results.iter().filter(|r| r.passed) {
println!("[VULN] {}: {}", r.name, &r.details);
}
println!("\n=== TESTS THAT BLOCKED ATTACKS ===\n");
for r in results.iter().filter(|r| !r.passed) {
println!("[SECURE] {}: {}", r.name, &r.details);
}
}
fn test_case_sensitivity_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["CAT".to_string(), "/etc/passwd".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Case Sensitivity Bypass",
!blocked, if blocked {
"Case-insensitive matching works correctly - attack blocked".to_string()
} else {
"Uppercase command 'CAT' bypassed the rule for 'cat'".to_string()
},
)
}
fn test_whitespace_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["rm".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&[" rm".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Whitespace Bypass",
!blocked,
if blocked {
"Whitespace is handled correctly - attack blocked".to_string()
} else {
"Leading whitespace bypassed the deny rule".to_string()
},
)
}
fn test_empty_command_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&[]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Empty Command Bypass",
!blocked,
if blocked {
"Empty command is handled - attack blocked".to_string()
} else {
"Empty command list returns None (potential bypass)".to_string()
},
)
}
fn test_very_long_command_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let long_ls = "ls".to_string() + &"x".repeat(10000);
let result = policy.check(&[long_ls]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Very Long Command Bypass",
!blocked, if blocked {
"Long commands are handled - attack blocked".to_string()
} else {
"Very long command name bypassed the rule".to_string()
},
)
}
fn test_multiple_spaces_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["ls".to_string(), " ".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Multiple Spaces Bypass",
!blocked,
if blocked {
"Multiple spaces are handled - attack blocked".to_string()
} else {
"Multiple spaces in argument bypassed the rule".to_string()
},
)
}
fn test_null_byte_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "/etc/pass\x00wd".to_string()]);
TestResult::new(
"Null Byte Bypass",
result.is_none(),
if result.is_none() {
"Null byte in path might bypass detection".to_string()
} else {
"Null byte is handled".to_string()
},
)
}
fn test_unicode_homoglyph_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "/еtc/passwd".to_string()]);
TestResult::new(
"Unicode Homoglyph Bypass",
result.is_none(),
if result.is_none() {
"Unicode homoglyph might bypass path check".to_string()
} else {
"Unicode is handled correctly".to_string()
},
)
}
fn test_alternative_separator_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["curl".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["curl".to_string(), "http://evil.com".to_string()]);
TestResult::new(
"Alternative Separator Bypass",
result.is_none(),
if result.is_none() {
"Alternative separator might bypass".to_string()
} else {
"Separators are handled".to_string()
},
)
}
fn test_newline_injection_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["ls\n".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Newline Injection Bypass",
!blocked,
if blocked {
"Newline is handled - attack blocked".to_string()
} else {
"Newline in command might bypass".to_string()
},
)
}
fn test_tab_injection_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["ls\t".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Tab Injection Bypass",
!blocked,
if blocked {
"Tab is handled - attack blocked".to_string()
} else {
"Tab in command might bypass".to_string()
},
)
}
fn test_double_url_encoding_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "%252e%252e%252fetc/passwd".to_string()]);
TestResult::new(
"Double URL Encoding Bypass",
result.is_none(),
if result.is_none() {
"Double URL encoding might bypass path traversal detection".to_string()
} else {
"Double encoding is handled".to_string()
},
)
}
fn test_unicode_fullwidth_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["ls".to_string(), "/root".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["ls".to_string(), "/root\u{FF0F}..".to_string()]);
TestResult::new(
"Unicode Fullwidth Bypass",
result.is_none(),
if result.is_none() {
"Fullwidth characters might bypass path check".to_string()
} else {
"Fullwidth is handled".to_string()
},
)
}
fn test_mixed_case_path_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "/ETC/PASSWD".to_string()]);
TestResult::new(
"Mixed Case Path Bypass",
result.is_none(),
if result.is_none() {
"Case-insensitive path might bypass".to_string()
} else {
"Path case is handled".to_string()
},
)
}
fn test_symlink_traversal_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
let result = policy.check(&["cat".to_string(), "symlink_to_etc".to_string()]);
TestResult::new(
"Symlink Traversal Bypass",
result.is_none(),
"Symlink resolution not performed by policy engine (expected)".to_string(),
)
}
fn test_null_terminated_path_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "/etc/passwd\x00".to_string()]);
TestResult::new(
"Null-Terminated Path Bypass",
result.is_none(),
if result.is_none() {
"Null-terminated path might bypass".to_string()
} else {
"Null-terminated path is handled".to_string()
},
)
}
fn test_lowercase_export_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&["export".to_string(), "PATH=/evil".to_string()]);
TestResult::new(
"Lowercase Export Bypass",
result.is_none(),
if result.is_none() {
"Lowercase export might bypass detection".to_string()
} else {
"Lowercase export is handled".to_string()
},
)
}
fn test_env_command_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&["env".to_string(), "PATH=/evil".to_string()]);
TestResult::new(
"Env Command Bypass",
result.is_none(),
if result.is_none() {
"env command manipulation might bypass".to_string()
} else {
"env command is handled".to_string()
},
)
}
fn test_quoted_env_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&["export".to_string(), "\"PATH=/evil\"".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Quoted Env Bypass",
!blocked, if blocked {
"Quoted env is handled".to_string()
} else {
"Quoted environment variable might bypass".to_string()
},
)
}
fn test_multiple_env_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&[
"bash".to_string(),
"-c".to_string(),
"export A=1;export B=2;cat /etc/passwd".to_string(),
]);
TestResult::new(
"Multiple Env Bypass",
result.is_none(),
if result.is_none() {
"Multiple env in command string might bypass".to_string()
} else {
"Multiple env is handled".to_string()
},
)
}
fn test_default_policy_danger() -> TestResult {
let policy = SandboxPolicy::default();
let is_dangerous = matches!(policy, SandboxPolicy::DangerFullAccess);
TestResult::new(
"Default Policy Danger",
!is_dangerous, if is_dangerous {
"Default policy is DangerFullAccess - insecure by default!".to_string()
} else {
"Default policy is secure (ReadOnly)".to_string()
},
)
}
fn test_empty_workspace_paths() -> TestResult {
use ai_sandbox::sandboxing::FileSystemSandboxPolicy;
let policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![],
network_access: NetworkSandboxPolicy::NoAccess,
};
let fs_policy = policy.filesystem_policy();
let is_safe = !matches!(fs_policy, FileSystemSandboxPolicy::WorkspaceWrite { writable_roots } if writable_roots.is_empty());
TestResult::new(
"Empty Workspace Paths",
is_safe, if is_safe {
"Empty writable_roots is handled safely (downgraded to ReadOnly)".to_string()
} else {
"Empty writable_roots accepted - might allow writing anywhere!".to_string()
},
)
}
fn test_long_path_policy_bypass() -> TestResult {
let manager = SandboxManager::new();
let command = SandboxCommand {
program: OsString::from("ls"),
args: vec![],
cwd: PathBuf::from("/tmp"),
env: HashMap::new(),
};
let long_path = "/".repeat(10000);
let policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![PathBuf::from(&long_path)],
network_access: NetworkSandboxPolicy::NoAccess,
};
let is_safe = SandboxPolicyExt::is_safe(&policy);
let _result = manager.create_exec_request(command, policy);
TestResult::new(
"Long Path Policy Bypass",
!is_safe, if is_safe {
"Long path is handled safely (rejected)".to_string()
} else {
"Long path was rejected as unsafe".to_string()
},
)
}
fn test_special_char_path_bypass() -> TestResult {
let manager = SandboxManager::new();
let command = SandboxCommand {
program: OsString::from("ls"),
args: vec![],
cwd: PathBuf::from("/tmp"),
env: HashMap::new(),
};
let policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![PathBuf::from("/tmp/../etc")],
network_access: NetworkSandboxPolicy::NoAccess,
};
let is_safe = SandboxPolicyExt::is_safe(&policy);
let _result = manager.create_exec_request(command, policy);
TestResult::new(
"Special Char Path Bypass",
!is_safe, if is_safe {
"Special char path is handled safely (rejected)".to_string()
} else {
"Special char path was rejected as unsafe".to_string()
},
)
}
fn test_wildcard_exploitation() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string(), "*".to_string()], Decision::Allow, None)
.unwrap();
let result = policy.check(&["ls".to_string(), "subdir".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Wildcard Exploitation",
blocked,
if blocked {
"Wildcard works as expected".to_string()
} else {
"Wildcard rule might not match literal arguments".to_string()
},
)
}
fn test_priority_inversion() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Deny, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/tmp/file.txt".to_string()],
Decision::Allow,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "/tmp/file.txt".to_string()]);
let blocked = is_attack_blocked(&result);
TestResult::new(
"Priority Inversion",
blocked,
if blocked {
"Rule priority is correct".to_string()
} else {
"Allow rule takes precedence over deny - might be exploitable!".to_string()
},
)
}
fn test_regex_pattern_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "file.txt".to_string()],
Decision::Deny,
None,
)
.unwrap();
let result = policy.check(&["cat".to_string(), "file.tx".to_string()]);
TestResult::new(
"Regex Pattern Bypass",
result.is_none(),
if result.is_none() {
"Similar but different filename bypassed".to_string()
} else {
"Pattern matching works".to_string()
},
)
}
fn test_empty_arglist_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["ls".to_string()]);
TestResult::new(
"Empty Argument List Bypass",
result.is_none(),
if result.is_none() {
"Empty args bypassed deny rule".to_string()
} else {
"Empty args handled correctly".to_string()
},
)
}
fn test_whitelist_mode_bypass() -> TestResult {
let mut policy = Policy::new();
policy.set_whitelist_mode(true);
let result = policy.check(&["cat".to_string(), "/etc/passwd".to_string()]);
TestResult::new(
"Whitelist Mode Bypass",
result.is_none(),
if result.is_none() {
"Whitelist mode - unlisted command returned None (not explicitly denied)".to_string()
} else {
"Whitelist mode properly denies unlisted commands".to_string()
},
)
}
fn test_directory_restriction_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule_ext(
&["cat".to_string()],
Decision::Allow,
None,
RuleType::Whitelist,
Some(vec!["/tmp".to_string()]),
true,
)
.unwrap();
let result = policy.check_with_cwd(
&["cat".to_string(), "/etc/passwd".to_string()],
Some("/tmp"),
);
TestResult::new(
"Directory Restriction Bypass",
result.is_none(),
if result.is_none() {
"Directory restriction might be bypassable".to_string()
} else {
"Directory restriction is enforced".to_string()
},
)
}
fn test_shell_escape_allowed_command() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["find".to_string()], Decision::Allow, None)
.unwrap();
let result = policy.check(&[
"find".to_string(),
"/tmp".to_string(),
"-exec".to_string(),
"cat".to_string(),
"/etc/passwd".to_string(),
";".to_string(),
]);
TestResult::new(
"Shell Escape via Allowed Command",
result.is_none(),
if result.is_none() {
"Shell escape via -exec might bypass".to_string()
} else {
"Shell escape is blocked".to_string()
},
)
}
fn test_path_normalization_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["cat".to_string()], Decision::Allow, None)
.unwrap();
policy
.add_prefix_rule(
&["cat".to_string(), "/etc/passwd".to_string()],
Decision::Deny,
None,
)
.unwrap();
let paths = vec![
"/./etc/passwd",
"/etc/./passwd",
"/etc/../etc/passwd",
"/tmp/../etc/passwd",
];
let mut bypassed = false;
for path in paths {
let result = policy.check(&["cat".to_string(), path.to_string()]);
if result.is_none() {
bypassed = true;
break;
}
}
TestResult::new(
"Path Normalization Bypass",
bypassed,
if bypassed {
"Path normalization can be bypassed".to_string()
} else {
"Path normalization is handled".to_string()
},
)
}
fn test_set_command_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&["set".to_string(), "PATH=/evil".to_string()]);
TestResult::new(
"Set Command Bypass",
result.is_none(),
if result.is_none() {
"set command might bypass env restrictions".to_string()
} else {
"set command is handled".to_string()
},
)
}
fn test_indirect_command_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["python3".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&[
"python3".to_string(),
"-c".to_string(),
"import os; os.system('cat /etc/passwd')".to_string(),
]);
TestResult::new(
"Indirect Command Bypass",
result.is_none(),
if result.is_none() {
"Indirect command execution might bypass".to_string()
} else {
"Indirect execution is handled".to_string()
},
)
}
fn test_alias_expansion_bypass() -> TestResult {
let mut policy = Policy::new();
policy
.add_prefix_rule(&["ls".to_string()], Decision::Deny, None)
.unwrap();
let result = policy.check(&["ls".to_string(), "--color=auto".to_string()]);
TestResult::new(
"Alias Expansion Bypass",
result.is_none(),
if result.is_none() {
"Alias-like arguments might bypass".to_string()
} else {
"Alias expansion is handled".to_string()
},
)
}
fn test_subshell_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&[
"sh".to_string(),
"-c".to_string(),
"cat /etc/passwd".to_string(),
]);
TestResult::new(
"Subshell Bypass",
result.is_none(),
if result.is_none() {
"Subshell execution might bypass".to_string()
} else {
"Subshell is handled".to_string()
},
)
}
fn test_heredoc_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&[
"sh".to_string(),
"-c".to_string(),
"cat <<EOF\n#!/bin/bash\ncat /etc/passwd\nEOF".to_string(),
]);
TestResult::new(
"Here-document Bypass",
result.is_none(),
if result.is_none() {
"Here-document might bypass".to_string()
} else {
"Here-document is handled".to_string()
},
)
}
fn test_process_substitution_bypass() -> TestResult {
let policy = Policy::new();
let result = policy.check(&[
"sh".to_string(),
"-c".to_string(),
"cat <(cat /etc/passwd)".to_string(),
]);
TestResult::new(
"Process Substitution Bypass",
result.is_none(),
if result.is_none() {
"Process substitution might bypass".to_string()
} else {
"Process substitution is handled".to_string()
},
)
}