use aho_corasick::AhoCorasick;
use smallvec::SmallVec;
use std::borrow::Cow;
use std::ops::Range;
use std::sync::LazyLock;
/// Classification assigned to a byte range of a shell command.
///
/// The predicates on this type group the variants into "needs pattern
/// checking", "safe data" and "executable" sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SpanKind {
    /// Text in command position, and shell operators.
    Executed,
    /// Argument text; not pattern-checked.
    Argument,
    /// Code handed to an interpreter or a command substitution.
    InlineCode,
    /// Literal data; not pattern-checked.
    Data,
    /// Body of a here-document; pattern-checked.
    HeredocBody,
    /// Text whose role could not be determined; pattern-checked.
    Unknown,
    /// Shell comment; not pattern-checked.
    Comment,
}

impl SpanKind {
    /// Returns `true` unless the kind is `Argument`, `Data` or `Comment`.
    #[inline]
    #[must_use]
    pub const fn requires_pattern_check(self) -> bool {
        !matches!(self, Self::Argument | Self::Data | Self::Comment)
    }

    /// Returns `true` only for `Data` and `Comment`.
    #[inline]
    #[must_use]
    pub const fn is_safe_data(self) -> bool {
        match self {
            Self::Data | Self::Comment => true,
            Self::Executed
            | Self::Argument
            | Self::InlineCode
            | Self::HeredocBody
            | Self::Unknown => false,
        }
    }

    /// Returns `true` only for `Executed`, `InlineCode` and `Unknown`.
    #[inline]
    #[must_use]
    pub const fn is_executable(self) -> bool {
        match self {
            Self::Executed | Self::InlineCode | Self::Unknown => true,
            Self::Argument | Self::Data | Self::HeredocBody | Self::Comment => false,
        }
    }
}
/// A classified region of a command string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Span {
/// How this region is classified.
pub kind: SpanKind,
/// Half-open byte range (`start..end`) into the command string.
pub byte_range: Range<usize>,
}
impl Span {
    /// Creates a span of `kind` covering the byte range `start..end`.
    ///
    /// No validation is performed; `start` may exceed `end`, in which case
    /// the span is treated as empty by [`Span::len`] and [`Span::is_empty`].
    #[inline]
    #[must_use]
    pub const fn new(kind: SpanKind, start: usize, end: usize) -> Self {
        Self {
            kind,
            byte_range: start..end,
        }
    }

    /// Returns the slice of `command` covered by this span.
    ///
    /// # Panics
    ///
    /// Panics if the byte range is out of bounds for `command`, is reversed,
    /// or does not fall on UTF-8 character boundaries.
    #[inline]
    #[must_use]
    pub fn text<'a>(&self, command: &'a str) -> &'a str {
        &command[self.byte_range.clone()]
    }

    /// Returns the span length in bytes.
    ///
    /// A reversed range (`start > end`) yields `0` instead of underflowing.
    #[inline]
    #[must_use]
    pub const fn len(&self) -> usize {
        self.byte_range.end.saturating_sub(self.byte_range.start)
    }

    /// Returns `true` when the span covers no bytes, i.e. exactly when
    /// [`Span::len`] is `0`.
    #[inline]
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.byte_range.start >= self.byte_range.end
    }
}
/// Backing storage for spans, with inline capacity for 32 entries.
type SpanVec = SmallVec<[Span; 32]>;
/// The ordered list of classified spans produced for one command string.
#[derive(Debug, Clone, Default)]
pub struct CommandSpans {
// Spans in the order they were pushed.
spans: SpanVec,
}
impl CommandSpans {
#[inline]
#[must_use]
pub fn new() -> Self {
Self {
spans: SpanVec::new(),
}
}
#[inline]
pub fn push(&mut self, span: Span) {
self.spans.push(span);
}
#[inline]
#[must_use]
pub fn spans(&self) -> &[Span] {
&self.spans
}
pub fn executable_spans(&self) -> impl Iterator<Item = &Span> {
self.spans
.iter()
.filter(|s| s.kind.requires_pattern_check())
}
pub fn data_spans(&self) -> impl Iterator<Item = &Span> {
self.spans.iter().filter(|s| s.kind.is_safe_data())
}
#[must_use]
pub fn has_executable_content(&self) -> bool {
self.spans.iter().any(|s| s.kind.requires_pattern_check())
}
#[must_use]
pub fn is_all_data(&self) -> bool {
!self.spans.is_empty() && self.spans.iter().all(|s| s.kind.is_safe_data())
}
#[must_use]
pub fn executable_text<'a>(&self, command: &'a str) -> Vec<&'a str> {
self.executable_spans().map(|s| s.text(command)).collect()
}
}
/// Lexical context tracked on the classifier's state stack.
///
/// `Normal` is the base state; the other variants are pushed when the
/// classifier enters a single-quoted string, a double-quoted string, a `$(`
/// command substitution, a backtick substitution, or a `#` comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState {
Normal,
SingleQuote,
DoubleQuote,
CommandSubst, Backtick, Comment, }
/// Classifies the regions of a shell command string into [`SpanKind`]s.
pub struct ContextClassifier {
// Interpreter names whose inline-code flags mark the following quoted
// string as `InlineCode`.
inline_code_commands: &'static [&'static str],
}
impl Default for ContextClassifier {
fn default() -> Self {
Self::new()
}
}
impl ContextClassifier {
/// Builds a classifier with the built-in list of interpreter names.
#[must_use]
pub const fn new() -> Self {
Self {
inline_code_commands: &[
"bash", "sh", "zsh", "ksh", "dash", "python", "python3", "python2", "node", "nodejs", "ruby", "perl", "php", "lua", ],
}
}
/// Splits `command` into classified spans.
///
/// The command is scanned byte by byte while a stack of [`TokenizerState`]
/// values tracks quoting, substitution and comment nesting. A span is
/// emitted whenever the context changes; any bytes left over at the end of
/// input are emitted as a final span whose kind depends on the state still
/// on top of the stack. An empty `command` yields an empty [`CommandSpans`].
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn classify(&self, command: &str) -> CommandSpans {
let bytes = command.as_bytes();
let len = bytes.len();
if len == 0 {
return CommandSpans::new();
}
let mut spans = CommandSpans::new();
let mut stack = vec![TokenizerState::Normal];
// Start of the span currently being accumulated, and the kind it will get.
let mut span_start = 0;
let mut current_kind = SpanKind::Executed;
// Set when an inline-code flag has been seen and confirmed by
// `check_inline_code_context`; consumed by the next quoted string.
let mut pending_inline_code = false;
let mut last_word_start = 0;
// True until the first word of a segment has been seen.
let mut in_command_position = true;
// True while any enclosing state is a `$(...)` or backtick substitution.
let in_inline_context = |state_stack: &[TokenizerState]| {
state_stack
.iter()
.any(|s| matches!(s, TokenizerState::CommandSubst | TokenizerState::Backtick))
};
let mut i = 0;
while i < len {
let byte = bytes[i];
let Some(current_state) = stack.last().copied() else {
break;
};
// A backslash outside single quotes and outside comments skips itself
// and the following byte without any further interpretation.
if byte == b'\\' && current_state != TokenizerState::SingleQuote {
let effective = !matches!(current_state, TokenizerState::Comment);
if effective {
i += 1; if i < len {
i += 1;
}
continue;
}
}
match current_state {
TokenizerState::Normal => {
match byte {
// Opening single quote: flush the pending span and decide the kind of
// the quoted string (InlineCode, Executed or Data).
b'\'' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
in_command_position = false;
}
span_start = i;
stack.push(TokenizerState::SingleQuote);
let inline_here = if pending_inline_code {
pending_inline_code = false;
true
} else if last_word_start < i {
// The flag may be glued directly to the quote (no whitespace between).
let word = &command[last_word_start..i];
is_inline_code_flag(word)
&& self.check_inline_code_context(
command,
last_word_start,
word,
)
} else {
false
};
current_kind = if inline_here {
SpanKind::InlineCode
} else if in_command_position {
SpanKind::Executed
} else {
SpanKind::Data
};
}
// Opening double quote: same as above, except that a non-command,
// non-inline string is classified as Argument rather than Data.
b'"' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
in_command_position = false;
}
span_start = i;
stack.push(TokenizerState::DoubleQuote);
let inline_here = if pending_inline_code {
pending_inline_code = false;
true
} else if last_word_start < i {
let word = &command[last_word_start..i];
is_inline_code_flag(word)
&& self.check_inline_code_context(
command,
last_word_start,
word,
)
} else {
false
};
current_kind = if inline_here {
SpanKind::InlineCode
} else if in_command_position {
SpanKind::Executed
} else {
SpanKind::Argument
};
}
// `$(` opens a command substitution; the extra increment skips the `(`.
b'$' if i + 1 < len && bytes[i + 1] == b'(' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
span_start = i;
i += 1; stack.push(TokenizerState::CommandSubst);
}
b'`' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
span_start = i;
stack.push(TokenizerState::Backtick);
}
// Operators end the current segment. The operator itself becomes an
// Executed span (two bytes for `||` and `&&`) and command position resets.
b'|' | b';' | b'&' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
let mut op_len = 1;
if i + 1 < len {
let next = bytes[i + 1];
if (byte == b'|' && next == b'|') || (byte == b'&' && next == b'&')
{
op_len = 2;
}
}
spans.push(Span::new(SpanKind::Executed, i, i + op_len));
i += op_len;
span_start = i;
current_kind = SpanKind::Executed;
pending_inline_code = false;
in_command_position = true;
continue;
}
// `#` starts a comment only at the start of input or directly after
// whitespace or an operator byte.
b'#' => {
if i == 0
|| bytes[i - 1].is_ascii_whitespace()
|| matches!(bytes[i - 1], b'|' | b'&' | b';')
{
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
span_start = i;
stack.push(TokenizerState::Comment);
}
}
// Whitespace ends a word: record whether it was a confirmed inline-code
// flag and leave command position once the first word has been seen.
b' ' | b'\t' | b'\n' => {
if i > last_word_start {
let word = &command[last_word_start..i];
if is_inline_code_flag(word) {
pending_inline_code = self.check_inline_code_context(
command,
last_word_start,
word,
);
}
if in_command_position && !word.is_empty() {
in_command_position = false;
}
}
last_word_start = i + 1;
}
_ => {}
}
}
TokenizerState::DoubleQuote => {
match byte {
b'"' => {
stack.pop();
// Only emit a span when the string is not nested in a substitution; in
// that case the enclosing substitution's span covers it.
if !matches!(
stack.last(),
Some(TokenizerState::CommandSubst | TokenizerState::Backtick)
) {
spans.push(Span::new(current_kind, span_start, i + 1));
span_start = i + 1;
if current_kind == SpanKind::Executed {
in_command_position = false;
}
current_kind = SpanKind::Executed;
}
}
b'$' if i + 1 < len && bytes[i + 1] == b'(' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
span_start = i;
i += 1; stack.push(TokenizerState::CommandSubst);
}
b'`' => {
if i > span_start {
spans.push(Span::new(current_kind, span_start, i));
}
span_start = i;
stack.push(TokenizerState::Backtick);
}
_ => {}
}
}
TokenizerState::SingleQuote => {
// Only the closing quote is significant inside single quotes.
if byte == b'\'' {
stack.pop();
if !matches!(
stack.last(),
Some(TokenizerState::CommandSubst | TokenizerState::Backtick)
) {
spans.push(Span::new(current_kind, span_start, i + 1));
span_start = i + 1;
if current_kind == SpanKind::Executed {
in_command_position = false;
}
current_kind = SpanKind::Executed;
}
}
}
TokenizerState::CommandSubst => {
match byte {
b')' => {
stack.pop();
// The InlineCode span is emitted only when the outermost substitution
// closes, so nested substitutions are covered by a single span.
if !in_inline_context(&stack) {
spans.push(Span::new(SpanKind::InlineCode, span_start, i + 1));
span_start = i + 1;
match stack.last() {
Some(TokenizerState::Normal) => {
current_kind = SpanKind::Executed;
}
Some(TokenizerState::DoubleQuote) => {
current_kind = SpanKind::Argument;
}
_ => {}
}
}
}
// Any `(` inside a substitution pushes another CommandSubst level so the
// matching `)` does not close the substitution early.
b'(' => {
stack.push(TokenizerState::CommandSubst);
}
b'"' => stack.push(TokenizerState::DoubleQuote),
b'\'' => stack.push(TokenizerState::SingleQuote),
b'`' => stack.push(TokenizerState::Backtick),
b'#' => {
if i == 0 || bytes[i - 1].is_ascii_whitespace() {
stack.push(TokenizerState::Comment);
}
}
_ => {}
}
}
TokenizerState::Backtick => {
if byte == b'`' {
stack.pop();
if !in_inline_context(&stack) {
spans.push(Span::new(SpanKind::InlineCode, span_start, i + 1));
span_start = i + 1;
match stack.last() {
Some(TokenizerState::Normal) => {
current_kind = SpanKind::Executed;
}
Some(TokenizerState::DoubleQuote) => {
current_kind = SpanKind::Argument;
}
_ => {}
}
}
}
}
TokenizerState::Comment => {
// A comment runs to the next newline; the newline is part of the span.
if byte == b'\n' {
stack.pop();
if matches!(stack.last(), Some(TokenizerState::Normal)) {
spans.push(Span::new(SpanKind::Comment, span_start, i + 1));
span_start = i + 1;
current_kind = SpanKind::Executed;
}
}
}
}
i += 1;
}
// Emit whatever is left. Unterminated single quotes, and unterminated
// double quotes that were classified as Argument, become Unknown;
// unterminated substitutions become InlineCode.
if span_start < len {
let kind = match stack.last() {
Some(TokenizerState::Normal) => current_kind,
Some(TokenizerState::DoubleQuote) => {
if current_kind == SpanKind::Argument {
SpanKind::Unknown
} else {
current_kind
}
}
Some(TokenizerState::SingleQuote) | None => SpanKind::Unknown,
Some(TokenizerState::Comment) => SpanKind::Comment,
Some(TokenizerState::CommandSubst | TokenizerState::Backtick) => {
SpanKind::InlineCode
}
};
spans.push(Span::new(kind, span_start, len));
}
spans
}
/// Decides whether `flag`, found at byte offset `flag_start` in `command`,
/// belongs to an interpreter that executes the string following it.
///
/// `-S` is delegated to `env_split_string_context`. For every other flag
/// the whitespace-separated tokens between the segment start (as reported
/// by `segment_start_before_flag`) and the flag are examined from right to
/// left. Option-like tokens, tokens containing `=`, and the wrappers
/// `sudo`, `time`, `nohup`, `env` and `command` are skipped. The result is
/// `true` as soon as a token's base name (surrounding quotes, leading path
/// and a `.exe`/`.EXE` suffix removed) is a known interpreter, or a known
/// interpreter followed only by digits and dots (e.g. `python3.11`).
fn check_inline_code_context(&self, command: &str, flag_start: usize, flag: &str) -> bool {
if flag == "-S" {
return env_split_string_context(command, flag_start);
}
let before = &command[..flag_start];
let segment_start = segment_start_before_flag(command, flag_start);
let segment = &before[segment_start..];
for token in segment.split_whitespace().rev() {
if token.starts_with('-') && token.len() > 1 {
continue;
}
if token.contains('=') {
continue;
}
// Strip one pair of matching surrounding quotes, if present.
let token_unquoted = if (token.starts_with('"') && token.ends_with('"'))
|| (token.starts_with('\'') && token.ends_with('\''))
{
if token.len() >= 2 {
&token[1..token.len() - 1]
} else {
token
}
} else {
token
};
// Both `/` and `\` are treated as path separators.
let base_name = token_unquoted
.rsplit(&['/', '\\'][..])
.next()
.unwrap_or(token_unquoted);
let base_name = base_name
.strip_suffix(".exe")
.or_else(|| base_name.strip_suffix(".EXE"))
.unwrap_or(base_name);
if matches!(base_name, "sudo" | "time" | "nohup" | "env" | "command") {
continue;
}
let is_interpreter = self.inline_code_commands.iter().any(|&known| {
if base_name == known {
return true;
}
if let Some(suffix) = base_name.strip_prefix(known) {
return !suffix.is_empty()
&& suffix.chars().all(|c| c.is_ascii_digit() || c == '.');
}
false
});
if is_interpreter {
return true;
}
}
false
}
}
/// Classifies `command` with a classifier built by [`ContextClassifier::new`].
#[inline]
#[must_use]
pub fn classify_command(command: &str) -> CommandSpans {
    let classifier = ContextClassifier::new();
    classifier.classify(command)
}
/// Table of commands and flags whose arguments are treated as inert data.
#[derive(Debug, Clone)]
pub struct SafeStringRegistry {
// Commands for which every argument is data.
all_args_data: &'static [&'static str],
// Per-command flags whose value is data.
flag_data_pairs: &'static [SafeFlagEntry],
}
/// One `(command, flag)` pairing whose flag value is treated as data.
#[derive(Debug, Clone, Copy)]
pub struct SafeFlagEntry {
    /// Command base name the entry applies to.
    pub command: &'static str,
    /// Short spelling of the flag (e.g. `-m`), if any.
    pub short_flag: Option<&'static str>,
    /// Long spelling of the flag (e.g. `--message`), if any.
    pub long_flag: Option<&'static str>,
    /// Whether the flag may be followed by several value tokens.
    pub multi_value: bool,
}

impl SafeFlagEntry {
    /// Shared constructor behind all public builders.
    const fn with_flags(
        command: &'static str,
        short_flag: Option<&'static str>,
        long_flag: Option<&'static str>,
        multi_value: bool,
    ) -> Self {
        Self {
            command,
            short_flag,
            long_flag,
            multi_value,
        }
    }

    /// Single-value entry with explicitly optional short and long spellings.
    #[must_use]
    pub const fn new(
        command: &'static str,
        short_flag: Option<&'static str>,
        long_flag: Option<&'static str>,
    ) -> Self {
        Self::with_flags(command, short_flag, long_flag, false)
    }

    /// Single-value entry with only a short spelling.
    #[must_use]
    pub const fn short(command: &'static str, flag: &'static str) -> Self {
        Self::with_flags(command, Some(flag), None, false)
    }

    /// Single-value entry with only a long spelling.
    #[must_use]
    pub const fn long(command: &'static str, flag: &'static str) -> Self {
        Self::with_flags(command, None, Some(flag), false)
    }

    /// Single-value entry with both spellings.
    #[must_use]
    pub const fn both(command: &'static str, short: &'static str, long: &'static str) -> Self {
        Self::with_flags(command, Some(short), Some(long), false)
    }

    /// Multi-value entry with only a long spelling.
    #[must_use]
    pub const fn long_multi(command: &'static str, flag: &'static str) -> Self {
        Self::with_flags(command, None, Some(flag), true)
    }

    /// Multi-value entry with both spellings.
    #[must_use]
    pub const fn both_multi(
        command: &'static str,
        short: &'static str,
        long: &'static str,
    ) -> Self {
        Self::with_flags(command, Some(short), Some(long), true)
    }
}
/// The built-in registry of data-only commands and data-carrying flags.
///
/// `echo` and `printf` treat every argument as data; the remaining entries
/// list, per command, the flags whose value is data. The `bd` entries are
/// multi-value: several consecutive tokens may follow the flag.
pub static SAFE_STRING_REGISTRY: SafeStringRegistry = SafeStringRegistry {
all_args_data: &["echo", "printf"],
flag_data_pairs: &[
SafeFlagEntry::both("git", "-m", "--message"),
SafeFlagEntry::long("git", "--grep"),
SafeFlagEntry::long_multi("bd", "--description"),
SafeFlagEntry::long_multi("bd", "--title"),
SafeFlagEntry::long_multi("bd", "--notes"),
SafeFlagEntry::long_multi("bd", "--reason"),
SafeFlagEntry::both("grep", "-e", "--regexp"),
SafeFlagEntry::both("rg", "-e", "--regexp"),
SafeFlagEntry::both("ag", "-e", "--pattern"), SafeFlagEntry::both("ack", "-e", "--pattern"), SafeFlagEntry::both("gh", "-t", "--title"),
SafeFlagEntry::both("gh", "-b", "--body"),
SafeFlagEntry::both("gh", "-m", "--message"),
SafeFlagEntry::both("curl", "-d", "--data"),
SafeFlagEntry::both("curl", "-H", "--header"),
SafeFlagEntry::long("curl", "--data-raw"),
SafeFlagEntry::long("curl", "--data-binary"),
SafeFlagEntry::long("jq", "--arg"),
SafeFlagEntry::long("jq", "--argjson"),
SafeFlagEntry::long("jq", "--slurpfile"),
SafeFlagEntry::both("docker", "-l", "--label"),
SafeFlagEntry::long("kubectl", "--annotation"),
SafeFlagEntry::both("kubectl", "-l", "--label"),
SafeFlagEntry::short("xargs", "-I"),
SafeFlagEntry::long("cargo", "--message"),
SafeFlagEntry::long("npm", "--message"),
],
};
/// Lazily built multi-pattern matcher used as a cheap pre-filter: a command
/// containing none of these words (and no `#`) is returned unsanitized.
static SAFE_COMMANDS_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
    // Every command named in the registry, plus the `command` wrapper.
    const PREFILTER_WORDS: &[&str] = &[
        "echo", "printf", "git", "bd", "grep", "rg", "ag", "ack", "gh", "curl", "jq", "docker",
        "kubectl", "xargs", "cargo", "npm", "command",
    ];
    AhoCorasick::new(PREFILTER_WORDS).expect("static patterns should compile")
});
impl SafeStringRegistry {
#[must_use]
pub fn is_all_args_data(&self, command: &str) -> bool {
let base_name = command.rsplit('/').next().unwrap_or(command);
self.all_args_data.contains(&base_name)
}
#[must_use]
pub fn is_flag_data(&self, command: &str, flag: &str) -> bool {
let base_name = command.rsplit('/').next().unwrap_or(command);
self.flag_data_pairs.iter().any(|entry| {
entry.command == base_name
&& (entry.short_flag == Some(flag) || entry.long_flag == Some(flag))
})
}
#[must_use]
pub fn is_flag_data_multivalue(&self, command: &str, flag: &str) -> bool {
let base_name = command.rsplit('/').next().unwrap_or(command);
self.flag_data_pairs.iter().any(|entry| {
entry.command == base_name
&& entry.multi_value
&& (entry.short_flag == Some(flag) || entry.long_flag == Some(flag))
})
}
#[must_use]
pub fn data_flags_for_command(&self, command: &str) -> Vec<&'static str> {
let base_name = command.rsplit('/').next().unwrap_or(command);
self.flag_data_pairs
.iter()
.filter(|entry| entry.command == base_name)
.flat_map(|entry| {
let short = entry.short_flag.into_iter();
let long = entry.long_flag.into_iter();
short.chain(long)
})
.collect()
}
}
/// Reports whether an argument of `command` should be treated as data.
///
/// The first whitespace-separated word of `command` is taken as the program.
/// The result is `true` when the registry marks all of that program's
/// arguments as data, or when `preceding_flag` is a registered data flag for
/// it. A blank `command` yields `false`.
#[must_use]
pub fn is_argument_data(command: &str, preceding_flag: Option<&str>) -> bool {
    let Some(program) = command.split_whitespace().next() else {
        return false;
    };
    SAFE_STRING_REGISTRY.is_all_args_data(program)
        || preceding_flag.is_some_and(|flag| SAFE_STRING_REGISTRY.is_flag_data(program, flag))
}
/// Reports whether the segment containing `tokens[current_idx]` is followed
/// by a pipe.
///
/// The first separator token at or after `current_idx` decides: the result
/// is `true` when its text is `|` or `|&`, and `false` for any other
/// separator or when no separator follows.
fn is_piped_segment(command: &str, tokens: &[SanitizeToken], current_idx: usize) -> bool {
    tokens[current_idx..]
        .iter()
        .find(|candidate| candidate.kind == SanitizeTokenKind::Separator)
        .is_some_and(|separator| {
            matches!(&command[separator.byte_range.clone()], "|" | "|&")
        })
}
/// A data flag that has been seen and whose value token(s) are still expected.
#[derive(Clone, Copy)]
struct PendingSafeFlag<'a> {
// The flag text as it appeared (or the registry's short spelling).
flag: &'a str,
// Whether more than one following token may be consumed as the value.
multi_value: bool,
}
/// Returns `command` with known-inert regions replaced by spaces.
///
/// Comments, arguments of all-data commands, values of registered data
/// flags, and the first positional pattern of search commands are masked so
/// that later pattern matching does not trip over text that is never
/// executed. Each masked byte becomes one ASCII space, so the output has the
/// same byte length as the input. Tokens that contain command substitutions
/// or backticks are never masked.
///
/// The input is returned borrowed when nothing needs masking, when a token's
/// byte range cannot be sliced, or when the masked bytes are not valid UTF-8.
#[must_use]
#[allow(clippy::too_many_lines)] pub fn sanitize_for_pattern_matching(command: &str) -> Cow<'_, str> {
// Fast path: no registry command name anywhere and no `#` means there is
// nothing this function could mask.
let has_comment_char = command.contains('#');
if !SAFE_COMMANDS_MATCHER.is_match(command) && !has_comment_char {
return Cow::Borrowed(command);
}
let tokens = tokenize_command(command);
if tokens.is_empty() {
return Cow::Borrowed(command);
}
let mut mask_ranges: SmallVec<[Range<usize>; 8]> = SmallVec::new();
// Per-segment state; everything below is reset at each separator.
let mut segment_cmd: Option<&str> = None;
let mut segment_cmd_is_all_args_data = false;
let mut pending_safe_flag: Option<PendingSafeFlag<'_>> = None; let mut options_ended = false;
let mut search_pattern_masked = false;
let mut wrapper: WrapperState = WrapperState::None;
let mut command_query_mode = false;
let mut search_cmd_override: Option<&str> = None;
let mut git_subcommand: Option<&str> = None;
let mut git_waiting_for_value = false;
let mut git_options_ended = false;
for (i, token) in tokens.iter().enumerate() {
if token.kind == SanitizeTokenKind::Separator {
segment_cmd = None;
segment_cmd_is_all_args_data = false;
pending_safe_flag = None;
options_ended = false;
search_pattern_masked = false;
wrapper = WrapperState::None;
command_query_mode = false;
search_cmd_override = None;
git_subcommand = None;
git_waiting_for_value = false;
git_options_ended = false;
continue;
}
// Comments are masked in full.
if token.kind == SanitizeTokenKind::Comment {
mask_ranges.push(token.byte_range.clone());
continue;
}
let Some(token_text) = token.text(command) else {
return Cow::Borrowed(command);
};
// A token that is only a backslash-newline continuation carries no content.
if token_text == "\\\n" || token_text == "\\\r\n" {
continue;
}
// After `command -v` / `command -V`, every remaining token in the segment
// is masked unless it contains inline code.
if command_query_mode {
if !token.has_inline_code {
mask_ranges.push(token.byte_range.clone());
}
continue;
}
// Still looking for the segment's command word: step over wrappers
// (`sudo`, `env`, `command`), their options, and `NAME=value` assignments.
if segment_cmd.is_none() {
if let Some(next_wrapper) = WrapperState::from_command_word(token_text) {
wrapper = next_wrapper;
continue;
}
if matches!(
wrapper,
WrapperState::Command {
options_ended: false,
..
}
) && command_option_is_query(token_text)
{
command_query_mode = true;
}
let (next_wrapper, skip) = wrapper.consume_token(token_text);
wrapper = next_wrapper;
if skip {
continue;
}
if is_env_assignment(token_text) {
continue;
}
segment_cmd = Some(token_text);
segment_cmd_is_all_args_data = SAFE_STRING_REGISTRY.is_all_args_data(token_text);
search_cmd_override = None;
git_subcommand = None;
git_waiting_for_value = false;
git_options_ended = false;
// An all-data command whose segment ends in a pipe is not masked.
if segment_cmd_is_all_args_data && is_piped_segment(command, &tokens, i) {
segment_cmd_is_all_args_data = false;
}
pending_safe_flag = None;
options_ended = false;
search_pattern_masked = false;
continue;
}
let Some(cmd) = segment_cmd else {
continue;
};
// For `git`, skip global options (and their values) to find the
// subcommand; `git grep` is then handled like `grep`.
let mut is_git_subcommand_token = false;
if cmd == "git" && git_subcommand.is_none() {
if git_waiting_for_value {
git_waiting_for_value = false;
} else if token_text == "--" {
git_options_ended = true;
} else if !git_options_ended && token_text.starts_with('-') && token_text != "-" {
let takes_value = matches!(
token_text,
"-C" | "-c"
| "--git-dir"
| "--work-tree"
| "--namespace"
| "--exec-path"
| "--pager"
| "--config-env"
) || token_text.starts_with("-C")
|| token_text.starts_with("-c")
|| token_text.starts_with("--git-dir=")
|| token_text.starts_with("--work-tree=")
|| token_text.starts_with("--namespace=")
|| token_text.starts_with("--exec-path=")
|| token_text.starts_with("--pager=")
|| token_text.starts_with("--config-env=");
// A value-taking option without `=` consumes the next token as its value.
if takes_value && !token_text.contains('=') {
git_waiting_for_value = true;
}
} else {
git_subcommand = Some(token_text);
if token_text == "grep" {
search_cmd_override = Some("grep");
is_git_subcommand_token = true;
}
}
}
if segment_cmd_is_all_args_data {
if !token.has_inline_code {
mask_ranges.push(token.byte_range.clone());
}
continue;
}
// A data flag was seen on an earlier token: this token is its value.
if let Some(pending) = pending_safe_flag {
let is_flag_token = token_text.starts_with('-') && token_text != "-";
if pending.multi_value {
// A multi-value flag keeps consuming tokens until another flag or a
// token with inline code appears; that token is then processed normally.
if token.has_inline_code || is_flag_token {
pending_safe_flag = None;
} else {
if !token.has_inline_code {
mask_ranges.push(token.byte_range.clone());
if is_search_pattern_flag(cmd, pending.flag) {
search_pattern_masked = true;
}
}
pending_safe_flag = Some(pending);
continue;
}
} else {
pending_safe_flag = None;
if !token.has_inline_code {
mask_ranges.push(token.byte_range.clone());
if is_search_pattern_flag(cmd, pending.flag) {
search_pattern_masked = true;
}
}
continue;
}
}
// `--flag=value`: mask only the value, and only for registered data flags.
// Any other `-x=value` token is skipped without masking.
if let Some((flag, value_range)) = split_flag_assignment(token_text, token.byte_range.start)
{
if SAFE_STRING_REGISTRY.is_flag_data(cmd, flag) && !token.has_inline_code {
mask_ranges.push(value_range);
if is_search_pattern_flag(cmd, flag) {
search_pattern_masked = true;
}
}
continue;
}
// `-mvalue`: short data flag with the value attached in the same token.
if let Some((flag, value_range)) =
split_short_flag_attached_value(cmd, token_text, token.byte_range.start)
{
if !token.has_inline_code {
mask_ranges.push(value_range);
if is_search_pattern_flag(cmd, flag) {
search_pattern_masked = true;
}
}
continue;
}
// A bare data flag: remember it so the next token is treated as its value.
if SAFE_STRING_REGISTRY.is_flag_data(cmd, token_text) {
pending_safe_flag = Some(PendingSafeFlag {
flag: token_text,
multi_value: SAFE_STRING_REGISTRY.is_flag_data_multivalue(cmd, token_text),
});
continue;
}
// Combined short flags ending in a data flag (e.g. `-am`).
if let Some(data_flag) = combined_short_data_flag_value(cmd, token_text) {
pending_safe_flag = Some(PendingSafeFlag {
flag: data_flag,
multi_value: SAFE_STRING_REGISTRY.is_flag_data_multivalue(cmd, data_flag),
});
continue;
}
// Search commands: the first positional argument is the pattern and is
// masked, unless a pattern was already supplied through a flag.
let search_cmd = search_cmd_override.unwrap_or(cmd);
if is_search_command(search_cmd) {
if is_git_subcommand_token {
continue;
}
if token_text == "--" {
options_ended = true;
continue;
}
let is_option = !options_ended && token_text.starts_with('-') && token_text != "-";
if is_option {
continue;
}
if !search_pattern_masked && !token.has_inline_code {
mask_ranges.push(token.byte_range.clone());
search_pattern_masked = true;
}
}
}
if mask_ranges.is_empty() {
return Cow::Borrowed(command);
}
mask_ranges.sort_by_key(|r| r.start);
// NOTE(review): `merge_ranges` is defined outside this view; presumably it
// coalesces overlapping or adjacent ranges — confirm.
let merged = merge_ranges(&mask_ranges);
let bytes = command.as_bytes();
let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
let mut last = 0;
// Copy unmasked bytes through and write one space per masked byte so that
// byte offsets in the output match the input.
for range in merged {
if range.start > last {
out.extend_from_slice(&bytes[last..range.start]);
}
out.extend(std::iter::repeat_n(
b' ',
range.end.saturating_sub(range.start),
));
last = range.end;
}
if last < bytes.len() {
out.extend_from_slice(&bytes[last..]);
}
// If the masked bytes are not valid UTF-8, fall back to the unmodified input.
String::from_utf8(out).map_or(Cow::Borrowed(command), Cow::Owned)
}
/// Tracks option parsing for a wrapper command (`sudo`, `env`, `command`)
/// that precedes the real command word of a segment.
///
/// In every wrapper variant, `options_ended` is set once `--` has been seen
/// and `pending_value` is set while the previous option still expects its
/// value as the next token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WrapperState {
// No wrapper is active.
None,
Sudo {
options_ended: bool,
pending_value: bool,
},
Env {
options_ended: bool,
pending_value: bool,
},
Command {
options_ended: bool,
pending_value: bool,
},
}
impl WrapperState {
    /// Returns the initial wrapper state when the base name of `word` (the
    /// part after the last `/`) is `sudo`, `env` or `command`; otherwise
    /// `None`.
    #[inline]
    #[must_use]
    fn from_command_word(word: &str) -> Option<Self> {
        let program = word.rfind('/').map_or(word, |slash| &word[slash + 1..]);
        let wrapper = match program {
            "sudo" => Self::Sudo {
                options_ended: false,
                pending_value: false,
            },
            "env" => Self::Env {
                options_ended: false,
                pending_value: false,
            },
            "command" => Self::Command {
                options_ended: false,
                pending_value: false,
            },
            _ => return None,
        };
        Some(wrapper)
    }

    /// Feeds one token to the wrapper's option parser.
    ///
    /// Returns the next state and whether the token belongs to the wrapper
    /// (and must therefore be skipped by the caller). `command` has no
    /// options that take a value.
    #[inline]
    #[must_use]
    fn consume_token(self, token: &str) -> (Self, bool) {
        match self {
            Self::None => (self, false),
            Self::Sudo { .. } => consume_wrapper_token(token, self, sudo_option_takes_value),
            Self::Env { .. } => consume_wrapper_token(token, self, env_option_takes_value),
            Self::Command { .. } => consume_wrapper_token(token, self, |_| None),
        }
    }
}
/// Advances a wrapper's option parser by one token.
///
/// Returns the updated state and `true` when the token was consumed by the
/// wrapper: an option, an option's separate value, or `--`. Returns `false`
/// for the first token that is not a wrapper option; the state is then left
/// unchanged. `takes_value` reports whether an option expects a value and
/// how that value is supplied.
#[inline]
#[must_use]
fn consume_wrapper_token<F>(
    token: &str,
    state: WrapperState,
    takes_value: F,
) -> (WrapperState, bool)
where
    F: Fn(&str) -> Option<WrapperOptionValueMode>,
{
    let (past_options, awaiting_value) = match state {
        WrapperState::None => return (WrapperState::None, false),
        WrapperState::Sudo {
            options_ended,
            pending_value,
        }
        | WrapperState::Env {
            options_ended,
            pending_value,
        }
        | WrapperState::Command {
            options_ended,
            pending_value,
        } => (options_ended, pending_value),
    };

    // This token is the value of the previous option.
    if awaiting_value {
        return (set_wrapper_pending(state, past_options, false), true);
    }

    // After `--`, or on a non-option word, the wrapper's options are over.
    if past_options || !token.starts_with('-') {
        return (state, false);
    }

    let next_state = if token == "--" {
        set_wrapper_options_ended(state, true)
    } else {
        let expects_separate_value = matches!(
            takes_value(token),
            Some(WrapperOptionValueMode::SeparateToken)
        );
        set_wrapper_pending(state, past_options, expects_separate_value)
    };
    (next_state, true)
}
/// How a wrapper option that takes a value receives it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WrapperOptionValueMode {
// The value is part of the same token (e.g. `-uroot`, `--user=root`).
Attached,
// The value is the next token (e.g. `-u root`).
SeparateToken,
}
/// Returns `state` with its `options_ended` field replaced, keeping the
/// current `pending_value`. `WrapperState::None` is returned unchanged.
#[inline]
#[must_use]
const fn set_wrapper_options_ended(state: WrapperState, options_ended: bool) -> WrapperState {
    match state {
        WrapperState::None => WrapperState::None,
        WrapperState::Sudo {
            pending_value: keep,
            ..
        } => WrapperState::Sudo {
            options_ended,
            pending_value: keep,
        },
        WrapperState::Env {
            pending_value: keep,
            ..
        } => WrapperState::Env {
            options_ended,
            pending_value: keep,
        },
        WrapperState::Command {
            pending_value: keep,
            ..
        } => WrapperState::Command {
            options_ended,
            pending_value: keep,
        },
    }
}
/// Rebuilds `state` as the same wrapper variant with both fields replaced by
/// the given values. `WrapperState::None` is returned unchanged.
#[inline]
#[must_use]
const fn set_wrapper_pending(
    state: WrapperState,
    options_ended: bool,
    pending_value: bool,
) -> WrapperState {
    match state {
        WrapperState::None => WrapperState::None,
        WrapperState::Command { .. } => WrapperState::Command {
            options_ended,
            pending_value,
        },
        WrapperState::Env { .. } => WrapperState::Env {
            options_ended,
            pending_value,
        },
        WrapperState::Sudo { .. } => WrapperState::Sudo {
            options_ended,
            pending_value,
        },
    }
}
/// Reports whether the `sudo` option `token` takes a value and, if so,
/// whether that value is attached to the token or follows as the next token.
///
/// Long options are recognised exactly (`--user`) or with `=value`
/// (`--user=root`); short options exactly (`-u`) or with trailing text
/// (`-uroot`). Returns `None` for options that take no value.
#[inline]
#[must_use]
fn sudo_option_takes_value(token: &str) -> Option<WrapperOptionValueMode> {
    const SHORT_VALUE_OPTS: &[&str] = &["-u", "-g", "-h", "-p", "-C", "-t", "-a", "-U", "-r", "-D"];
    const LONG_VALUE_OPTS: &[&str] = &[
        "--user", "--group", "--host", "--prompt", "--role", "--chdir",
    ];

    if token.starts_with("--") {
        return LONG_VALUE_OPTS
            .iter()
            .find_map(|opt| match token.strip_prefix(*opt)? {
                "" => Some(WrapperOptionValueMode::SeparateToken),
                rest if rest.starts_with('=') => Some(WrapperOptionValueMode::Attached),
                _ => None,
            });
    }

    SHORT_VALUE_OPTS.iter().find_map(|opt| {
        let rest = token.strip_prefix(*opt)?;
        Some(if rest.is_empty() {
            WrapperOptionValueMode::SeparateToken
        } else {
            WrapperOptionValueMode::Attached
        })
    })
}
/// Reports whether the `env` option `token` takes a value and, if so,
/// whether that value is attached to the token or follows as the next token.
///
/// Recognises `-u`/`--unset` and `-C`/`--chdir`, either exact, with attached
/// text (short form) or with `=value` (long form). Returns `None` otherwise.
#[inline]
#[must_use]
fn env_option_takes_value(token: &str) -> Option<WrapperOptionValueMode> {
    const SHORT_VALUE_OPTS: &[&str] = &["-u", "-C"];
    const LONG_VALUE_OPTS: &[&str] = &["--unset", "--chdir"];

    if token.starts_with("--") {
        return LONG_VALUE_OPTS
            .iter()
            .find_map(|opt| match token.strip_prefix(*opt)? {
                "" => Some(WrapperOptionValueMode::SeparateToken),
                rest if rest.starts_with('=') => Some(WrapperOptionValueMode::Attached),
                _ => None,
            });
    }

    SHORT_VALUE_OPTS.iter().find_map(|opt| {
        let rest = token.strip_prefix(*opt)?;
        Some(if rest.is_empty() {
            WrapperOptionValueMode::SeparateToken
        } else {
            WrapperOptionValueMode::Attached
        })
    })
}
/// Returns `true` when `token` is a short-option cluster for the `command`
/// builtin that contains `v` or `V` (e.g. `-v`, `-pV`).
///
/// Long options, a bare `-`, and non-option words yield `false`.
#[inline]
#[must_use]
fn command_option_is_query(token: &str) -> bool {
    match token.strip_prefix('-') {
        Some(letters) if !letters.is_empty() && !letters.starts_with('-') => {
            letters.bytes().any(|letter| matches!(letter, b'v' | b'V'))
        }
        _ => false,
    }
}
/// Returns `true` when `token` has the form `NAME=value` with a valid shell
/// variable name before the first `=`.
///
/// A valid name is non-empty, consists of ASCII letters, digits and
/// underscores, and does not start with a digit. A word such as `1=2` is not
/// an assignment to the shell (it is run as a command word), so it is
/// rejected here as well. The value may be empty.
#[inline]
#[must_use]
fn is_env_assignment(token: &str) -> bool {
    let Some((name, _value)) = token.split_once('=') else {
        return false;
    };
    let mut name_bytes = name.bytes();
    let Some(first) = name_bytes.next() else {
        return false;
    };
    // The leading-character rule also rejects option-like tokens (`-x=1`).
    (first.is_ascii_alphabetic() || first == b'_')
        && name_bytes.all(|b| b.is_ascii_alphanumeric() || b == b'_')
}
/// Returns `true` when the base name of `cmd` (after the last `/`) is one of
/// the search tools `rg`, `grep`, `ag` or `ack`.
#[inline]
#[must_use]
fn is_search_command(cmd: &str) -> bool {
    const SEARCH_TOOLS: [&str; 4] = ["rg", "grep", "ag", "ack"];
    let program = cmd.rfind('/').map_or(cmd, |slash| &cmd[slash + 1..]);
    SEARCH_TOOLS.contains(&program)
}
/// Returns `true` when `flag` is the option that supplies the search pattern
/// for `cmd`: `-e`/`--regexp` for `rg` and `grep`, `-e`/`--pattern` for `ag`
/// and `ack`. Only the base name of `cmd` is considered.
#[inline]
#[must_use]
fn is_search_pattern_flag(cmd: &str, flag: &str) -> bool {
    let program = cmd.rfind('/').map_or(cmd, |slash| &cmd[slash + 1..]);
    let long_spelling = match program {
        "rg" | "grep" => "--regexp",
        "ag" | "ack" => "--pattern",
        _ => return false,
    };
    flag == "-e" || flag == long_spelling
}
/// Splits an option token of the form `-flag=value` at its first `=`.
///
/// Returns the flag text and the byte range of the value within the whole
/// command, where `token_start` is the token's offset in that command.
/// Returns `None` when the token does not start with `-`, contains no `=`,
/// or has an empty value.
#[must_use]
fn split_flag_assignment(token: &str, token_start: usize) -> Option<(&str, Range<usize>)> {
    let eq_at = token.find('=').filter(|_| token.starts_with('-'))?;
    let value_from = eq_at + 1;
    if value_from == token.len() {
        return None;
    }
    let absolute = (token_start + value_from)..(token_start + token.len());
    Some((&token[..eq_at], absolute))
}
/// Finds a short data flag with its value attached in the same token, such
/// as `-mfix` or `-amfix`.
///
/// The letters after the leading `-` are scanned left to right. The first
/// letter that is a registered short data flag for `cmd`, and that is
/// followed by at least one more byte, wins. The rest of the token is
/// reported as the value's byte range in the whole command (`token_start` is
/// the token's offset). Long options, tokens of two bytes or fewer, and
/// tokens containing `=` yield `None`.
#[must_use]
fn split_short_flag_attached_value(
    cmd: &str,
    token: &str,
    token_start: usize,
) -> Option<(&'static str, Range<usize>)> {
    let eligible = token.len() > 2
        && token.starts_with('-')
        && !token.starts_with("--")
        && !token.contains('=');
    if !eligible {
        return None;
    }

    let program = cmd.rfind('/').map_or(cmd, |slash| &cmd[slash + 1..]);
    let raw = token.as_bytes();
    // Every letter except the last one has something after it to act as value.
    let candidates = &raw[1..raw.len() - 1];

    candidates.iter().enumerate().find_map(|(offset, letter)| {
        let short_flag = SAFE_STRING_REGISTRY
            .flag_data_pairs
            .iter()
            .filter(|entry| entry.command == program)
            .filter_map(|entry| entry.short_flag)
            .find(|short| short.as_bytes().get(1) == Some(letter))?;
        // `offset` is relative to the first letter, so the value begins two
        // bytes later: one for the leading `-`, one for the flag letter.
        let value_from = token_start + offset + 2;
        Some((short_flag, value_from..token_start + token.len()))
    })
}
/// Detects a cluster of short flags whose final letter is a registered data
/// flag for `cmd` (e.g. `-am`), meaning the next token is that flag's value.
///
/// Returns the registry's short spelling of the flag. Long options, tokens
/// of two bytes or fewer, and tokens containing `=` yield `None`.
#[must_use]
fn combined_short_data_flag_value(cmd: &str, token: &str) -> Option<&'static str> {
    let is_short_cluster = token.len() > 2
        && token.starts_with('-')
        && !token.starts_with("--")
        && !token.contains('=');
    if !is_short_cluster {
        return None;
    }

    let program = cmd.rfind('/').map_or(cmd, |slash| &cmd[slash + 1..]);
    let final_letter = *token.as_bytes().last()?;

    for entry in SAFE_STRING_REGISTRY.flag_data_pairs {
        if entry.command != program {
            continue;
        }
        if let Some(short) = entry.short_flag {
            if short.as_bytes().get(1) == Some(&final_letter) {
                return Some(short);
            }
        }
    }
    None
}
/// Kind of token produced by `tokenize_command`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SanitizeTokenKind {
// A shell word, including any quoted parts and substitutions.
Word,
// A newline or one of `|`, `||`, `;`, `&`, `&&`.
Separator,
// A `#` comment up to, but not including, the next newline.
Comment,
}
/// A token of the command as seen by the sanitizer.
#[derive(Debug, Clone)]
struct SanitizeToken {
kind: SanitizeTokenKind,
// Byte range of the token within the command string.
byte_range: Range<usize>,
// True when the token contains a `$(` substitution or a backtick.
has_inline_code: bool,
}
impl SanitizeToken {
#[inline]
#[must_use]
fn text<'a>(&self, command: &'a str) -> Option<&'a str> {
command.get(self.byte_range.clone())
}
}
/// Token list with inline capacity for 16 tokens.
type SanitizeTokens = SmallVec<[SanitizeToken; 16]>;
/// Splits `command` into words, separators and comments.
///
/// Whitespace other than newline is skipped between tokens. A newline is a
/// separator token of its own. Operators are recognised by
/// `consume_separator_token`. A `#` at the start of a token begins a comment
/// that runs up to (not including) the next newline. Everything else is
/// consumed as a word by `consume_word_token`.
fn tokenize_command(command: &str) -> SanitizeTokens {
    let raw = command.as_bytes();
    let total = raw.len();
    let mut out = SanitizeTokens::new();
    let mut cursor = 0;

    while cursor < total {
        let head = raw[cursor];

        if head == b'\n' {
            out.push(SanitizeToken {
                kind: SanitizeTokenKind::Separator,
                byte_range: cursor..cursor + 1,
                has_inline_code: false,
            });
            cursor += 1;
            continue;
        }

        if head.is_ascii_whitespace() {
            cursor += 1;
            continue;
        }

        if let Some(after_operator) = consume_separator_token(raw, cursor, total, &mut out) {
            cursor = after_operator;
            continue;
        }

        if head == b'#' {
            let comment_end = raw[cursor..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(total, |offset| cursor + offset);
            out.push(SanitizeToken {
                kind: SanitizeTokenKind::Comment,
                byte_range: cursor..comment_end,
                has_inline_code: false,
            });
            cursor = comment_end;
            continue;
        }

        let (word_end, has_inline_code) = consume_word_token(command, raw, cursor, total);
        if cursor < word_end {
            out.push(SanitizeToken {
                kind: SanitizeTokenKind::Word,
                byte_range: cursor..word_end,
                has_inline_code,
            });
        }
        cursor = word_end;
    }

    out
}
/// Recognises a shell operator at `bytes[i]` and records it as a separator.
///
/// `|` and `&` form a two-byte operator when doubled (`||`, `&&`); `;` is
/// always a single byte. On a match the token is pushed onto `tokens` and
/// the index just past the operator is returned; otherwise `None` is
/// returned and `tokens` is left untouched.
#[inline]
fn consume_separator_token(
    bytes: &[u8],
    i: usize,
    len: usize,
    tokens: &mut SanitizeTokens,
) -> Option<usize> {
    let width = match bytes[i] {
        op @ (b'|' | b'&') => {
            let doubled = i + 1 < len && bytes[i + 1] == op;
            if doubled { 2 } else { 1 }
        }
        b';' => 1,
        _ => return None,
    };
    let end = i + width;
    tokens.push(SanitizeToken {
        kind: SanitizeTokenKind::Separator,
        byte_range: i..end,
        has_inline_code: false,
    });
    Some(end)
}
/// Consumes one shell word starting at byte `i`.
///
/// The word ends at unquoted whitespace, at one of `|`, `;`, `&`, or at the
/// end of input. Quoted strings, backslash escapes, `$(...)` substitutions
/// and backtick substitutions are consumed as part of the word.
///
/// Returns the index just past the word and whether the word contains a
/// command substitution or backticks.
///
/// A `$(...)` inside double quotes is skipped as a whole, including any
/// quotes nested inside it. Otherwise an inner `"` would end the outer
/// string early, splitting the word and leaving the later pieces without
/// the inline-code flag.
#[must_use]
fn consume_word_token(command: &str, bytes: &[u8], mut i: usize, len: usize) -> (usize, bool) {
    let mut has_inline_code = false;
    while i < len {
        let b = bytes[i];
        if b.is_ascii_whitespace() || matches!(b, b'|' | b';' | b'&') {
            break;
        }
        match b {
            b'\\' => {
                // A backslash followed by CRLF is a three-byte line continuation.
                if i + 2 < len && bytes[i + 1] == b'\r' && bytes[i + 2] == b'\n' {
                    i += 3;
                } else {
                    i = (i + 2).min(len);
                }
            }
            b'\'' => {
                // Single quotes: everything up to the next `'` is literal.
                i += 1;
                while i < len && bytes[i] != b'\'' {
                    i += 1;
                }
                if i < len {
                    i += 1;
                }
            }
            b'"' => {
                i += 1;
                while i < len {
                    match bytes[i] {
                        b'"' => {
                            i += 1;
                            break;
                        }
                        b'\\' => {
                            i = (i + 2).min(len);
                        }
                        b'$' if i + 1 < len && bytes[i + 1] == b'(' => {
                            has_inline_code = true;
                            // Skip the whole substitution so that quotes
                            // inside it cannot close the outer string.
                            i = consume_dollar_paren(command, i);
                        }
                        b'`' => {
                            has_inline_code = true;
                            i = consume_backticks(command, i);
                        }
                        _ => {
                            i += 1;
                        }
                    }
                }
            }
            b'$' if i + 1 < len && bytes[i + 1] == b'(' => {
                has_inline_code = true;
                i = consume_dollar_paren(command, i);
            }
            b'`' => {
                has_inline_code = true;
                i = consume_backticks(command, i);
            }
            _ => {
                i += 1;
            }
        }
    }
    (i, has_inline_code)
}
/// Skips the `$(...)` substitution that starts at byte `start` and returns
/// the index just past its closing `)`, or `command.len()` when it is
/// unterminated.
#[must_use]
fn consume_dollar_paren(command: &str, start: usize) -> usize {
    const TOP_LEVEL: usize = 0;
    consume_dollar_paren_recursive(command, start, TOP_LEVEL)
}
fn consume_dollar_paren_recursive(command: &str, start: usize, recursion_depth: usize) -> usize {
if recursion_depth > 500 {
return command.len(); }
let bytes = command.as_bytes();
let len = bytes.len();
debug_assert!(bytes.get(start) == Some(&b'$'));
debug_assert!(bytes.get(start + 1) == Some(&b'('));
let mut i = start + 2;
let mut depth: u32 = 1;
while i < len {
match bytes[i] {
b'(' => {
depth += 1;
i += 1;
}
b')' => {
if depth == 1 {
return i + 1;
}
depth = depth.saturating_sub(1);
i += 1;
}
b'\\' => {
i = (i + 2).min(len);
}
b'\'' => {
i += 1;
while i < len && bytes[i] != b'\'' {
i += 1;
}
if i < len {
i += 1;
}
}
b'"' => {
i += 1;
while i < len {
match bytes[i] {
b'"' => {
i += 1;
break;
}
b'\\' => {
i = (i + 2).min(len);
}
b'$' if i + 1 < len && bytes[i + 1] == b'(' => {
i = consume_dollar_paren_recursive(command, i, recursion_depth + 1);
}
_ => {
i += 1;
}
}
}
}
_ => {
i += 1;
}
}
}
len
}
/// Returns the byte offset just past the backtick substitution that opens at
/// `start`.
///
/// A backslash skips the byte that follows it, so an escaped backtick does not
/// close the substitution. When no closing backtick exists the scan runs to
/// the end of `command` and that offset is returned.
///
/// `start` must point at the opening backtick (checked in debug builds).
#[must_use]
fn consume_backticks(command: &str, start: usize) -> usize {
    let bytes = command.as_bytes();
    let len = bytes.len();
    debug_assert!(bytes.get(start) == Some(&b'`'));
    let mut cursor = start + 1;
    loop {
        match bytes.get(cursor) {
            // Ran off the end without finding the closing backtick.
            None => return cursor,
            Some(b'`') => return cursor + 1,
            Some(b'\\') => cursor = (cursor + 2).min(len),
            Some(_) => cursor += 1,
        }
    }
}
/// Reports whether the command segment leading up to `flag_start` mentions `env`.
///
/// The segment runs from the nearest preceding `|`, `&` or `;` (or the start
/// of `command`) up to `flag_start`. It is split on whitespace, leading
/// backslashes are stripped from each token, and the function returns `true`
/// as soon as a token equals `env` or ends with `/env`.
///
/// # Panics
///
/// Panics if `flag_start` is past the end of `command` or not on a character
/// boundary, because the segment is taken by direct slicing.
#[must_use]
fn env_split_string_context(command: &str, flag_start: usize) -> bool {
    let preceding = &command[segment_start_before_flag(command, flag_start)..flag_start];
    for raw in preceding.split_whitespace() {
        let word = raw.trim_start_matches('\\');
        if word == "env" || word.ends_with("/env") {
            return true;
        }
    }
    false
}
/// Decides whether `word` is a short-option cluster that introduces inline code.
///
/// Returns `true` for exactly `-S`, and for any word that starts with a single
/// `-` and has a `c`, `e` or `r` (either case) somewhere after the dash.
/// Long options (`--...`), a bare `-`, and words without a leading dash are
/// rejected.
#[inline]
#[must_use]
fn is_inline_code_flag(word: &str) -> bool {
    match word.as_bytes() {
        [b'-', b'S'] => true,
        [b'-', b'-', ..] => false,
        [b'-', letters @ ..] => letters
            .iter()
            .any(|b| matches!(b, b'c' | b'C' | b'e' | b'E' | b'r' | b'R')),
        _ => false,
    }
}
/// Finds where the command segment containing `flag_start` begins.
///
/// Searches backwards from `flag_start` (clamped to the length of `command`)
/// for the nearest `|`, `&` or `;` byte and returns the offset immediately
/// after it. Doubled operators (`||`, `&&`) give the same answer because the
/// byte closest to the flag is the one that is found. Returns `0` when no
/// separator precedes the flag.
#[must_use]
fn segment_start_before_flag(command: &str, flag_start: usize) -> usize {
    let bytes = command.as_bytes();
    let limit = flag_start.min(bytes.len());
    bytes[..limit]
        .iter()
        .rposition(|&b| matches!(b, b'|' | b'&' | b';'))
        .map_or(0, |separator| separator + 1)
}
/// Coalesces consecutive ranges that overlap or touch.
///
/// Ranges are visited in the order given. A range whose `start` is at or
/// before the `end` of the most recently emitted range is folded into it;
/// otherwise it is emitted as a new range. Only neighbours in the input order
/// are compared, so the input is presumably sorted by `start` — verify at the
/// call site.
#[must_use]
fn merge_ranges(ranges: &[Range<usize>]) -> SmallVec<[Range<usize>; 8]> {
    let mut merged: SmallVec<[Range<usize>; 8]> = SmallVec::new();
    for range in ranges {
        match merged.last_mut() {
            Some(previous) if range.start <= previous.end => {
                // Extend the previous range only when this one reaches further.
                if range.end > previous.end {
                    previous.end = range.end;
                }
            }
            _ => merged.push(range.clone()),
        }
    }
    merged
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_command() {
let spans = classify_command("git status");
assert_eq!(spans.spans().len(), 1);
assert_eq!(spans.spans()[0].kind, SpanKind::Executed);
}
#[test]
fn test_single_quoted_string() {
let cmd = "git commit -m 'Fix rm -rf detection'";
let spans = classify_command(cmd);
assert!(spans.spans().len() >= 2);
let data_span = spans.spans().iter().find(|s| s.kind == SpanKind::Data);
assert!(data_span.is_some());
let data_span = data_span.unwrap();
assert_eq!(data_span.text(cmd), "'Fix rm -rf detection'");
}
#[test]
fn test_double_quoted_string() {
let cmd = "echo \"hello world\"";
let spans = classify_command(cmd);
let arg_span = spans.spans().iter().find(|s| s.kind == SpanKind::Argument);
assert!(arg_span.is_some());
assert_eq!(arg_span.unwrap().text(cmd), "\"hello world\"");
}
#[test]
fn test_unclosed_single_quote_is_unknown() {
let cmd = "echo 'rm -rf /";
let spans = classify_command(cmd);
let last_span = spans.spans().last().expect("last span");
assert_eq!(last_span.kind, SpanKind::Unknown);
assert!(last_span.text(cmd).contains("rm -rf"));
}
#[test]
fn test_comment_at_eof_is_comment_span() {
let cmd = "echo safe # rm -rf /";
let spans = classify_command(cmd);
let comment_span = spans.spans().iter().find(|s| s.kind == SpanKind::Comment);
assert!(comment_span.is_some());
assert_eq!(comment_span.unwrap().text(cmd), "# rm -rf /");
}
#[test]
fn test_comment_after_separator_is_comment_span() {
let cmd = "echo safe;# rm -rf /";
let spans = classify_command(cmd);
let comment_span = spans.spans().iter().find(|s| s.kind == SpanKind::Comment);
assert!(comment_span.is_some());
assert_eq!(comment_span.unwrap().text(cmd), "# rm -rf /");
}
#[test]
fn test_command_substitution() {
let cmd = "echo $(rm -rf /)";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
assert_eq!(inline_span.unwrap().text(cmd), "$(rm -rf /)");
}
#[test]
fn test_backtick_substitution() {
let cmd = "echo `rm -rf /`";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
assert_eq!(inline_span.unwrap().text(cmd), "`rm -rf /`");
}
#[test]
fn test_env_split_string_marks_inline_code() {
let cmd = "env FOO=1 -S \"rm -rf /\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
assert_eq!(inline_span.unwrap().text(cmd), "\"rm -rf /\"");
}
#[test]
fn test_pipe() {
let cmd = "echo hi | cat";
let spans = classify_command(cmd);
assert!(spans.spans().len() >= 2);
for span in spans.executable_spans() {
assert!(span.kind.is_executable());
}
}
#[test]
fn test_semicolon_separator() {
let cmd = "echo a; echo b";
let spans = classify_command(cmd);
assert!(spans.has_executable_content());
}
#[test]
fn test_and_separator() {
let cmd = "true && rm -rf /";
let spans = classify_command(cmd);
let executable_text: Vec<_> = spans.executable_text(cmd);
assert!(!executable_text.is_empty());
}
#[test]
fn test_bash_c_inline_code() {
let cmd = "bash -c \"rm -rf /\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after bash -c"
);
}
#[test]
fn test_bash_c_single_quote_inline_code() {
let cmd = "bash -c 'rm -rf /'";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after bash -c with single quotes"
);
}
#[test]
fn test_bash_c_attached_single_quote_inline_code() {
let cmd = "bash -c'rm -rf /'";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after bash -c without space"
);
}
#[test]
fn test_bash_lc_inline_code() {
let cmd = "bash -lc \"rm -rf /\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after bash -lc"
);
}
#[test]
fn test_bash_lc_attached_single_quote_inline_code() {
let cmd = "bash -lc'echo rm -rf /'";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after bash -lc without space"
);
}
#[test]
fn test_python_c_inline_code() {
let cmd = "python -c \"import os; os.system('rm -rf /')\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after python -c"
);
}
#[test]
fn test_node_e_inline_code() {
let cmd = "node -e \"require('child_process').execSync('rm -rf /')\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after node -e"
);
}
#[test]
fn test_double_quote_with_substitution() {
let cmd = "echo \"$(rm -rf /)\"";
let spans = classify_command(cmd);
for span in spans.spans() {
if span.text(cmd).contains("$(") {
assert!(
span.kind == SpanKind::Unknown || span.kind == SpanKind::InlineCode,
"Double-quoted string with substitution should not be safe Argument"
);
}
}
}
#[test]
fn test_escaped_quote() {
let cmd = "echo \"hello \\\"world\\\"\"";
let spans = classify_command(cmd);
assert!(!spans.spans().is_empty());
}
#[test]
fn test_false_positive_case_bd_create() {
let cmd = "bd create --description=\"This pattern blocks rm -rf\"";
let spans = classify_command(cmd);
let desc_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("rm -rf"));
if let Some(span) = desc_span {
assert!(
!span.kind.requires_pattern_check() || span.kind == SpanKind::Argument,
"Description argument should not require pattern check"
);
}
}
#[test]
fn test_false_positive_case_git_commit() {
let cmd = "git commit -m \"Fix git reset --hard detection\"";
let spans = classify_command(cmd);
let msg_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("reset --hard"));
if let Some(span) = msg_span {
assert_eq!(span.kind, SpanKind::Argument);
}
}
#[test]
fn test_false_positive_case_rg_pattern() {
let cmd = "rg -n \"rm -rf\" src/main.rs";
let spans = classify_command(cmd);
let pattern_span = spans.spans().iter().find(|s| s.text(cmd) == "\"rm -rf\"");
if let Some(span) = pattern_span {
assert_eq!(span.kind, SpanKind::Argument);
}
}
// Pins `SpanKind::requires_pattern_check` for every variant: kinds that may be
// executed need a pattern check; argument, data and comment kinds do not.
#[test]
fn test_span_kind_requires_pattern_check() {
    assert!(SpanKind::Executed.requires_pattern_check());
    assert!(SpanKind::InlineCode.requires_pattern_check());
    assert!(SpanKind::HeredocBody.requires_pattern_check());
    assert!(SpanKind::Unknown.requires_pattern_check());
    assert!(!SpanKind::Data.requires_pattern_check());
    assert!(!SpanKind::Argument.requires_pattern_check());
    assert!(!SpanKind::Comment.requires_pattern_check());
}
// Pins `SpanKind::is_safe_data`: only `Data` and `Comment` count as safe data.
#[test]
fn test_span_kind_is_safe_data() {
    assert!(SpanKind::Data.is_safe_data());
    assert!(SpanKind::Comment.is_safe_data());
    assert!(!SpanKind::Argument.is_safe_data());
    assert!(!SpanKind::Executed.is_safe_data());
}
#[test]
fn test_empty_command() {
let spans = classify_command("");
assert!(spans.spans().is_empty());
}
#[test]
fn test_whitespace_only() {
let spans = classify_command(" ");
assert!(!spans.spans().is_empty());
}
#[test]
fn test_nested_command_substitution() {
let cmd = "echo $(echo $(rm -rf /))";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
}
#[test]
fn test_nested_command_substitution_parens() {
let cmd = "echo $( ( echo inner ) )";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
assert_eq!(inline_span.unwrap().text(cmd), "$( ( echo inner ) )");
}
#[test]
fn test_command_substitution_with_comment() {
let cmd = "echo $(echo # ) \n)";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(inline_span.is_some());
assert_eq!(inline_span.unwrap().text(cmd), "$(echo # ) \n)");
}
#[test]
fn test_single_quote_preserves_special_chars() {
let cmd = "echo '$HOME'";
let spans = classify_command(cmd);
let data_span = spans.spans().iter().find(|s| s.kind == SpanKind::Data);
assert!(data_span.is_some());
assert_eq!(data_span.unwrap().text(cmd), "'$HOME'");
}
#[test]
fn test_mixed_quotes() {
let cmd = "echo 'single' \"double\" plain";
let spans = classify_command(cmd);
let data_count = spans
.spans()
.iter()
.filter(|s| s.kind == SpanKind::Data)
.count();
let arg_count = spans
.spans()
.iter()
.filter(|s| s.kind == SpanKind::Argument)
.count();
assert!(data_count >= 1, "Should have single-quoted Data span");
assert!(arg_count >= 1, "Should have double-quoted Argument span");
}
#[test]
fn test_or_operator() {
let cmd = "false || rm -rf /";
let spans = classify_command(cmd);
assert!(spans.has_executable_content());
}
#[test]
fn test_path_prefixed_command() {
let cmd = "/usr/bin/bash -c \"rm -rf /\"";
let spans = classify_command(cmd);
let inline_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
inline_span.is_some(),
"Should detect inline code after /usr/bin/bash -c"
);
}
#[test]
fn test_performance_typical_commands() {
use std::time::Instant;
let commands = [
"git status",
"git commit -m 'Fix bug in parser'",
"echo \"hello world\" | cat",
"ls -la /tmp",
"cargo test --release",
"python -c \"print('hello')\"",
"bash -c \"echo test && echo done\"",
"docker ps --all --format '{{.Names}}'",
];
for cmd in &commands {
let _ = classify_command(cmd);
}
let iterations = 1000;
let start = Instant::now();
for _ in 0..iterations {
for cmd in &commands {
let _ = classify_command(cmd);
}
}
let elapsed = start.elapsed();
let total_commands = iterations * commands.len();
let avg_nanoseconds = elapsed.as_nanos() / total_commands as u128;
#[allow(clippy::cast_precision_loss)]
let avg_microseconds = avg_nanoseconds as f64 / 1000.0;
assert!(
avg_microseconds < 100.0,
"Average classification time {avg_microseconds:.2}μs exceeds 100μs budget"
);
eprintln!(
"Context classification performance: {avg_microseconds:.2}μs/command ({} commands, {} iterations)",
commands.len(),
iterations
);
}
// `echo` is registered as all-args-data both by bare name and by absolute path.
#[test]
fn test_registry_echo_is_all_data() {
    assert!(SAFE_STRING_REGISTRY.is_all_args_data("echo"));
    assert!(SAFE_STRING_REGISTRY.is_all_args_data("/bin/echo"));
    assert!(SAFE_STRING_REGISTRY.is_all_args_data("/usr/bin/echo"));
}
#[test]
fn test_registry_printf_is_all_data() {
assert!(SAFE_STRING_REGISTRY.is_all_args_data("printf"));
assert!(SAFE_STRING_REGISTRY.is_all_args_data("/usr/bin/printf"));
}
#[test]
fn test_registry_bash_is_not_all_data() {
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("bash"));
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("sh"));
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("python"));
}
#[test]
fn test_registry_git_message_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("git", "-m"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("git", "--message"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("git", "-c"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("git", "--exec"));
}
#[test]
fn test_registry_bd_description_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("bd", "--description"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("bd", "--title"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("bd", "--notes"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("bd", "--reason"));
}
#[test]
fn test_registry_bd_multivalue_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data_multivalue("bd", "--notes"));
assert!(SAFE_STRING_REGISTRY.is_flag_data_multivalue("bd", "--description"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data_multivalue("git", "-m"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data_multivalue("grep", "-e"));
}
#[test]
fn test_registry_grep_pattern_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("grep", "-e"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("grep", "--regexp"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("grep", "-F"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("grep", "--fixed-strings"));
}
#[test]
fn test_registry_gh_cli_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "-t"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "--title"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "-b"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "--body"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "-m"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("gh", "--message"));
}
#[test]
fn test_registry_data_flags_for_git() {
let flags = SAFE_STRING_REGISTRY.data_flags_for_command("git");
assert!(flags.contains(&"-m"));
assert!(flags.contains(&"--message"));
}
#[test]
fn test_registry_data_flags_for_grep() {
let flags = SAFE_STRING_REGISTRY.data_flags_for_command("grep");
assert!(flags.contains(&"-e"));
assert!(flags.contains(&"--regexp"));
assert!(!flags.contains(&"-F"));
assert!(!flags.contains(&"--fixed-strings"));
}
#[test]
fn test_is_argument_data_echo() {
assert!(is_argument_data("echo \"rm -rf /\"", None));
}
#[test]
fn test_is_argument_data_git_commit_message() {
assert!(is_argument_data("git commit -m \"Fix rm -rf\"", Some("-m")));
}
#[test]
fn test_is_argument_data_rg_pattern() {
assert!(is_argument_data("rg -e \"rm -rf\" src/", Some("-e")));
}
#[test]
fn test_is_argument_data_bash_c_is_not_data() {
assert!(!is_argument_data("bash -c \"rm -rf /\"", Some("-c")));
}
#[test]
fn test_counterexample_bash_executes() {
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("bash"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("bash", "-c"));
}
#[test]
fn test_counterexample_python_executes() {
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("python"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("python", "-c"));
}
#[test]
fn test_counterexample_xargs_executes() {
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("xargs"));
}
#[test]
fn test_false_positive_git_commit_message() {
let cmd = "git commit -m \"Fix git reset --hard detection\"";
assert!(SAFE_STRING_REGISTRY.is_flag_data("git", "-m"));
let spans = classify_command(cmd);
let msg_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("reset --hard"));
assert!(msg_span.is_some());
assert_eq!(msg_span.unwrap().kind, SpanKind::Argument);
}
#[test]
fn test_false_positive_rg_pattern() {
let cmd = "rg -e \"rm -rf\" src/";
assert!(SAFE_STRING_REGISTRY.is_flag_data("rg", "-e"));
let spans = classify_command(cmd);
let pattern_span = spans.spans().iter().find(|s| s.text(cmd) == "\"rm -rf\"");
assert!(pattern_span.is_some());
assert_eq!(pattern_span.unwrap().kind, SpanKind::Argument);
}
#[test]
fn test_false_positive_bd_create() {
let cmd = "bd create --description=\"This pattern blocks rm -rf\"";
assert!(SAFE_STRING_REGISTRY.is_flag_data("bd", "--description"));
let spans = classify_command(cmd);
let desc_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("rm -rf"));
assert!(desc_span.is_some());
assert_eq!(desc_span.unwrap().kind, SpanKind::Argument);
}
#[test]
fn test_true_positive_bash_c() {
let cmd = "bash -c \"rm -rf /\"";
assert!(!SAFE_STRING_REGISTRY.is_flag_data("bash", "-c"));
let spans = classify_command(cmd);
let code_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(code_span.is_some(), "bash -c content must be InlineCode");
}
#[test]
fn test_true_positive_python_c() {
let cmd = "python -c \"import os; os.system('rm -rf /')\"";
assert!(!SAFE_STRING_REGISTRY.is_flag_data("python", "-c"));
let spans = classify_command(cmd);
let code_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(code_span.is_some(), "python -c content must be InlineCode");
}
#[test]
fn sanitize_strips_bd_description_value() {
let cmd = r#"bd create --description="This pattern blocks rm -rf""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("bd create"));
assert!(sanitized.as_ref().contains("--description="));
}
#[test]
fn sanitize_strips_bd_notes_unquoted_multiword() {
let cmd = "bd create --notes This references git reset hard";
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("git reset"));
assert!(sanitized.as_ref().contains("bd create --notes"));
}
#[test]
fn sanitize_stops_multivalue_on_next_flag() {
let cmd = "bd create --notes This blocks rm rf --priority 2";
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm rf"));
assert!(sanitized.as_ref().contains("--priority 2"));
}
#[test]
fn sanitize_multivalue_keeps_inline_code_visible() {
let cmd = "bd create --notes $(rm -rf /) and more";
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Borrowed(_)));
assert!(sanitized.as_ref().contains("rm -rf"));
}
#[test]
fn sanitize_does_not_strip_when_inline_code_present() {
let cmd = r#"bd create --description="$(rm -rf /)""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Borrowed(_)));
assert!(sanitized.as_ref().contains("rm -rf"));
}
#[test]
fn sanitize_strips_rg_positional_pattern() {
let cmd = r#"rg -n "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("rg -n"));
}
#[test]
fn sanitize_strips_git_grep_positional_pattern() {
let cmd = r#"git grep "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("git grep"));
}
#[test]
fn sanitize_handles_git_grep_with_global_options() {
let cmd = r#"git -C /tmp -c color.ui=auto grep -e "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(
sanitized
.as_ref()
.contains("git -C /tmp -c color.ui=auto grep -e")
);
assert!(sanitized.as_ref().contains("src/main.rs"));
}
#[test]
fn sanitize_strips_ag_positional_pattern() {
let cmd = r#"ag "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("ag"));
}
#[test]
fn sanitize_strips_ack_positional_pattern() {
let cmd = r#"ack "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("ack"));
}
#[test]
fn sanitize_handles_rg_fixed_strings_flag_with_other_options() {
let cmd = r#"rg --fixed-strings -n "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("rg --fixed-strings -n"));
}
#[test]
fn sanitize_handles_grep_fixed_strings_flag_with_other_options() {
let cmd = r#"grep -F -n "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("grep -F -n"));
}
#[test]
fn sanitize_handles_attached_search_pattern_value_rg() {
let cmd = r#"rg -e"rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("rg -e"));
}
#[test]
fn sanitize_handles_attached_search_pattern_value_grep() {
let cmd = r#"grep -e"rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("grep -e"));
}
#[test]
fn sanitize_handles_attached_search_pattern_value_ag() {
let cmd = r#"ag -e"rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("ag -e"));
assert!(sanitized.as_ref().contains("src/main.rs"));
}
#[test]
fn sanitize_handles_attached_search_pattern_value_ack() {
let cmd = r#"ack -e"rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("ack -e"));
assert!(sanitized.as_ref().contains("src/main.rs"));
}
#[test]
fn sanitize_handles_attached_git_commit_message() {
let cmd = r#"git commit -m"Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("git commit -m"));
}
#[test]
fn sanitize_handles_sudo_wrapper() {
let cmd = r#"sudo git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("sudo git commit -m"));
}
#[test]
fn sanitize_handles_sudo_wrapper_with_path() {
let cmd = r#"/usr/bin/sudo git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("/usr/bin/sudo git commit -m"));
}
#[test]
fn sanitize_handles_sudo_u_wrapper() {
let cmd = r#"sudo -u root git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("sudo -u root git commit -m"));
}
#[test]
fn sanitize_handles_env_unset_wrapper() {
let cmd = r#"env -u FOO rg -n "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("env -u FOO rg -n"));
}
#[test]
fn sanitize_handles_env_unset_wrapper_with_path() {
let cmd = r#"/usr/bin/env -u FOO rg -n "rm -rf" src/main.rs"#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("/usr/bin/env -u FOO rg -n"));
}
#[test]
fn sanitize_masks_command_query_v() {
let cmd = r#"command -v "rm -rf""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("command -v"));
}
#[test]
fn sanitize_masks_command_query_v_combined() {
let cmd = r#"command -pv "rm -rf""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("command -pv"));
}
#[test]
fn sanitize_does_not_mask_command_p_wrapper() {
let cmd = r"command -p rm -rf /tmp";
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Borrowed(_)));
assert!(sanitized.as_ref().contains("rm -rf"));
}
#[test]
fn sanitize_handles_combined_short_flags_with_data_value() {
let cmd = r#"git commit -am "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("git commit -am"));
}
#[test]
fn sanitize_handles_sudo_d_chdir_wrapper() {
let cmd = r#"sudo -D /tmp git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("sudo -D /tmp git commit -m"));
}
#[test]
fn sanitize_handles_sudo_r_role_wrapper() {
let cmd = r#"sudo -r myrole git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(sanitized.as_ref().contains("sudo -r myrole git commit -m"));
}
#[test]
fn sanitize_handles_sudo_chdir_long_wrapper() {
let cmd = r#"sudo --chdir=/tmp git commit -m "Fix rm -rf detection""#;
let sanitized = sanitize_for_pattern_matching(cmd);
assert!(matches!(sanitized, std::borrow::Cow::Owned(_)));
assert!(!sanitized.as_ref().contains("rm -rf"));
assert!(
sanitized
.as_ref()
.contains("sudo --chdir=/tmp git commit -m")
);
}
#[test]
fn test_regression_quoted_interpreter_identifies_inline_code() {
let cmd = r#""/usr/bin/python" -c "rm -rf /""#;
let spans = classify_command(cmd);
let code_span = spans
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
code_span.is_some(),
"Quoted interpreter path must still detect -c as InlineCode"
);
let cmd_simple = r#""python" -c "rm -rf /""#;
let spans_simple = classify_command(cmd_simple);
let code_span_simple = spans_simple
.spans()
.iter()
.find(|s| s.kind == SpanKind::InlineCode);
assert!(
code_span_simple.is_some(),
"Quoted interpreter name must still detect -c as InlineCode"
);
}
#[test]
fn test_registry_git_grep_flag() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("git", "--grep"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("git", "-g"));
}
#[test]
fn test_registry_ag_pattern_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("ag", "-e"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("ag", "--pattern"));
}
#[test]
fn test_registry_ack_pattern_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("ack", "-e"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("ack", "--pattern"));
}
#[test]
fn test_registry_curl_data_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "-d"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "--data"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "-H"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "--header"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "--data-raw"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "--data-binary"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("curl", "--url"));
}
#[test]
fn test_registry_jq_variable_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("jq", "--arg"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("jq", "--argjson"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("jq", "--slurpfile"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("jq", "-f"));
}
#[test]
fn test_registry_docker_label_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("docker", "-l"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("docker", "--label"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("docker", "--entrypoint"));
}
#[test]
fn test_registry_kubectl_annotation_label_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("kubectl", "--annotation"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("kubectl", "-l"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("kubectl", "--label"));
assert!(!SAFE_STRING_REGISTRY.is_flag_data("kubectl", "--command"));
}
#[test]
fn test_registry_xargs_placeholder_flag() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("xargs", "-I"));
assert!(!SAFE_STRING_REGISTRY.is_all_args_data("xargs"));
}
#[test]
fn test_registry_cargo_npm_message_flags() {
assert!(SAFE_STRING_REGISTRY.is_flag_data("cargo", "--message"));
assert!(SAFE_STRING_REGISTRY.is_flag_data("npm", "--message"));
}
#[test]
fn test_false_positive_curl_data() {
let cmd = r#"curl -d "rm -rf /" https://api.example.com"#;
assert!(SAFE_STRING_REGISTRY.is_flag_data("curl", "-d"));
let spans = classify_command(cmd);
let data_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("rm -rf"));
assert!(data_span.is_some());
assert_eq!(data_span.unwrap().kind, SpanKind::Argument);
}
#[test]
fn test_false_positive_ag_pattern() {
let cmd = r#"ag -e "rm -rf" src/"#;
assert!(SAFE_STRING_REGISTRY.is_flag_data("ag", "-e"));
let spans = classify_command(cmd);
let pattern_span = spans.spans().iter().find(|s| s.text(cmd) == "\"rm -rf\"");
assert!(pattern_span.is_some());
assert_eq!(pattern_span.unwrap().kind, SpanKind::Argument);
}
#[test]
fn test_false_positive_docker_label() {
let cmd = r#"docker run --label "cleanup=rm -rf /tmp" nginx"#;
assert!(SAFE_STRING_REGISTRY.is_flag_data("docker", "--label"));
let spans = classify_command(cmd);
let label_span = spans
.spans()
.iter()
.find(|s| s.text(cmd).contains("rm -rf"));
assert!(label_span.is_some());
assert_eq!(label_span.unwrap().kind, SpanKind::Argument);
}
}