use std::sync::LazyLock;
use regex::Regex;
use tracing;
/// Maximum number of bytes of a command that `deobfuscate` examines; longer
/// input is cut at the nearest character boundary at or below this limit.
const MAX_INPUT_BYTES: usize = 8192;

/// Compiles one of the hard-coded patterns below.
///
/// `label` is the name of the static being initialised and becomes the panic
/// message if the pattern fails to compile.
fn compile_pattern(pattern: &str, label: &str) -> Regex {
    Regex::new(pattern).expect(label)
}

/// `\xHH`: a backslash, `x`, and exactly two hex digits (captured).
static RE_HEX: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"\\x([0-9a-fA-F]{2})", "RE_HEX"));

/// `\N`, `\NN` or `\NNN`: a backslash and one to three octal digits (captured).
static RE_OCT: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"\\([0-7]{1,3})", "RE_OCT"));

/// `\uHHHH`: a backslash, `u`, and exactly four hex digits (captured).
static RE_UNI: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"\\u([0-9a-fA-F]{4})", "RE_UNI"));

/// `${NAME}`: a braced variable reference; the identifier is captured.
static RE_VAR_BRACE: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", "RE_VAR_BRACE"));

/// `$NAME`: an unbraced variable reference; the identifier is captured.
static RE_VAR_PLAIN: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"\$([A-Za-z_][A-Za-z0-9_]*)", "RE_VAR_PLAIN"));

/// A backtick-delimited span; the text between the backticks is captured.
static RE_BACKTICK: LazyLock<Regex> =
    LazyLock::new(|| compile_pattern(r"`([^`]*)`", "RE_BACKTICK"));

/// One or more consecutive whitespace characters.
static RE_SPACE: LazyLock<Regex> = LazyLock::new(|| compile_pattern(r"\s+", "RE_SPACE"));
#[must_use]
pub fn deobfuscate(command: &str) -> String {
let _span = tracing::info_span!("tools.deobfuscate.normalize").entered();
let input = if command.len() > MAX_INPUT_BYTES {
let boundary = command.floor_char_boundary(MAX_INPUT_BYTES);
&command[..boundary]
} else {
command
};
let s = RE_HEX.replace_all(input, |caps: ®ex::Captures<'_>| {
u8::from_str_radix(&caps[1], 16)
.ok()
.filter(u8::is_ascii)
.map_or_else(
|| caps[0].to_owned(),
|b| {
if b == b'\\' {
"[bs]".to_owned()
} else {
(b as char).to_string()
}
},
)
});
let s = RE_OCT.replace_all(&s, |caps: ®ex::Captures<'_>| {
u8::from_str_radix(&caps[1], 8)
.ok()
.filter(|&b| (0x20u8..=0x7E).contains(&b))
.map_or_else(|| caps[0].to_owned(), |b| (b as char).to_string())
});
let s = RE_UNI.replace_all(&s, |caps: ®ex::Captures<'_>| {
u32::from_str_radix(&caps[1], 16)
.ok()
.and_then(char::from_u32)
.map_or_else(|| caps[0].to_owned(), |c| c.to_string())
});
let s = s.replace("\\\n", "");
let s = RE_VAR_BRACE.replace_all(&s, "[var:$1]");
let s = RE_VAR_PLAIN.replace_all(&s, "[var:$1]");
let s = RE_BACKTICK.replace_all(&s, "[subshell: $1]");
let s = replace_dollar_subshells(&s);
let s = strip_concatenation_quotes(&s);
RE_SPACE.replace_all(&s, " ").trim().to_owned()
}
/// Replaces every `$(...)` command substitution in `s` with
/// `[subshell: <inner>]`, where `<inner>` is the trimmed text between the
/// opening `$(` and its matching `)`.
///
/// Parentheses are matched by depth, so `$(a (b) c)` is treated as a single
/// substitution. The inner text is copied as-is; a `$(` nested inside it is
/// not rewritten separately.
///
/// If a `$(` is never closed, everything after it up to the end of the string
/// is taken as the inner text. Text outside substitutions is copied through
/// unchanged.
fn replace_dollar_subshells(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find("$(") {
        // Literal text preceding the substitution.
        out.push_str(&rest[..open]);

        let body = &rest[open + 2..];
        let (inner, tail) = match matching_paren_offset(body) {
            // `)` is a single ASCII byte, so both slices fall on char boundaries.
            Some(close) => (&body[..close], &body[close + 1..]),
            // Unterminated: consume the remainder instead of guessing an end.
            None => (body, ""),
        };

        out.push_str("[subshell: ");
        out.push_str(inner.trim());
        out.push(']');
        rest = tail;
    }

    out.push_str(rest);
    out
}

/// Returns the byte offset in `body` of the `)` that closes a parenthesis
/// opened immediately before `body`, or `None` if it is never closed.
fn matching_paren_offset(body: &str) -> Option<usize> {
    let mut depth = 1usize;
    for (idx, byte) in body.bytes().enumerate() {
        match byte {
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
/// Removes shell quoting from `s` so that adjacent quoted fragments read as
/// one word (for example `'cu'"rl"` becomes `curl`).
///
/// * Inside single quotes every character up to the closing `'` is kept
///   verbatim.
/// * Inside double quotes a backslash is dropped and the character after it
///   is kept verbatim; every other character up to the closing `"` is kept.
/// * Outside quotes characters are copied unchanged, backslashes included.
///
/// An unterminated quote simply runs to the end of the input.
fn strip_concatenation_quotes(s: &str) -> String {
    /// Where the scanner currently is relative to quoting.
    #[derive(Clone, Copy)]
    enum Mode {
        Bare,
        Single,
        Double,
        /// Inside double quotes, immediately after a backslash.
        DoubleEscape,
    }

    let mut result = String::with_capacity(s.len());
    let mut mode = Mode::Bare;

    for c in s.chars() {
        mode = match (mode, c) {
            // Quote delimiters switch mode and are never emitted.
            (Mode::Bare, '\'') => Mode::Single,
            (Mode::Bare, '"') => Mode::Double,
            (Mode::Single, '\'') | (Mode::Double, '"') => Mode::Bare,
            // The escaping backslash itself is swallowed.
            (Mode::Double, '\\') => Mode::DoubleEscape,
            // Whatever follows the backslash is taken literally.
            (Mode::DoubleEscape, _) => {
                result.push(c);
                Mode::Double
            }
            // Everything else is ordinary content in the current mode.
            (current, _) => {
                result.push(c);
                current
            }
        };
    }

    result
}
/// Unit tests for [`deobfuscate`], one normalization pass per test.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hex_escape_decoded() {
        assert_eq!(deobfuscate(r"\x63url"), "curl");
        assert_eq!(deobfuscate(r"\x41\x42\x43"), "ABC");
    }

    #[test]
    fn octal_escape_decoded() {
        assert_eq!(deobfuscate(r"\143at"), "cat");
        assert_eq!(deobfuscate(r"\101"), "A");
    }

    #[test]
    fn unicode_escape_decoded() {
        // The input must carry a real `\uXXXX` escape; a pre-decoded literal
        // would pass without ever reaching the unicode pass.
        assert_eq!(deobfuscate(r"\u0063url"), "curl");
        assert_eq!(deobfuscate(r"\u0041\u0042"), "AB");
        // Text without escapes passes through untouched.
        assert_eq!(deobfuscate(r"curl"), "curl");
    }

    #[test]
    fn variable_expansion_brace() {
        assert_eq!(deobfuscate("${HOME}/file"), "[var:HOME]/file");
    }

    #[test]
    fn variable_expansion_plain() {
        assert_eq!(deobfuscate("echo $PATH"), "echo [var:PATH]");
    }

    #[test]
    fn backtick_subshell() {
        assert_eq!(deobfuscate("`whoami`"), "[subshell: whoami]");
    }

    #[test]
    fn dollar_subshell_simple() {
        assert_eq!(deobfuscate("$(whoami)"), "[subshell: whoami]");
    }

    #[test]
    fn quote_concatenation_collapse() {
        assert_eq!(deobfuscate("'cu'\"rl\""), "curl");
        assert_eq!(deobfuscate("'ab'\"cd\"'ef'"), "abcdef");
    }

    #[test]
    fn line_continuation() {
        assert_eq!(deobfuscate("cu\\\nrl"), "curl");
    }

    #[test]
    fn whitespace_normalized() {
        // Runs of mixed whitespace collapse to one space.
        assert_eq!(deobfuscate("echo   \t hello"), "echo hello");
        // Leading and trailing whitespace is trimmed.
        assert_eq!(deobfuscate("  ls  "), "ls");
        // Already-normal input is unchanged.
        assert_eq!(deobfuscate("echo hello"), "echo hello");
    }

    #[test]
    fn long_input_truncated() {
        let long = "a".repeat(MAX_INPUT_BYTES + 100);
        let result = deobfuscate(&long);
        assert!(result.len() <= MAX_INPUT_BYTES);
    }

    #[test]
    fn long_input_truncated_on_char_boundary() {
        // The leading ASCII byte shifts every two-byte `é` so that the byte
        // limit lands in the middle of a character; truncation must back off
        // to the previous boundary instead of panicking.
        let long = format!("a{}", "é".repeat(MAX_INPUT_BYTES));
        let result = deobfuscate(&long);
        assert!(result.len() <= MAX_INPUT_BYTES);
        assert!(result.starts_with("aé"));
    }
}