1mod c;
15mod cpp;
16mod go;
17mod java;
18mod javascript;
19mod php;
20mod python;
21pub(crate) mod ruby;
22mod rust;
23mod typescript;
24
25use bitflags::bitflags;
26use once_cell::sync::Lazy;
27use phf::Map;
28use serde::{Deserialize, Serialize};
29use smallvec::SmallVec;
30use std::collections::HashMap;
31
32#[derive(Debug, Clone, Copy)]
35pub struct LabelRule {
36 pub matchers: &'static [&'static str],
37 pub label: DataLabel,
38 pub case_sensitive: bool,
39}
40
/// Sentinel payload list holding the single index `usize::MAX`.
///
/// [`classify_gated_sink`] reports it when a gate fires without a constant
/// activation value being available.
pub const ALL_ARGS_PAYLOAD: &[usize] = &[usize::MAX];
51
/// How a [`SinkGate`] decides whether it fires.
#[derive(Debug, Clone, Copy)]
pub enum GateActivation {
    /// The gate fires depending on the value of an argument or keyword
    /// argument (see [`classify_gated_sink`]).
    ValueMatch,
    /// The gate fires whenever the callee matches; the listed field names are
    /// passed through to the resulting [`GateMatch`].
    Destination {
        /// Field names copied into `GateMatch::object_destination_fields`.
        object_destination_fields: &'static [&'static str],
    },
}
94
95#[derive(Debug, Clone, Copy)]
106pub struct SinkGate {
107 pub callee_matcher: &'static str,
108 pub arg_index: usize,
109 pub dangerous_values: &'static [&'static str],
110 pub dangerous_prefixes: &'static [&'static str],
111 pub label: DataLabel,
112 pub case_sensitive: bool,
113 pub payload_args: &'static [usize],
114 pub keyword_name: Option<&'static str>,
119 pub dangerous_kwargs: &'static [(&'static str, &'static [&'static str])],
134 pub activation: GateActivation,
138}
139
140bitflags! {
141 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
159 pub struct Cap: u16 {
160 const ENV_VAR = 0b0000_0000_0000_0001; const HTML_ESCAPE = 0b0000_0000_0000_0010; const SHELL_ESCAPE = 0b0000_0000_0000_0100; const URL_ENCODE = 0b0000_0000_0000_1000; const JSON_PARSE = 0b0000_0000_0001_0000; const FILE_IO = 0b0000_0000_0010_0000; const FMT_STRING = 0b0000_0000_0100_0000; const SQL_QUERY = 0b0000_0000_1000_0000; const DESERIALIZE = 0b0000_0001_0000_0000; const SSRF = 0b0000_0010_0000_0000; const CODE_EXEC = 0b0000_0100_0000_0000; const CRYPTO = 0b0000_1000_0000_0000; const UNAUTHORIZED_ID = 0b0001_0000_0000_0000; const DATA_EXFIL = 0b0010_0000_0000_0000; }
207}
208
209impl Default for Cap {
210 fn default() -> Self {
211 Cap::empty()
212 }
213}
214
215impl serde::Serialize for Cap {
216 fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
217 s.serialize_u16(self.bits())
218 }
219}
220
221impl<'de> serde::Deserialize<'de> for Cap {
222 fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
223 let bits = u16::deserialize(d)?;
224 Ok(Cap::from_bits_truncate(bits))
225 }
226}
227
/// Classification assigned to a raw node-kind string by [`lookup`].
///
/// `Other` is what [`lookup`] returns when the language or the raw kind has
/// no entry in the per-language tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    If,
    InfiniteLoop,
    While,
    For,
    CallFn,
    CallMethod,
    CallMacro,
    Break,
    Continue,
    Return,
    Block,
    SourceFile,
    Function,
    Assignment,
    CallWrapper,
    Try,
    Throw,
    Switch,
    Trivia,
    Seq,
    /// Fallback for anything without a table entry.
    Other,
}
260
261#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
262pub enum DataLabel {
263 Source(Cap),
264 Sanitizer(Cap),
265 Sink(Cap),
266}
267
/// Per-language description of how function parameters appear in the syntax
/// tree. Looked up through [`param_config`].
pub struct ParamConfig {
    /// Name of the field that holds the parameter list.
    pub params_field: &'static str,
    /// Node kinds that count as a parameter.
    pub param_node_kinds: &'static [&'static str],
    /// Node kinds that denote a self/receiver parameter.
    pub self_param_kinds: &'static [&'static str],
    /// Field names under which a parameter's identifier may be found.
    pub ident_fields: &'static [&'static str],
}
280
281static DEFAULT_PARAM_CONFIG: ParamConfig = ParamConfig {
282 params_field: "parameters",
283 param_node_kinds: &["parameter", "identifier"],
284 self_param_kinds: &[],
285 ident_fields: &["name", "pattern"],
286};
287
/// Describes a callee that propagates data between its own arguments.
/// Looked up through [`arg_propagation`].
pub struct ArgPropagation {
    /// Unqualified callee name (compared ASCII case-insensitively).
    pub callee: &'static str,
    /// Argument positions data flows from.
    pub from_args: &'static [usize],
    /// Argument positions data flows to.
    pub to_args: &'static [usize],
}
295
296pub fn output_param_source_positions(lang: &str, callee: &str) -> Option<&'static [usize]> {
299 let registry: &[(&str, &[usize])] = match lang {
300 "c" => c::OUTPUT_PARAM_SOURCES,
301 "cpp" => cpp::OUTPUT_PARAM_SOURCES,
302 _ => return None,
303 };
304 let normalized = callee
305 .rsplit("::")
306 .next()
307 .unwrap_or(callee)
308 .rsplit('.')
309 .next()
310 .unwrap_or(callee);
311 registry
312 .iter()
313 .find(|(name, _)| name.eq_ignore_ascii_case(normalized))
314 .map(|(_, positions)| *positions)
315}
316
317pub fn arg_propagation(lang: &str, callee: &str) -> Option<&'static ArgPropagation> {
319 let registry: &[ArgPropagation] = match lang {
320 "c" => c::ARG_PROPAGATIONS,
321 "cpp" => cpp::ARG_PROPAGATIONS,
322 _ => return None,
323 };
324 let normalized = callee
325 .rsplit("::")
326 .next()
327 .unwrap_or(callee)
328 .rsplit('.')
329 .next()
330 .unwrap_or(callee);
331 registry
332 .iter()
333 .find(|p| p.callee.eq_ignore_ascii_case(normalized))
334}
335
336static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
337 let mut m = HashMap::new();
338 m.insert("rust", rust::RULES);
339 m.insert("rs", rust::RULES);
340
341 m.insert("javascript", javascript::RULES);
342 m.insert("js", javascript::RULES);
343
344 m.insert("typescript", typescript::RULES);
345 m.insert("ts", typescript::RULES);
346
347 m.insert("python", python::RULES);
348 m.insert("py", python::RULES);
349
350 m.insert("go", go::RULES);
351
352 m.insert("java", java::RULES);
353
354 m.insert("c", c::RULES);
355
356 m.insert("cpp", cpp::RULES);
357 m.insert("c++", cpp::RULES);
358
359 m.insert("php", php::RULES);
360
361 m.insert("ruby", ruby::RULES);
362 m.insert("rb", ruby::RULES);
363
364 m
365});
366
367static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::new(|| {
368 let mut m = HashMap::new();
369 m.insert("javascript", javascript::GATED_SINKS);
370 m.insert("js", javascript::GATED_SINKS);
371 m.insert("typescript", typescript::GATED_SINKS);
372 m.insert("ts", typescript::GATED_SINKS);
373 m.insert("python", python::GATED_SINKS);
374 m.insert("py", python::GATED_SINKS);
375 m.insert("go", go::GATED_SINKS);
376 m.insert("php", php::GATED_SINKS);
377 m.insert("c", c::GATED_SINKS);
378 m.insert("cpp", cpp::GATED_SINKS);
379 m.insert("c++", cpp::GATED_SINKS);
380 m
381});
382
383static EXCLUDES: Lazy<HashMap<&'static str, &'static [&'static str]>> = Lazy::new(|| {
385 let mut m = HashMap::new();
386 m.insert("javascript", javascript::EXCLUDES);
387 m.insert("js", javascript::EXCLUDES);
388 m.insert("typescript", typescript::EXCLUDES);
389 m.insert("ts", typescript::EXCLUDES);
390 m
391});
392
393pub(crate) fn is_excluded(lang: &str, trimmed: &[u8]) -> bool {
395 let excludes = match EXCLUDES.get(lang).or_else(|| {
396 let key = lang.to_ascii_lowercase();
397 EXCLUDES.get(key.as_str())
398 }) {
399 Some(e) => *e,
400 None => return false,
401 };
402 for &pat in excludes {
403 if match_suffix_cs(trimmed, pat.as_bytes(), false) {
404 return true;
405 }
406 }
407 false
408}
409
410type FastMap = &'static Map<&'static str, Kind>;
411
412pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(|| {
413 let mut m = HashMap::new();
414 m.insert("rust", &rust::KINDS);
415 m.insert("rs", &rust::KINDS);
416
417 m.insert("javascript", &javascript::KINDS);
418 m.insert("js", &javascript::KINDS);
419
420 m.insert("typescript", &typescript::KINDS);
421 m.insert("ts", &typescript::KINDS);
422
423 m.insert("python", &python::KINDS);
424 m.insert("py", &python::KINDS);
425
426 m.insert("go", &go::KINDS);
427
428 m.insert("java", &java::KINDS);
429
430 m.insert("c", &c::KINDS);
431
432 m.insert("cpp", &cpp::KINDS);
433 m.insert("c++", &cpp::KINDS);
434
435 m.insert("php", &php::KINDS);
436
437 m.insert("ruby", &ruby::KINDS);
438 m.insert("rb", &ruby::KINDS);
439
440 m
441});
442
443static PARAM_CONFIGS: Lazy<HashMap<&'static str, &'static ParamConfig>> = Lazy::new(|| {
444 let mut m = HashMap::new();
445 m.insert("rust", &rust::PARAM_CONFIG);
446 m.insert("rs", &rust::PARAM_CONFIG);
447
448 m.insert("javascript", &javascript::PARAM_CONFIG);
449 m.insert("js", &javascript::PARAM_CONFIG);
450
451 m.insert("typescript", &typescript::PARAM_CONFIG);
452 m.insert("ts", &typescript::PARAM_CONFIG);
453
454 m.insert("python", &python::PARAM_CONFIG);
455 m.insert("py", &python::PARAM_CONFIG);
456
457 m.insert("go", &go::PARAM_CONFIG);
458
459 m.insert("java", &java::PARAM_CONFIG);
460
461 m.insert("c", &c::PARAM_CONFIG);
462
463 m.insert("cpp", &cpp::PARAM_CONFIG);
464 m.insert("c++", &cpp::PARAM_CONFIG);
465
466 m.insert("php", &php::PARAM_CONFIG);
467
468 m.insert("ruby", &ruby::PARAM_CONFIG);
469 m.insert("rb", &ruby::PARAM_CONFIG);
470
471 m
472});
473
474pub fn param_config(lang: &str) -> &'static ParamConfig {
476 PARAM_CONFIGS
477 .get(lang)
478 .copied()
479 .unwrap_or(&DEFAULT_PARAM_CONFIG)
480}
481
/// Parameter names accepted outright by [`is_js_ts_handler_param_name`]
/// (compared ASCII case-insensitively).
const JS_TS_HANDLER_PARAM_NAMES: &[&str] = &["userinput", "userid", "payload", "cmd", "input"];

/// Reports whether `name` is recognised as a JS/TS handler parameter name.
///
/// A name qualifies when it is non-empty, pure ASCII, and either
/// * equals one of `JS_TS_HANDLER_PARAM_NAMES` ignoring ASCII case, or
/// * starts with `user` (any case) immediately followed by an uppercase
///   ASCII letter or an underscore, e.g. `userCmd` or `user_cmd`.
pub fn is_js_ts_handler_param_name(name: &str) -> bool {
    if name.is_empty() || !name.is_ascii() {
        return false;
    }

    let listed = JS_TS_HANDLER_PARAM_NAMES
        .iter()
        .any(|known| name.eq_ignore_ascii_case(known));
    if listed {
        return true;
    }

    // `user` + word boundary (camelCase capital or snake_case underscore).
    let raw = name.as_bytes();
    match (raw.get(..4), raw.get(4)) {
        (Some(head), Some(&boundary)) => {
            head.eq_ignore_ascii_case(b"user")
                && (boundary.is_ascii_uppercase() || boundary == b'_')
        }
        _ => false,
    }
}
518
519#[inline(always)]
520pub fn lookup(lang: &str, raw: &str) -> Kind {
521 CLASSIFIERS
522 .get(lang)
523 .and_then(|m| m.get(raw).copied())
524 .unwrap_or(Kind::Other)
525}
526
527#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
529#[serde(rename_all = "snake_case")]
530pub enum SourceKind {
531 UserInput,
533 Cookie,
535 Header,
537 EnvironmentConfig,
539 FileSystem,
541 Database,
543 CaughtException,
545 Unknown,
547}
548
/// Ordered sensitivity tier of a source.
///
/// The derived ordering follows declaration order:
/// `Plain < Sensitive < Secret`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Sensitivity {
    Plain,
    Sensitive,
    Secret,
}
576
577impl SourceKind {
578 pub fn sensitivity(self) -> Sensitivity {
581 match self {
582 SourceKind::UserInput => Sensitivity::Plain,
586 SourceKind::Cookie
589 | SourceKind::Header
590 | SourceKind::EnvironmentConfig
591 | SourceKind::FileSystem
592 | SourceKind::Database => Sensitivity::Sensitive,
593 SourceKind::CaughtException => Sensitivity::Sensitive,
596 SourceKind::Unknown => Sensitivity::Sensitive,
599 }
600 }
601}
602
603pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
605 let cl = callee.to_ascii_lowercase();
606
607 if cl.contains("cookie") || cl.contains("session") {
618 return SourceKind::Cookie;
619 }
620 if cl.contains("header") {
621 return SourceKind::Header;
622 }
623
624 if cl.contains("argv")
626 || cl.contains("stdin")
627 || cl.contains("request")
628 || cl.contains("form")
629 || cl.contains("query")
630 || cl.contains("params")
631 || cl.contains("param")
632 || cl.contains("input")
633 || cl.contains("body")
634 || cl.contains("location")
635 || cl.contains("document.url")
636 || cl.contains("document.referrer")
637 || cl == "$_get"
646 || cl == "$_post"
647 || cl == "$_files"
648 || cl == "_get"
649 || cl == "_post"
650 || cl == "_files"
651 {
652 return SourceKind::UserInput;
653 }
654
655 if cl.contains("env")
657 || cl.contains("getenv")
658 || cl.contains("environ")
659 || cl.contains("config")
660 {
661 return SourceKind::EnvironmentConfig;
662 }
663
664 if cl.contains("read") || cl.contains("fopen") || cl.contains("open") {
666 if caps.contains(Cap::FILE_IO) {
668 return SourceKind::FileSystem;
669 }
670 }
671
672 if cl.contains("fetchone")
674 || cl.contains("fetchall")
675 || cl.contains("fetch_row")
676 || cl.contains("query")
677 || cl.contains("execute")
678 {
679 return SourceKind::Database;
681 }
682
683 SourceKind::Unknown
684}
685
686pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
688 match kind {
689 SourceKind::UserInput => crate::patterns::Severity::High,
690 SourceKind::Cookie => crate::patterns::Severity::High,
691 SourceKind::Header => crate::patterns::Severity::High,
692 SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
693 SourceKind::FileSystem => crate::patterns::Severity::Medium,
694 SourceKind::Database => crate::patterns::Severity::Medium,
695 SourceKind::CaughtException => crate::patterns::Severity::Medium,
696 SourceKind::Unknown => crate::patterns::Severity::High,
697 }
698}
699
700#[derive(Debug, Clone)]
702pub struct RuntimeLabelRule {
703 pub matchers: Vec<String>,
704 pub label: DataLabel,
705 pub case_sensitive: bool,
706}
707
708#[allow(dead_code)]
712pub fn parse_cap(s: &str) -> Option<Cap> {
713 match s.to_ascii_lowercase().as_str() {
714 "env_var" => Some(Cap::ENV_VAR),
715 "html_escape" => Some(Cap::HTML_ESCAPE),
716 "shell_escape" => Some(Cap::SHELL_ESCAPE),
717 "url_encode" => Some(Cap::URL_ENCODE),
718 "json_parse" => Some(Cap::JSON_PARSE),
719 "file_io" => Some(Cap::FILE_IO),
720 "fmt_string" => Some(Cap::FMT_STRING),
721 "sql_query" => Some(Cap::SQL_QUERY),
722 "deserialize" => Some(Cap::DESERIALIZE),
723 "ssrf" => Some(Cap::SSRF),
724 "code_exec" => Some(Cap::CODE_EXEC),
725 "crypto" => Some(Cap::CRYPTO),
726 "unauthorized_id" => Some(Cap::UNAUTHORIZED_ID),
727 "data_exfil" | "data_exfiltration" => Some(Cap::DATA_EXFIL),
728 "all" => Some(Cap::all()),
729 _ => None,
730 }
731}
732
733#[derive(Debug, Clone, Default)]
736pub struct LangAnalysisRules {
737 pub extra_labels: Vec<RuntimeLabelRule>,
738 pub terminators: Vec<String>,
739 pub event_handlers: Vec<String>,
740 pub frameworks: Vec<crate::utils::project::DetectedFramework>,
741}
742
743pub fn build_lang_rules(
745 config: &crate::utils::config::Config,
746 lang_slug: &str,
747) -> LangAnalysisRules {
748 let mut extra_labels: Vec<RuntimeLabelRule> = Vec::new();
749 let mut terminators = Vec::new();
750 let mut event_handlers = Vec::new();
751
752 if let Some(lang_cfg) = config.analysis.languages.get(lang_slug) {
753 extra_labels.extend(lang_cfg.rules.iter().map(|r| {
754 use crate::utils::config::RuleKind;
755 let cap = r.cap.to_cap();
756 let label = match r.kind {
757 RuleKind::Source => DataLabel::Source(cap),
758 RuleKind::Sanitizer => DataLabel::Sanitizer(cap),
759 RuleKind::Sink => DataLabel::Sink(cap),
760 };
761 RuntimeLabelRule {
762 matchers: r.matchers.clone(),
763 label,
764 case_sensitive: r.case_sensitive,
765 }
766 }));
767 terminators = lang_cfg.terminators.clone();
768 event_handlers = lang_cfg.event_handlers.clone();
769 }
770
771 let frameworks = if let Some(ref fw_ctx) = config.framework_ctx {
773 extra_labels.extend(framework_rules_for_lang(lang_slug, fw_ctx));
774 fw_ctx.frameworks.clone()
775 } else {
776 Vec::new()
777 };
778
779 if config.scanner.enable_auth_as_taint {
783 extra_labels.extend(phase_c_auth_rules_for_lang(lang_slug));
784 }
785
786 LangAnalysisRules {
787 extra_labels,
788 terminators,
789 event_handlers,
790 frameworks,
791 }
792}
793
794fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
796 match lang_slug {
797 "rust" | "rs" => rust::phase_c_auth_rules(),
798 _ => Vec::new(),
799 }
800}
801
802pub fn framework_rules_for_lang_pub(
806 lang_slug: &str,
807 ctx: &crate::utils::project::FrameworkContext,
808) -> Vec<RuntimeLabelRule> {
809 framework_rules_for_lang(lang_slug, ctx)
810}
811
812fn framework_rules_for_lang(
814 lang_slug: &str,
815 ctx: &crate::utils::project::FrameworkContext,
816) -> Vec<RuntimeLabelRule> {
817 match lang_slug {
818 "go" => go::framework_rules(ctx),
819 "ruby" | "rb" => ruby::framework_rules(ctx),
820 "java" => java::framework_rules(ctx),
821 "php" => php::framework_rules(ctx),
822 "python" | "py" => python::framework_rules(ctx),
823 "rust" | "rs" => rust::framework_rules(ctx),
824 "javascript" | "js" => javascript::framework_rules(ctx),
825 "typescript" | "ts" => typescript::framework_rules(ctx),
826 _ => Vec::new(),
827 }
828}
829
/// Reports whether `haystack` ends with `needle`, comparing bytes exactly
/// when `case_sensitive` is set and ignoring ASCII case otherwise.
#[inline]
fn ends_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
    match haystack.len().checked_sub(needle.len()) {
        // Needle longer than haystack.
        None => false,
        Some(offset) => {
            let tail = &haystack[offset..];
            if case_sensitive {
                tail == needle
            } else {
                tail.eq_ignore_ascii_case(needle)
            }
        }
    }
}
846
847#[inline]
852fn starts_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
853 let (needle, _) = unpack_matcher(needle);
854 if needle.len() > haystack.len() {
855 return false;
856 }
857 if case_sensitive {
858 haystack[..needle.len()] == *needle
859 } else {
860 haystack[..needle.len()]
861 .iter()
862 .zip(needle)
863 .all(|(h, n)| h.eq_ignore_ascii_case(n))
864 }
865}
866
867#[inline]
869fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
870 let (m, exact_only) = unpack_matcher(matcher);
871 if ends_with_cs(text, m, case_sensitive) {
872 let start = text.len() - m.len();
873 if exact_only {
874 start == 0
879 } else {
880 start == 0 || matches!(text[start - 1], b'.' | b':')
881 }
882 } else {
883 false
884 }
885}
886
/// Splits a matcher into its pattern bytes and an "exact only" flag.
///
/// A leading `=` sets the flag and is removed from the returned pattern;
/// any other matcher is returned unchanged with the flag cleared.
#[inline]
fn unpack_matcher(matcher: &[u8]) -> (&[u8], bool) {
    match matcher.split_first() {
        Some((&b'=', rest)) => (rest, true),
        _ => (matcher, false),
    }
}
900
901pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> Option<DataLabel> {
912 let head = text.split(['(', '<']).next().unwrap_or("");
913 let trimmed = head.trim().as_bytes();
914
915 if is_excluded(lang, trimmed) {
917 return None;
918 }
919
920 let full_normalized = normalize_chained_call(text);
923 let full_norm_bytes = full_normalized.as_bytes();
924
925 if let Some(extras) = extra {
927 for rule in extras {
929 for raw in &rule.matchers {
930 let m = raw.as_bytes();
931 if m.last() == Some(&b'_') {
932 continue;
933 }
934 if match_suffix_cs(trimmed, m, rule.case_sensitive)
935 || match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
936 {
937 return Some(rule.label);
938 }
939 }
940 }
941 for rule in extras {
943 for raw in &rule.matchers {
944 let m = raw.as_bytes();
945 if m.last() == Some(&b'_')
946 && (starts_with_cs(trimmed, m, rule.case_sensitive)
947 || starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
948 {
949 return Some(rule.label);
950 }
951 }
952 }
953 }
954
955 let rules = REGISTRY.get(lang).or_else(|| {
957 let key = lang.to_ascii_lowercase();
958 REGISTRY.get(key.as_str())
959 })?;
960
961 for rule in *rules {
963 for raw in rule.matchers {
964 let m = raw.as_bytes();
965 if m.last() == Some(&b'_') {
966 continue;
967 }
968 if match_suffix_cs(trimmed, m, rule.case_sensitive)
969 || match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
970 {
971 return Some(rule.label);
972 }
973 }
974 }
975
976 for rule in *rules {
978 for raw in rule.matchers {
979 let m = raw.as_bytes();
980 if m.last() == Some(&b'_')
981 && (starts_with_cs(trimmed, m, rule.case_sensitive)
982 || starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
983 {
984 return Some(rule.label);
985 }
986 }
987 }
988
989 None
990}
991
992pub fn classify_all(
998 lang: &str,
999 text: &str,
1000 extra: Option<&[RuntimeLabelRule]>,
1001) -> SmallVec<[DataLabel; 2]> {
1002 let head = text.split(['(', '<']).next().unwrap_or("");
1003 let trimmed = head.trim().as_bytes();
1004
1005 if is_excluded(lang, trimmed) {
1007 return SmallVec::new();
1008 }
1009
1010 let full_normalized = normalize_chained_call(text);
1011 let full_norm_bytes = full_normalized.as_bytes();
1012
1013 let mut out: SmallVec<[DataLabel; 2]> = SmallVec::new();
1014
1015 #[inline]
1017 fn push_dedup(out: &mut SmallVec<[DataLabel; 2]>, label: DataLabel) {
1018 if !out.contains(&label) {
1019 out.push(label);
1020 }
1021 }
1022
1023 if let Some(extras) = extra {
1025 for rule in extras {
1027 for raw in &rule.matchers {
1028 let m = raw.as_bytes();
1029 if m.last() == Some(&b'_') {
1030 continue;
1031 }
1032 if match_suffix_cs(trimmed, m, rule.case_sensitive)
1033 || match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
1034 {
1035 push_dedup(&mut out, rule.label);
1036 }
1037 }
1038 }
1039 for rule in extras {
1041 for raw in &rule.matchers {
1042 let m = raw.as_bytes();
1043 if m.last() == Some(&b'_')
1044 && (starts_with_cs(trimmed, m, rule.case_sensitive)
1045 || starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
1046 {
1047 push_dedup(&mut out, rule.label);
1048 }
1049 }
1050 }
1051 }
1052
1053 let rules = REGISTRY.get(lang).or_else(|| {
1055 let key = lang.to_ascii_lowercase();
1056 REGISTRY.get(key.as_str())
1057 });
1058
1059 if let Some(rules) = rules {
1060 for rule in *rules {
1062 for raw in rule.matchers {
1063 let m = raw.as_bytes();
1064 if m.last() == Some(&b'_') {
1065 continue;
1066 }
1067 if match_suffix_cs(trimmed, m, rule.case_sensitive)
1068 || match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
1069 {
1070 push_dedup(&mut out, rule.label);
1071 }
1072 }
1073 }
1074
1075 for rule in *rules {
1077 for raw in rule.matchers {
1078 let m = raw.as_bytes();
1079 if m.last() == Some(&b'_')
1080 && (starts_with_cs(trimmed, m, rule.case_sensitive)
1081 || starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
1082 {
1083 push_dedup(&mut out, rule.label);
1084 }
1085 }
1086 }
1087 }
1088
1089 out
1090}
1091
1092#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1102pub struct GateMatch {
1103 pub label: DataLabel,
1104 pub payload_args: &'static [usize],
1105 pub object_destination_fields: &'static [&'static str],
1106}
1107
1108pub fn classify_gated_sink(
1121 lang: &str,
1122 callee_text: &str,
1123 const_arg_at: impl Fn(usize) -> Option<String>,
1124 const_keyword_arg: impl Fn(&str) -> Option<String>,
1125 kwarg_present: impl Fn(&str) -> bool,
1126) -> SmallVec<[GateMatch; 2]> {
1127 let mut out: SmallVec<[GateMatch; 2]> = SmallVec::new();
1128 let gates = match GATED_REGISTRY.get(lang).or_else(|| {
1129 let key = lang.to_ascii_lowercase();
1130 GATED_REGISTRY.get(key.as_str())
1131 }) {
1132 Some(g) => g,
1133 None => return out,
1134 };
1135
1136 let callee_bytes = callee_text.as_bytes();
1142 let normalized = normalize_chained_call(callee_text);
1143 let normalized_bytes = normalized.as_bytes();
1144
1145 for gate in *gates {
1146 let matcher = gate.callee_matcher.as_bytes();
1147 if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive)
1148 && !match_suffix_cs(normalized_bytes, matcher, gate.case_sensitive)
1149 {
1150 continue;
1151 }
1152
1153 if let GateActivation::Destination {
1157 object_destination_fields,
1158 } = gate.activation
1159 {
1160 out.push(GateMatch {
1161 label: gate.label,
1162 payload_args: gate.payload_args,
1163 object_destination_fields,
1164 });
1165 continue;
1166 }
1167
1168 if !gate.dangerous_kwargs.is_empty() && gate.keyword_name.is_none() {
1175 let mut any_dangerous = false;
1176 let mut any_dynamic_present = false;
1177 for (name, values) in gate.dangerous_kwargs {
1178 if !kwarg_present(name) {
1179 continue; }
1181 match const_keyword_arg(name) {
1182 Some(v) => {
1183 let lower = v.to_ascii_lowercase();
1184 if values.iter().any(|dv| lower == dv.to_ascii_lowercase()) {
1185 any_dangerous = true;
1186 break;
1187 }
1188 }
1190 None => {
1191 any_dynamic_present = true;
1192 }
1193 }
1194 }
1195 if any_dangerous {
1196 out.push(GateMatch {
1197 label: gate.label,
1198 payload_args: gate.payload_args,
1199 object_destination_fields: &[],
1200 });
1201 continue;
1202 }
1203 if any_dynamic_present {
1204 out.push(GateMatch {
1208 label: gate.label,
1209 payload_args: ALL_ARGS_PAYLOAD,
1210 object_destination_fields: &[],
1211 });
1212 continue;
1213 }
1214 continue; }
1216
1217 let activation_value = if let Some(kw) = gate.keyword_name {
1219 const_keyword_arg(kw)
1220 } else {
1221 const_arg_at(gate.arg_index)
1222 };
1223
1224 match activation_value {
1225 Some(value) => {
1226 let lower = value.to_ascii_lowercase();
1227 let is_dangerous = gate
1228 .dangerous_values
1229 .iter()
1230 .any(|v| lower == v.to_ascii_lowercase())
1231 || gate
1232 .dangerous_prefixes
1233 .iter()
1234 .any(|p| lower.starts_with(&p.to_ascii_lowercase()));
1235 if is_dangerous {
1236 out.push(GateMatch {
1237 label: gate.label,
1238 payload_args: gate.payload_args,
1239 object_destination_fields: &[],
1240 });
1241 }
1242 }
1244 None => {
1251 out.push(GateMatch {
1252 label: gate.label,
1253 payload_args: ALL_ARGS_PAYLOAD,
1254 object_destination_fields: &[],
1255 });
1256 }
1257 }
1258 }
1259 out
1260}
1261
1262pub fn normalize_chained_call_for_classify(text: &str) -> String {
1265 normalize_chained_call(text)
1266}
1267
/// Returns the part of `callee` after its last `.`, or the whole string when
/// it contains no `.`. A trailing dot yields the empty string.
pub fn bare_method_name(callee: &str) -> &str {
    match callee.rfind('.') {
        Some(dot) => &callee[dot + 1..],
        None => callee,
    }
}
1276
/// Collapses call argument lists out of a chained callee expression.
///
/// Scanning left to right:
/// * a parenthesised group that runs to the end of the text, or is directly
///   followed by `.`, is dropped (`a.b().c(x)` → `a.b.c`);
/// * any other `(` is kept and scanning continues inside it;
/// * the first `<` outside a dropped group ends the scan;
/// * everything else is copied verbatim.
///
/// Text is copied as whole UTF-8 slices, so non-ASCII identifiers are
/// preserved. All delimiters are ASCII, so every slice boundary is a
/// character boundary.
fn normalize_chained_call(text: &str) -> String {
    let bytes = text.as_bytes();
    let mut result = String::with_capacity(text.len());
    let mut i = 0;

    while i < bytes.len() {
        match bytes[i] {
            b'(' => {
                // Find the index just past the matching ')', or the end of
                // the text when the group is unbalanced.
                let mut depth = 1u32;
                let mut j = i + 1;
                while j < bytes.len() && depth > 0 {
                    match bytes[j] {
                        b'(' => depth += 1,
                        b')' => depth -= 1,
                        _ => {}
                    }
                    j += 1;
                }
                if j >= bytes.len() || bytes[j] == b'.' {
                    // Trailing or chained call: drop the whole group.
                    i = j;
                } else {
                    result.push('(');
                    i += 1;
                }
            }
            b'<' => break,
            _ => {
                // Copy the run up to the next delimiter in one slice.
                let run_end = bytes[i..]
                    .iter()
                    .position(|&b| b == b'(' || b == b'<')
                    .map_or(bytes.len(), |offset| i + offset);
                result.push_str(&text[i..run_end]);
                i = run_end;
            }
        }
    }

    result
}
1317
/// Canonical language slugs, in the order [`enumerate_builtin_rules`] walks
/// them.
const CANONICAL_LANGS: &[&str] = &[
    "javascript",
    "typescript",
    "python",
    "go",
    "java",
    "c",
    "cpp",
    "php",
    "ruby",
    "rust",
];
1333
/// Maps a language slug or alias to its canonical slug.
///
/// Known slugs and aliases (`js`, `ts`, `py`, `c++`, `rb`, `rs`, and the full
/// names) are recognised ignoring ASCII case, matching the lowercase fallback
/// that `classify`, `classify_all`, `classify_gated_sink` and `is_excluded`
/// apply to their registries. An unrecognised slug is returned unchanged.
pub fn canonical_lang(slug: &str) -> &str {
    const ALIASES: &[(&str, &str)] = &[
        ("javascript", "javascript"),
        ("js", "javascript"),
        ("typescript", "typescript"),
        ("ts", "typescript"),
        ("python", "python"),
        ("py", "python"),
        ("go", "go"),
        ("java", "java"),
        ("c", "c"),
        ("cpp", "cpp"),
        ("c++", "cpp"),
        ("php", "php"),
        ("ruby", "ruby"),
        ("rb", "ruby"),
        ("rust", "rust"),
        ("rs", "rust"),
    ];

    match ALIASES
        .iter()
        .find(|(alias, _)| alias.eq_ignore_ascii_case(slug))
    {
        Some(&(_, canonical)) => canonical,
        None => slug,
    }
}
1352
1353pub fn cap_to_name(cap: Cap) -> &'static str {
1355 if cap == Cap::all() {
1356 return "all";
1357 }
1358 match cap {
1359 Cap::ENV_VAR => "env_var",
1360 Cap::HTML_ESCAPE => "html_escape",
1361 Cap::SHELL_ESCAPE => "shell_escape",
1362 Cap::URL_ENCODE => "url_encode",
1363 Cap::JSON_PARSE => "json_parse",
1364 Cap::FILE_IO => "file_io",
1365 Cap::FMT_STRING => "fmt_string",
1366 Cap::SQL_QUERY => "sql_query",
1367 Cap::DESERIALIZE => "deserialize",
1368 Cap::SSRF => "ssrf",
1369 Cap::CODE_EXEC => "code_exec",
1370 Cap::CRYPTO => "crypto",
1371 Cap::UNAUTHORIZED_ID => "unauthorized_id",
1372 _ => "unknown",
1373 }
1374}
1375
1376pub fn rule_id(lang: &str, kind: &str, matchers: &[&str]) -> String {
1378 let mut sorted: Vec<&str> = matchers.to_vec();
1379 sorted.sort_unstable();
1380 let joined = sorted.join("\0");
1381 let hash = blake3::hash(joined.as_bytes());
1382 let hex = hash.to_hex();
1383 format!("{}.{}.{}", lang, kind, &hex[..8])
1384}
1385
1386#[derive(Debug, Clone, Serialize)]
1388pub struct RuleInfo {
1389 pub id: String,
1390 pub title: String,
1391 pub language: String,
1392 pub kind: String,
1393 pub cap: String,
1394 pub cap_bits: u16,
1395 pub matchers: Vec<String>,
1396 pub case_sensitive: bool,
1397 pub is_custom: bool,
1398 pub is_gated: bool,
1399 pub enabled: bool,
1400}
1401
1402pub fn enumerate_builtin_rules() -> Vec<RuleInfo> {
1404 let mut out = Vec::new();
1405
1406 for &lang in CANONICAL_LANGS {
1407 if let Some(rules) = REGISTRY.get(lang) {
1408 for rule in *rules {
1409 let (kind_str, cap) = match rule.label {
1410 DataLabel::Source(c) => ("source", c),
1411 DataLabel::Sanitizer(c) => ("sanitizer", c),
1412 DataLabel::Sink(c) => ("sink", c),
1413 };
1414 let matchers_strs: Vec<&str> = rule.matchers.to_vec();
1415 let id = rule_id(lang, kind_str, &matchers_strs);
1416 let first = rule.matchers.first().copied().unwrap_or("?");
1417 let title = format!("{} ({})", first, kind_str);
1418 out.push(RuleInfo {
1419 id,
1420 title,
1421 language: lang.to_string(),
1422 kind: kind_str.to_string(),
1423 cap: cap_to_name(cap).to_string(),
1424 cap_bits: cap.bits(),
1425 matchers: rule.matchers.iter().map(|s| s.to_string()).collect(),
1426 case_sensitive: rule.case_sensitive,
1427 is_custom: false,
1428 is_gated: false,
1429 enabled: true,
1430 });
1431 }
1432 }
1433
1434 if let Some(gates) = GATED_REGISTRY.get(lang) {
1436 for gate in *gates {
1437 let cap = match gate.label {
1438 DataLabel::Source(c) | DataLabel::Sanitizer(c) | DataLabel::Sink(c) => c,
1439 };
1440 let kind_str = "sink";
1441 let matchers_strs = &[gate.callee_matcher];
1442 let id = rule_id(lang, &format!("gated_{}", kind_str), matchers_strs);
1443 let title = format!("{} (gated {})", gate.callee_matcher, kind_str);
1444 out.push(RuleInfo {
1445 id,
1446 title,
1447 language: lang.to_string(),
1448 kind: kind_str.to_string(),
1449 cap: cap_to_name(cap).to_string(),
1450 cap_bits: cap.bits(),
1451 matchers: vec![gate.callee_matcher.to_string()],
1452 case_sensitive: gate.case_sensitive,
1453 is_custom: false,
1454 is_gated: true,
1455 enabled: true,
1456 });
1457 }
1458 }
1459 }
1460
1461 out
1462}
1463
1464pub fn custom_rule_id(lang: &str, kind: &str, matchers: &[String]) -> String {
1466 let refs: Vec<&str> = matchers.iter().map(|s| s.as_str()).collect();
1467 format!("custom.{}", rule_id(lang, kind, &refs))
1468}
1469
1470#[cfg(test)]
1471mod tests {
1472 use super::*;
1473
1474 #[test]
1475 fn bare_method_name_strips_chain() {
1476 assert_eq!(bare_method_name("foo"), "foo");
1478 assert_eq!(bare_method_name("obj.method"), "method");
1480 assert_eq!(bare_method_name("a.b.c.method"), "method");
1482 assert_eq!(bare_method_name("foo."), "");
1484 assert_eq!(bare_method_name(""), "");
1486 assert_eq!(bare_method_name("Lock"), "Lock");
1488 }
1489
1490 #[test]
1491 fn handler_param_names_exact_and_prefix() {
1492 assert!(is_js_ts_handler_param_name("cmd"));
1494 assert!(is_js_ts_handler_param_name("input"));
1495 assert!(is_js_ts_handler_param_name("userId"));
1496 assert!(is_js_ts_handler_param_name("USERID"));
1497 assert!(is_js_ts_handler_param_name("userCmd"));
1499 assert!(is_js_ts_handler_param_name("userData"));
1500 assert!(is_js_ts_handler_param_name("userPath"));
1501 assert!(is_js_ts_handler_param_name("user_cmd"));
1503 assert!(!is_js_ts_handler_param_name("user"));
1505 assert!(!is_js_ts_handler_param_name("userx"));
1506 assert!(!is_js_ts_handler_param_name("url"));
1508 assert!(!is_js_ts_handler_param_name("value"));
1509 }
1510
1511 #[test]
1512 fn classify_none_extra_unchanged() {
1513 let result = classify("javascript", "innerHTML", None);
1515 assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
1516
1517 let result = classify("javascript", "myCustomFunc", None);
1519 assert_eq!(result, None);
1520 }
1521
1522 #[test]
1523 fn classify_extra_rules_take_priority() {
1524 let extras = vec![RuntimeLabelRule {
1525 matchers: vec!["escapeHtml".into()],
1526 label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
1527 case_sensitive: false,
1528 }];
1529
1530 let result = classify("javascript", "escapeHtml", Some(&extras));
1531 assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
1532
1533 let result = classify("javascript", "innerHTML", Some(&extras));
1535 assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
1536 }
1537
1538 #[test]
1539 fn classify_extra_overrides_builtin() {
1540 let extras = vec![RuntimeLabelRule {
1542 matchers: vec!["innerHTML".into()],
1543 label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
1544 case_sensitive: false,
1545 }];
1546
1547 let result = classify("javascript", "innerHTML", Some(&extras));
1548 assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
1549 }
1550
1551 #[test]
1552 fn classify_location_href_is_sink() {
1553 let result = classify("javascript", "location.href", None);
1554 assert_eq!(result, Some(DataLabel::Sink(Cap::URL_ENCODE)));
1555 }
1556
1557 #[test]
1558 fn classify_bare_href_is_none() {
1559 let result = classify("javascript", "href", None);
1561 assert_eq!(result, None);
1562 }
1563
1564 #[test]
1565 fn classify_case_insensitive_is_default() {
1566 let extras = vec![RuntimeLabelRule {
1567 matchers: vec!["myCustomSink".into()],
1568 label: DataLabel::Sink(Cap::HTML_ESCAPE),
1569 case_sensitive: false,
1570 }];
1571 let result = classify("javascript", "MYCUSTOMSINK", Some(&extras));
1573 assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
1574 }
1575
1576 #[test]
1577 fn classify_case_sensitive_exact_match() {
1578 let extras = vec![RuntimeLabelRule {
1579 matchers: vec!["MyExactSink".into()],
1580 label: DataLabel::Sink(Cap::HTML_ESCAPE),
1581 case_sensitive: true,
1582 }];
1583 let result = classify("javascript", "MyExactSink", Some(&extras));
1585 assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
1586 let result = classify("javascript", "myexactsink", Some(&extras));
1588 assert_eq!(result, None);
1589 }
1590
1591 #[test]
1592 fn classify_case_sensitive_prefix() {
1593 let extras = vec![RuntimeLabelRule {
1594 matchers: vec!["Sanitize_".into()],
1595 label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
1596 case_sensitive: true,
1597 }];
1598 let result = classify("javascript", "Sanitize_input", Some(&extras));
1600 assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
1601 let result = classify("javascript", "sanitize_input", Some(&extras));
1603 assert_eq!(result, None);
1604 }
1605
1606 #[test]
1612 fn classify_go_os_remove_is_file_io_sink() {
1613 let result = classify("go", "os.Remove", None);
1614 assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
1615 }
1616
1617 #[test]
1618 fn classify_go_os_write_file_is_file_io_sink() {
1619 let result = classify("go", "os.WriteFile", None);
1620 assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
1621 }
1622
1623 #[test]
1624 fn classify_go_os_remove_all_is_file_io_sink() {
1625 let result = classify("go", "os.RemoveAll", None);
1626 assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
1627 }
1628
1629 #[test]
1636 fn classify_go_goqu_l_is_sql_query_sink() {
1637 let result = classify("go", "goqu.L", None);
1638 assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
1639 }
1640
1641 #[test]
1642 fn classify_go_goqu_lit_is_sql_query_sink() {
1643 let result = classify("go", "goqu.Lit", None);
1644 assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
1645 }
1646
1647 #[test]
1648 fn classify_go_goqu_i_is_not_sink() {
1649 let result = classify("go", "goqu.I", None);
1650 assert_eq!(result, None);
1651 }
1652
1653 #[test]
1663 fn classify_go_http_default_client_get_is_ssrf_gate() {
1664 let no_kw = |_: &str| None;
1665 let no_kw_present = |_: &str| false;
1666 let result = classify_gated_sink(
1667 "go",
1668 "http.DefaultClient.Get",
1669 |_| None,
1670 no_kw,
1671 no_kw_present,
1672 );
1673 assert!(
1674 result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
1675 "expected SSRF gate match, got {result:?}"
1676 );
1677 }
1678
1679 #[test]
1680 fn classify_go_http_default_client_post_is_ssrf_and_data_exfil_gate() {
1681 let no_kw = |_: &str| None;
1682 let no_kw_present = |_: &str| false;
1683 let result = classify_gated_sink(
1684 "go",
1685 "http.DefaultClient.Post",
1686 |_| None,
1687 no_kw,
1688 no_kw_present,
1689 );
1690 assert!(
1691 result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
1692 "expected SSRF gate match, got {result:?}"
1693 );
1694 assert!(
1695 result
1696 .iter()
1697 .any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
1698 "expected DATA_EXFIL gate match, got {result:?}"
1699 );
1700 }
1701
1702 #[test]
1703 fn classify_go_http_default_client_do_is_data_exfil_gate() {
1704 let no_kw = |_: &str| None;
1705 let no_kw_present = |_: &str| false;
1706 let result = classify_gated_sink(
1707 "go",
1708 "http.DefaultClient.Do",
1709 |_| None,
1710 no_kw,
1711 no_kw_present,
1712 );
1713 assert!(
1714 result
1715 .iter()
1716 .any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
1717 "expected DATA_EXFIL gate match, got {result:?}"
1718 );
1719 }
1720
1721 #[test]
1722 fn classify_go_user_client_get_is_not_ssrf_sink() {
1723 let result = classify("go", "client.Get", None);
1728 assert_eq!(result, None);
1729 }
1730
1731 #[test]
1738 fn classify_ruby_bare_open_is_shell_escape_sink() {
1739 let result = classify("ruby", "open", None);
1740 assert_eq!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
1741 }
1742
1743 #[test]
1744 fn classify_ruby_file_open_is_not_shell_escape_sink() {
1745 let result = classify_all("ruby", "File.open", None);
1749 assert!(result.contains(&DataLabel::Sink(Cap::FILE_IO)));
1751 assert!(!result.contains(&DataLabel::Sink(Cap::SHELL_ESCAPE)));
1753 }
1754
1755 #[test]
1756 fn classify_ruby_io_open_is_not_shell_escape_sink() {
1757 let result = classify("ruby", "IO.open", None);
1760 assert_ne!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
1761 }
1762
1763 #[test]
1764 fn classify_ruby_uri_open_remains_ssrf_sink() {
1765 let result = classify("ruby", "URI.open", None);
1768 assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
1769 }
1770
1771 #[test]
1772 fn classify_ruby_openuri_open_uri_is_ssrf_sink() {
1773 let result = classify("ruby", "OpenURI.open_uri", None);
1778 assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
1779 }
1780
1781 #[test]
1782 fn unpack_matcher_strips_exact_sigil() {
1783 let (m, exact) = unpack_matcher(b"=open");
1784 assert_eq!(m, b"open");
1785 assert!(exact);
1786
1787 let (m, exact) = unpack_matcher(b"open");
1788 assert_eq!(m, b"open");
1789 assert!(!exact);
1790 }
1791
1792 #[test]
1793 fn classify_case_sensitive_suffix_boundary() {
1794 let extras = vec![RuntimeLabelRule {
1795 matchers: vec!["RunQuery".into()],
1796 label: DataLabel::Sink(Cap::SQL_QUERY),
1797 case_sensitive: true,
1798 }];
1799 let result = classify("javascript", "db.RunQuery", Some(&extras));
1801 assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
1802 let result = classify("javascript", "db.runquery", Some(&extras));
1804 assert_eq!(result, None);
1805 }
1806
1807 #[test]
1808 fn classify_cpp_sto_family_is_sanitizer() {
1809 for callee in [
1813 "std::stoi",
1814 "std::stol",
1815 "std::stoll",
1816 "std::stoul",
1817 "std::stoull",
1818 "std::stof",
1819 "std::stod",
1820 "std::stold",
1821 ] {
1822 assert_eq!(
1823 classify("cpp", callee, None),
1824 Some(DataLabel::Sanitizer(Cap::all())),
1825 "{callee} should be a Cap::all() sanitizer",
1826 );
1827 }
1828 }
1829
1830 #[test]
1831 fn parse_cap_works() {
1832 assert_eq!(parse_cap("html_escape"), Some(Cap::HTML_ESCAPE));
1833 assert_eq!(parse_cap("shell_escape"), Some(Cap::SHELL_ESCAPE));
1834 assert_eq!(parse_cap("url_encode"), Some(Cap::URL_ENCODE));
1835 assert_eq!(parse_cap("json_parse"), Some(Cap::JSON_PARSE));
1836 assert_eq!(parse_cap("env_var"), Some(Cap::ENV_VAR));
1837 assert_eq!(parse_cap("file_io"), Some(Cap::FILE_IO));
1838 assert_eq!(parse_cap("all"), Some(Cap::all()));
1839 assert_eq!(parse_cap("ALL"), Some(Cap::all()));
1840 assert_eq!(parse_cap("sql_query"), Some(Cap::SQL_QUERY));
1841 assert_eq!(parse_cap("deserialize"), Some(Cap::DESERIALIZE));
1842 assert_eq!(parse_cap("ssrf"), Some(Cap::SSRF));
1843 assert_eq!(parse_cap("code_exec"), Some(Cap::CODE_EXEC));
1844 assert_eq!(parse_cap("crypto"), Some(Cap::CRYPTO));
1845 assert_eq!(parse_cap("invalid"), None);
1846 }
1847
1848 fn no_kw(_: &str) -> Option<String> {
1850 None
1851 }
1852
1853 fn no_kw_present(_: &str) -> bool {
1855 false
1856 }
1857
1858 fn find_match_with_caps(matches: &[GateMatch], caps: Cap) -> Option<GateMatch> {
1862 matches
1863 .iter()
1864 .find(|m| matches!(m.label, DataLabel::Sink(c) if c.intersects(caps)))
1865 .copied()
1866 }
1867
1868 #[test]
1869 fn gated_sink_dangerous_exact() {
1870 let result = classify_gated_sink(
1871 "javascript",
1872 "setAttribute",
1873 |_| Some("href".to_string()),
1874 no_kw,
1875 no_kw_present,
1876 );
1877 assert_eq!(
1878 result.as_slice(),
1879 &[GateMatch {
1880 label: DataLabel::Sink(Cap::HTML_ESCAPE),
1881 payload_args: [1usize].as_slice(),
1882 object_destination_fields: &[],
1883 }]
1884 );
1885 }
1886
1887 #[test]
1888 fn gated_sink_dangerous_prefix() {
1889 let result = classify_gated_sink(
1890 "javascript",
1891 "setAttribute",
1892 |_| Some("onclick".to_string()),
1893 no_kw,
1894 no_kw_present,
1895 );
1896 assert_eq!(
1897 result.as_slice(),
1898 &[GateMatch {
1899 label: DataLabel::Sink(Cap::HTML_ESCAPE),
1900 payload_args: [1usize].as_slice(),
1901 object_destination_fields: &[],
1902 }]
1903 );
1904 }
1905
1906 #[test]
1907 fn gated_sink_safe_suppressed() {
1908 let result = classify_gated_sink(
1909 "javascript",
1910 "setAttribute",
1911 |_| Some("class".to_string()),
1912 no_kw,
1913 no_kw_present,
1914 );
1915 assert!(result.is_empty());
1916 }
1917
1918 #[test]
1919 fn gated_sink_dynamic_conservative() {
1920 let result =
1925 classify_gated_sink("javascript", "setAttribute", |_| None, no_kw, no_kw_present);
1926 assert_eq!(
1927 result.as_slice(),
1928 &[GateMatch {
1929 label: DataLabel::Sink(Cap::HTML_ESCAPE),
1930 payload_args: ALL_ARGS_PAYLOAD,
1931 object_destination_fields: &[],
1932 }]
1933 );
1934 }
1935
1936 #[test]
1937 fn gated_sink_no_match() {
1938 let result = classify_gated_sink(
1939 "rust",
1940 "setAttribute",
1941 |_| Some("href".to_string()),
1942 no_kw,
1943 no_kw_present,
1944 );
1945 assert!(result.is_empty());
1946 }
1947
1948 #[test]
1949 fn gated_sink_returns_payload_args() {
1950 let result = classify_gated_sink(
1952 "javascript",
1953 "setAttribute",
1954 |_| Some("href".to_string()),
1955 no_kw,
1956 no_kw_present,
1957 );
1958 assert_eq!(result[0].payload_args, &[1]);
1959
1960 let result = classify_gated_sink(
1962 "javascript",
1963 "parseFromString",
1964 |idx| {
1965 if idx == 1 {
1966 Some("text/html".to_string())
1967 } else {
1968 None
1969 }
1970 },
1971 no_kw,
1972 no_kw_present,
1973 );
1974 assert_eq!(result[0].payload_args, &[0]);
1975 }
1976
1977 #[test]
1978 fn gated_sink_parse_from_string_safe_mime() {
1979 let result = classify_gated_sink(
1980 "javascript",
1981 "parseFromString",
1982 |idx| {
1983 if idx == 1 {
1984 Some("text/xml".to_string())
1985 } else {
1986 None
1987 }
1988 },
1989 no_kw,
1990 no_kw_present,
1991 );
1992 assert!(result.is_empty());
1993 }
1994
1995 #[test]
1996 fn gated_sink_python_popen_shell_true() {
1997 let result = classify_gated_sink(
1998 "python",
1999 "Popen",
2000 |_| None,
2001 |kw| {
2002 if kw == "shell" {
2003 Some("True".to_string())
2004 } else {
2005 None
2006 }
2007 },
2008 |kw| kw == "shell",
2009 );
2010 assert_eq!(
2011 result.as_slice(),
2012 &[GateMatch {
2013 label: DataLabel::Sink(Cap::SHELL_ESCAPE),
2014 payload_args: [0usize].as_slice(),
2015 object_destination_fields: &[],
2016 }]
2017 );
2018 }
2019
2020 #[test]
2021 fn gated_sink_python_popen_shell_false() {
2022 let result = classify_gated_sink(
2023 "python",
2024 "Popen",
2025 |_| None,
2026 |kw| {
2027 if kw == "shell" {
2028 Some("False".to_string())
2029 } else {
2030 None
2031 }
2032 },
2033 |kw| kw == "shell",
2034 );
2035 assert!(result.is_empty());
2036 }
2037
2038 #[test]
2039 fn gated_sink_python_popen_no_shell_conservative() {
2040 let result = classify_gated_sink("python", "Popen", |_| None, |_| None, no_kw_present);
2043 assert_eq!(
2044 result.as_slice(),
2045 &[GateMatch {
2046 label: DataLabel::Sink(Cap::SHELL_ESCAPE),
2047 payload_args: ALL_ARGS_PAYLOAD,
2048 object_destination_fields: &[],
2049 }]
2050 );
2051 }
2052
2053 #[test]
2057 fn gated_sink_subprocess_run_shell_true() {
2058 let result = classify_gated_sink(
2059 "python",
2060 "subprocess.run",
2061 |_| None,
2062 |kw| {
2063 if kw == "shell" {
2064 Some("True".to_string())
2065 } else {
2066 None
2067 }
2068 },
2069 |kw| kw == "shell",
2070 );
2071 assert_eq!(
2072 result.as_slice(),
2073 &[GateMatch {
2074 label: DataLabel::Sink(Cap::SHELL_ESCAPE),
2075 payload_args: [0usize].as_slice(),
2076 object_destination_fields: &[],
2077 }]
2078 );
2079 }
2080
2081 #[test]
2083 fn gated_sink_subprocess_run_shell_false() {
2084 let result = classify_gated_sink(
2085 "python",
2086 "subprocess.run",
2087 |_| None,
2088 |kw| {
2089 if kw == "shell" {
2090 Some("False".to_string())
2091 } else {
2092 None
2093 }
2094 },
2095 |kw| kw == "shell",
2096 );
2097 assert!(result.is_empty());
2098 }
2099
2100 #[test]
2103 fn gated_sink_subprocess_run_shell_absent_suppresses() {
2104 let result = classify_gated_sink(
2105 "python",
2106 "subprocess.run",
2107 |_| None,
2108 |_| None,
2109 no_kw_present,
2110 );
2111 assert!(result.is_empty());
2112 }
2113
2114 #[test]
2118 fn gated_sink_subprocess_run_shell_dynamic_conservative() {
2119 let result = classify_gated_sink(
2120 "python",
2121 "subprocess.run",
2122 |_| None,
2123 |_| None, |kw| kw == "shell",
2125 );
2126 assert_eq!(
2127 result.as_slice(),
2128 &[GateMatch {
2129 label: DataLabel::Sink(Cap::SHELL_ESCAPE),
2130 payload_args: ALL_ARGS_PAYLOAD,
2131 object_destination_fields: &[],
2132 }]
2133 );
2134 }
2135
2136 #[test]
2139 fn gated_sink_destination_positional_always_fires() {
2140 let result = classify_gated_sink(
2145 "javascript",
2146 "fetch",
2147 |_| None, no_kw,
2149 no_kw_present,
2150 );
2151 let m = find_match_with_caps(&result, Cap::SSRF).expect("fetch SSRF gate should fire");
2152 assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
2153 assert_eq!(m.payload_args, &[0]);
2154 assert_eq!(m.object_destination_fields, &["url"]);
2155 }
2156
2157 #[test]
2160 fn gated_sink_destination_object_fields_surfaced() {
2161 let result =
2163 classify_gated_sink("javascript", "http.request", |_| None, no_kw, no_kw_present);
2164 let m = result
2165 .first()
2166 .copied()
2167 .expect("http.request gate should fire");
2168 assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
2169 assert_eq!(m.payload_args, &[0]);
2170 assert!(
2171 m.object_destination_fields
2172 .iter()
2173 .any(|&f| f == "host" || f == "hostname"),
2174 "expected host/hostname in destination fields, got {:?}",
2175 m.object_destination_fields,
2176 );
2177 }
2178
2179 #[test]
2183 fn gated_sink_fetch_emits_ssrf_and_data_exfil() {
2184 let result = classify_gated_sink("javascript", "fetch", |_| None, no_kw, no_kw_present);
2185 let ssrf = find_match_with_caps(&result, Cap::SSRF).expect("SSRF gate fires");
2186 assert_eq!(ssrf.label, DataLabel::Sink(Cap::SSRF));
2187 assert_eq!(ssrf.payload_args, &[0]);
2188 assert_eq!(ssrf.object_destination_fields, &["url"]);
2189
2190 let exfil = find_match_with_caps(&result, Cap::DATA_EXFIL).expect("DATA_EXFIL gate fires");
2191 assert_eq!(exfil.label, DataLabel::Sink(Cap::DATA_EXFIL));
2192 assert_eq!(exfil.payload_args, &[1]);
2193 assert!(
2194 exfil.object_destination_fields.contains(&"body"),
2195 "expected body in DATA_EXFIL destination fields, got {:?}",
2196 exfil.object_destination_fields,
2197 );
2198 }
2199
2200 #[test]
2201 fn classify_all_single_label() {
2202 let result = classify_all("javascript", "innerHTML", None);
2203 assert_eq!(result.len(), 1);
2204 assert_eq!(result[0], DataLabel::Sink(Cap::HTML_ESCAPE));
2205 }
2206
2207 #[test]
2208 fn classify_all_dual_label_php() {
2209 let result = classify_all("php", "file_get_contents", None);
2210 assert!(result.len() >= 2, "expected dual label, got {:?}", result);
2211 assert!(
2212 result.contains(&DataLabel::Source(Cap::all())),
2213 "expected Source(all), got {:?}",
2214 result
2215 );
2216 assert!(
2217 result.contains(&DataLabel::Sink(Cap::SSRF)),
2218 "expected Sink(SSRF), got {:?}",
2219 result
2220 );
2221 }
2222
2223 #[test]
2224 fn classify_all_dual_label_java() {
2225 let result = classify_all("java", "readObject", None);
2226 assert!(result.len() >= 2, "expected dual label, got {:?}", result);
2227 assert!(
2228 result.contains(&DataLabel::Source(Cap::all())),
2229 "expected Source(all), got {:?}",
2230 result
2231 );
2232 assert!(
2233 result.contains(&DataLabel::Sink(Cap::DESERIALIZE)),
2234 "expected Sink(DESERIALIZE), got {:?}",
2235 result
2236 );
2237 }
2238
2239 #[test]
2240 fn classify_go_echo_sinks_with_runtime_rules() {
2241 use crate::utils::project::{DetectedFramework, FrameworkContext};
2242
2243 let ctx = FrameworkContext {
2244 frameworks: vec![DetectedFramework::Echo],
2245 inspected_langs: std::collections::HashSet::new(),
2246 };
2247 let rules = go::framework_rules(&ctx);
2248 let extras = rules.to_vec();
2249
2250 assert_eq!(
2251 classify("go", "c.String", Some(&extras)),
2252 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2253 );
2254 assert_eq!(
2255 classify("go", "c.HTML", Some(&extras)),
2256 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2257 );
2258 assert_eq!(
2259 classify("go", "c.JSON", Some(&extras)),
2260 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2261 );
2262
2263 let empty = go::framework_rules(&FrameworkContext::default());
2265 assert_eq!(classify("go", "c.String", Some(&empty)), None);
2266 }
2267
2268 #[test]
2269 fn classify_javascript_koa_runtime_rules() {
2270 use crate::utils::project::{DetectedFramework, FrameworkContext};
2271
2272 let ctx = FrameworkContext {
2273 frameworks: vec![DetectedFramework::Koa],
2274 inspected_langs: std::collections::HashSet::new(),
2275 };
2276 let extras = javascript::framework_rules(&ctx);
2277
2278 assert_eq!(
2279 classify("javascript", "ctx.query", Some(&extras)),
2280 Some(DataLabel::Source(Cap::all())),
2281 );
2282 assert_eq!(
2283 classify("javascript", "ctx.cookies.get", Some(&extras)),
2284 Some(DataLabel::Source(Cap::all())),
2285 );
2286 assert_eq!(
2287 classify("javascript", "ctx.body", Some(&extras)),
2288 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2289 );
2290 assert_eq!(
2291 classify("javascript", "ctx.redirect", Some(&extras)),
2292 Some(DataLabel::Sink(Cap::SSRF)),
2293 );
2294
2295 let empty = javascript::framework_rules(&FrameworkContext::default());
2296 assert_eq!(classify("javascript", "ctx.query", Some(&empty)), None);
2297 }
2298
2299 #[test]
2300 fn classify_typescript_fastify_runtime_rules() {
2301 use crate::utils::project::{DetectedFramework, FrameworkContext};
2302
2303 let ctx = FrameworkContext {
2304 frameworks: vec![DetectedFramework::Fastify],
2305 inspected_langs: std::collections::HashSet::new(),
2306 };
2307 let extras = typescript::framework_rules(&ctx);
2308
2309 assert_eq!(
2310 classify("typescript", "request.query", Some(&extras)),
2311 Some(DataLabel::Source(Cap::all())),
2312 );
2313 assert_eq!(
2314 classify("typescript", "reply.send", Some(&extras)),
2315 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2316 );
2317 assert_eq!(
2318 classify("typescript", "reply.redirect", Some(&extras)),
2319 Some(DataLabel::Sink(Cap::SSRF)),
2320 );
2321
2322 let empty = typescript::framework_rules(&FrameworkContext::default());
2323 assert_eq!(classify("typescript", "request.query", Some(&empty)), None);
2324 }
2325
2326 #[test]
2327 fn classify_ruby_sinatra_template_sinks() {
2328 use crate::utils::project::{DetectedFramework, FrameworkContext};
2329
2330 let ctx = FrameworkContext {
2331 frameworks: vec![DetectedFramework::Sinatra],
2332 inspected_langs: std::collections::HashSet::new(),
2333 };
2334 let rules = ruby::framework_rules(&ctx);
2335 let extras = rules.to_vec();
2336
2337 assert_eq!(
2338 classify("ruby", "erb", Some(&extras)),
2339 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2340 );
2341 assert_eq!(
2342 classify("ruby", "haml", Some(&extras)),
2343 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2344 );
2345
2346 let empty = ruby::framework_rules(&FrameworkContext::default());
2348 assert_eq!(classify("ruby", "erb", Some(&empty)), None);
2349 }
2350
2351 #[test]
2352 fn classify_rust_axum_runtime_rules() {
2353 use crate::utils::project::{DetectedFramework, FrameworkContext};
2354
2355 let ctx = FrameworkContext {
2356 frameworks: vec![DetectedFramework::Axum],
2357 inspected_langs: std::collections::HashSet::new(),
2358 };
2359 let extras = rust::framework_rules(&ctx);
2360
2361 assert_eq!(
2362 classify("rust", "Path<String>", Some(&extras)),
2363 Some(DataLabel::Source(Cap::all())),
2364 );
2365 assert_eq!(
2366 classify("rust", "HeaderMap.get(\"x-user\")", Some(&extras)),
2367 Some(DataLabel::Source(Cap::all())),
2368 );
2369 assert_eq!(
2370 classify("rust", "Html(name)", Some(&extras)),
2371 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2372 );
2373 assert_eq!(
2374 classify("rust", "Redirect::to(next)", Some(&extras)),
2375 Some(DataLabel::Sink(Cap::SSRF)),
2376 );
2377
2378 let empty = rust::framework_rules(&FrameworkContext::default());
2379 assert_eq!(classify("rust", "Html(name)", Some(&empty)), None);
2380 }
2381
2382 #[test]
2383 fn classify_rust_actix_runtime_rules() {
2384 use crate::utils::project::{DetectedFramework, FrameworkContext};
2385
2386 let ctx = FrameworkContext {
2387 frameworks: vec![DetectedFramework::ActixWeb],
2388 inspected_langs: std::collections::HashSet::new(),
2389 };
2390 let extras = rust::framework_rules(&ctx);
2391
2392 assert_eq!(
2393 classify("rust", "web::Json<String>", Some(&extras)),
2394 Some(DataLabel::Source(Cap::all())),
2395 );
2396 assert_eq!(
2397 classify("rust", "HttpRequest.match_info()", Some(&extras)),
2398 Some(DataLabel::Source(Cap::all())),
2399 );
2400 assert_eq!(
2401 classify("rust", "HttpResponse.body(payload)", Some(&extras)),
2402 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2403 );
2404 }
2405
2406 #[test]
2407 fn classify_rust_rocket_runtime_rules() {
2408 use crate::utils::project::{DetectedFramework, FrameworkContext};
2409
2410 let ctx = FrameworkContext {
2411 frameworks: vec![DetectedFramework::Rocket],
2412 inspected_langs: std::collections::HashSet::new(),
2413 };
2414 let extras = rust::framework_rules(&ctx);
2415
2416 assert_eq!(
2417 classify("rust", "CookieJar.get_private(\"sid\")", Some(&extras)),
2418 Some(DataLabel::Source(Cap::all())),
2419 );
2420 assert_eq!(
2421 classify("rust", "content::RawHtml(name)", Some(&extras)),
2422 Some(DataLabel::Sink(Cap::HTML_ESCAPE)),
2423 );
2424 assert_eq!(
2425 classify("rust", "Redirect::to(next)", Some(&extras)),
2426 Some(DataLabel::Sink(Cap::SSRF)),
2427 );
2428 }
2429}