use fallow_config::EffectKind;
use fallow_types::extract::{SinkArgKind, SinkLiteralValue, SinkObjectProperty, SinkShape};
use rustc_hash::FxHashSet;
/// Stable category id string for hardcoded-secret candidates.
pub const HARDCODED_SECRET_CATEGORY_ID: &str = "hardcoded-secret";
/// Human-readable title for the hardcoded-secret category.
pub const HARDCODED_SECRET_CATEGORY_TITLE: &str = "Hardcoded secret candidate";
/// Text of `../data/security_matchers.toml`, embedded at compile time via `include_str!`.
const CATALOGUE_TOML: &str = include_str!("../data/security_matchers.toml");
/// Deserialization mirror of the whole catalogue TOML document.
///
/// Both lists default to empty when their key is absent; `parse_catalogue`
/// later rejects a document that ends up with no matchers.
#[derive(serde::Deserialize)]
struct RawCatalogue {
    /// Unvalidated `[[matcher]]` rows.
    #[serde(default)]
    matcher: Vec<RawMatcher>,
    /// Unvalidated `source` rows.
    #[serde(default)]
    source: Vec<RawSource>,
}
/// Unvalidated `source` row as it appears in the TOML, before
/// `parse_source_catalogue` turns it into a `SourceMatcher`.
#[derive(serde::Deserialize)]
struct RawSource {
    /// Row id; must be non-blank.
    id: String,
    /// Display title returned alongside the id when the source matches.
    title: String,
    /// Optional dependency gate; omitted means the row is global.
    #[serde(default)]
    enabler: Option<String>,
    /// Dotted path patterns in `CalleePattern` syntax; at least one is required.
    path_patterns: Vec<String>,
    /// Optional receiver names; an empty list leaves the row ungated.
    #[serde(default)]
    receiver_allowlist: Vec<String>,
}
/// Unvalidated `[[matcher]]` row as it appears in the TOML, before
/// `parse_matcher_entry` turns it into a `Matcher`.
///
/// Fields marked `#[serde(default)]` may be omitted from the TOML.
#[derive(serde::Deserialize)]
struct RawMatcher {
    /// Row id; must be non-blank.
    id: String,
    /// CWE number; must be greater than zero.
    cwe: u32,
    /// Display title for the row.
    title: String,
    /// Effect classification, deserialized directly into `EffectKind`.
    effect: EffectKind,
    /// Sink shape keyword, resolved by `parse_sink_shape`.
    sink_shape: String,
    /// Dotted callee patterns in `CalleePattern` syntax; at least one is required.
    callee_patterns: Vec<String>,
    /// Argument index carried through to `Matcher::arg_index`.
    arg_index: u32,
    /// Evidence text template; must be non-blank.
    evidence_template: String,
    /// Optional import provenance string, passed through unchanged.
    #[serde(default)]
    import_provenance: Option<String>,
    /// Optional dependency gate; omitted means the row is global.
    #[serde(default)]
    enabler: Option<String>,
    /// Optional argument-kind keywords, resolved by `parse_arg_kind`;
    /// an explicitly empty list is rejected.
    #[serde(default)]
    arg_kinds: Option<Vec<String>>,
    /// Optional exact string literal values (compared ASCII-case-insensitively).
    #[serde(default)]
    literal_values: Option<Vec<String>>,
    /// Optional substrings a string literal must contain (ASCII-case-insensitive).
    #[serde(default)]
    literal_contains: Option<Vec<String>>,
    /// Optional integer literal values.
    #[serde(default)]
    literal_integers: Option<Vec<i64>>,
    /// Optional per-key object property predicates.
    #[serde(default)]
    object_properties: Option<Vec<RawObjectPropertyPredicate>>,
    /// Optional keys that are satisfied when absent or literally `false`.
    #[serde(default)]
    object_missing_or_false: Option<Vec<String>>,
    /// Optional keys that are satisfied when absent from a complete key list.
    #[serde(default)]
    object_missing: Option<Vec<String>>,
    /// Optional keywords searched for in context names (ASCII-case-insensitive).
    #[serde(default)]
    context_keywords: Option<Vec<String>>,
    /// Copied to `Matcher::requires_source`; defaults to `false`.
    #[serde(default)]
    requires_source: bool,
    /// Copied to `Matcher::requires_source_kinds`; defaults to empty.
    #[serde(default)]
    requires_source_kinds: Vec<String>,
}
/// Unvalidated object-property predicate from the TOML.
///
/// `parse_object_property_predicates` requires exactly one of `string`,
/// `boolean`, `integer`, or `null = true` to be set.
#[derive(Debug, serde::Deserialize)]
struct RawObjectPropertyPredicate {
    /// Property key to look up; must be non-blank.
    key: String,
    /// Expected string value.
    #[serde(default)]
    string: Option<String>,
    /// Expected boolean value.
    #[serde(default)]
    boolean: Option<bool>,
    /// Expected integer value.
    #[serde(default)]
    integer: Option<i64>,
    /// When `true`, the property is expected to be a null literal.
    #[serde(default)]
    null: bool,
}
/// Expected literal value of an object property.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LiteralPredicate {
    /// String literal; compared ASCII-case-insensitively when matching.
    String(String),
    /// Integer literal; compared exactly.
    Integer(i64),
    /// Boolean literal; compared exactly.
    Boolean(bool),
    /// Null literal.
    Null,
}
/// Validated predicate requiring an object property `key` to hold `value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectPropertyPredicate {
    /// Property key, compared for exact equality against the sink's property keys.
    key: String,
    /// Literal the property's value must match.
    value: LiteralPredicate,
}
/// A dotted path pattern such as `fetch`, `*.innerHTML`, or `child_process.*`.
///
/// Matching is done on whole `.`-separated segments, never on substrings.
/// A `*` is only recognised as the very first and/or very last segment:
///
/// * no wildcard: the path must equal the pattern segment-for-segment;
/// * leading `*`: the path must end with the literal segments and have at
///   least one more segment in front of them;
/// * trailing `*`: the path must start with the literal segments and have at
///   least one more segment after them;
/// * both wildcards, or no literal segments at all: matches nothing.
#[derive(Debug, Clone)]
pub struct CalleePattern {
    /// Pattern text exactly as supplied to the parser.
    raw: String,
    /// Literal segments left after removing a leading and/or trailing `*`.
    suffix_segments: Vec<String>,
    /// `true` when the first segment of the pattern was `*`.
    leading_wildcard: bool,
    /// `true` when the last remaining segment of the pattern was `*`.
    trailing_wildcard: bool,
}

impl CalleePattern {
    /// Parses `raw` into a pattern; returns `None` when `raw` is empty or
    /// whitespace-only.
    #[must_use]
    pub fn parse(raw: &str) -> Option<Self> {
        parse_callee_pattern(raw)
    }

    /// Returns the pattern text exactly as it was supplied to the parser.
    #[must_use]
    pub fn raw(&self) -> &str {
        &self.raw
    }

    /// Returns `true` when `callee_path` satisfies this pattern (see the
    /// type-level documentation for the rules).
    ///
    /// The comparison walks the path's segments lazily, so no intermediate
    /// collection is allocated per call.
    #[must_use]
    pub fn matches(&self, callee_path: &str) -> bool {
        if self.suffix_segments.is_empty() || (self.leading_wildcard && self.trailing_wildcard) {
            return false;
        }
        if self.leading_wildcard {
            // Compare back-to-front, then demand one extra segment (the receiver).
            let mut actual = callee_path.rsplit('.');
            let tail_matches = self
                .suffix_segments
                .iter()
                .rev()
                .all(|pat| actual.next() == Some(pat.as_str()));
            tail_matches && actual.next().is_some()
        } else if self.trailing_wildcard {
            // Compare front-to-back, then demand one extra segment after the prefix.
            let mut actual = callee_path.split('.');
            let head_matches = self
                .suffix_segments
                .iter()
                .all(|pat| actual.next() == Some(pat.as_str()));
            head_matches && actual.next().is_some()
        } else {
            // `Iterator::eq` also fails on a length mismatch.
            callee_path
                .split('.')
                .eq(self.suffix_segments.iter().map(String::as_str))
        }
    }

    /// For a leading-wildcard pattern that matches `callee_path`, returns the
    /// segment immediately before the matched literal segments.
    ///
    /// Returns `None` when the pattern has no leading wildcard or does not match.
    #[must_use]
    pub fn matched_receiver<'p>(&self, callee_path: &'p str) -> Option<&'p str> {
        if !self.leading_wildcard || !self.matches(callee_path) {
            return None;
        }
        // Counting from the end: skip the literal segments, take the next one.
        callee_path.rsplit('.').nth(self.suffix_segments.len())
    }
}

/// Splits `raw` on `.` and strips one leading and one trailing `*` segment.
///
/// Returns `None` when `raw` is empty or whitespace-only. The pattern is not
/// trimmed, and a pattern made only of wildcards still parses (it simply
/// never matches).
fn parse_callee_pattern(raw: &str) -> Option<CalleePattern> {
    if raw.trim().is_empty() {
        return None;
    }
    let segments: Vec<&str> = raw.split('.').collect();
    let (leading_wildcard, rest) = match segments.split_first() {
        Some((&"*", rest)) => (true, rest),
        _ => (false, segments.as_slice()),
    };
    // The trailing check runs on what is left, so a lone `*` counts as leading only.
    let (trailing_wildcard, literal) = match rest.split_last() {
        Some((&"*", init)) => (true, init),
        _ => (false, rest),
    };
    Some(CalleePattern {
        raw: raw.to_string(),
        suffix_segments: literal.iter().map(|segment| (*segment).to_string()).collect(),
        leading_wildcard,
        trailing_wildcard,
    })
}
/// A validated sink matcher row from the catalogue.
#[derive(Debug, Clone)]
pub struct Matcher {
    /// Row id; several rows may share one id.
    pub id: String,
    /// CWE number (validated to be greater than zero at parse time).
    pub cwe: u32,
    /// Display title for the row.
    pub title: String,
    /// Effect classification taken from the TOML `effect` key.
    pub effect: EffectKind,
    /// Syntactic shape of the sink this row targets.
    pub sink_shape: SinkShape,
    /// Callee patterns; see [`Matcher::first_matching_pattern`].
    pub callee_patterns: Vec<CalleePattern>,
    /// Argument index from the TOML.
    // NOTE(review): presumably the position of the inspected argument — confirm with the consumer.
    pub arg_index: u32,
    /// Evidence text template (validated to be non-blank at parse time).
    pub evidence_template: String,
    /// Optional import provenance string, passed through from the TOML unchanged.
    pub import_provenance: Option<String>,
    /// Optional dependency gate; `None` means the row is always enabled.
    pub enabler: Option<String>,
    /// Admitted argument kinds; `None` admits every kind.
    pub arg_kinds: Option<Vec<SinkArgKind>>,
    /// Copied from the TOML `requires_source` key.
    pub requires_source: bool,
    /// Copied from the TOML `requires_source_kinds` key.
    pub requires_source_kinds: Vec<String>,
    /// Exact string literal values (ASCII-case-insensitive); empty disables the check.
    pub literal_values: Vec<String>,
    /// Substrings a string literal must contain (ASCII-case-insensitive); empty disables the check.
    pub literal_contains: Vec<String>,
    /// Integer literal values; empty disables the check.
    pub literal_integers: Vec<i64>,
    /// Properties that must be present with a matching literal value.
    pub object_properties: Vec<ObjectPropertyPredicate>,
    /// Keys of which at least one must be absent or literally `false`.
    pub object_missing_or_false: Vec<String>,
    /// Keys of which at least one must be absent from a complete key list.
    pub object_missing: Vec<String>,
    /// Keywords of which at least one must occur in a context name (ASCII-case-insensitive).
    pub context_keywords: Vec<String>,
}
/// A validated untrusted-source row from the catalogue.
#[derive(Debug, Clone)]
pub struct SourceMatcher {
    /// Row id.
    pub id: String,
    /// Display title for the row.
    pub title: String,
    /// Optional dependency gate; `None` means the row is always enabled.
    pub enabler: Option<String>,
    /// Path patterns; the row matches when any of them does.
    pub path_patterns: Vec<CalleePattern>,
    /// Lower-cased receiver names; when non-empty, leading-wildcard matches
    /// are only accepted for these receivers (or caller-supplied extras).
    pub receiver_allowlist: Vec<String>,
}
impl SourceMatcher {
    /// Test helper: like [`Self::matches_with_extra_receivers`] with no extra receivers.
    #[cfg(test)]
    #[must_use]
    pub fn matches(&self, source_path: &str) -> bool {
        self.matches_with_extra_receivers(source_path, &FxHashSet::default())
    }

    /// Returns `true` when any path pattern matches `source_path` and the
    /// receiver gate accepts that match.
    ///
    /// `extra_receivers` extends the row's own allowlist; its entries are
    /// looked up by the lower-cased receiver name.
    #[must_use]
    pub fn matches_with_extra_receivers(
        &self,
        source_path: &str,
        extra_receivers: &FxHashSet<String>,
    ) -> bool {
        for pattern in &self.path_patterns {
            if pattern.matches(source_path)
                && self.receiver_allowed(pattern, source_path, extra_receivers)
            {
                return true;
            }
        }
        false
    }

    /// Applies the receiver allowlist to a match of `pattern` on `source_path`.
    ///
    /// The gate is open when the allowlist is empty or when the pattern yields
    /// no receiver (i.e. it has no leading wildcard).
    fn receiver_allowed(
        &self,
        pattern: &CalleePattern,
        source_path: &str,
        extra_receivers: &FxHashSet<String>,
    ) -> bool {
        if self.receiver_allowlist.is_empty() {
            return true;
        }
        let Some(receiver) = pattern.matched_receiver(source_path) else {
            return true;
        };
        let listed = self
            .receiver_allowlist
            .iter()
            .any(|allowed| allowed.eq_ignore_ascii_case(receiver));
        listed || extra_receivers.contains(&receiver.to_ascii_lowercase())
    }

    /// Returns `true` when this row's enabler (if any) is met by `declared_deps`.
    #[must_use]
    pub fn enabler_satisfied(&self, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
        let gate = self.enabler.as_deref();
        enabler_satisfied(gate, declared_deps)
    }
}
/// The parsed and validated catalogue: sink matchers plus untrusted-source rows.
#[derive(Debug)]
pub struct Catalogue {
    /// Sink matcher rows, in TOML order.
    matchers: Vec<Matcher>,
    /// Source rows, in TOML order; lookups return the first row that matches.
    sources: Vec<SourceMatcher>,
}
impl Matcher {
    /// Returns the first callee pattern (in declaration order) matching `callee_path`.
    #[must_use]
    pub fn first_matching_pattern(&self, callee_path: &str) -> Option<&CalleePattern> {
        for pattern in &self.callee_patterns {
            if pattern.matches(callee_path) {
                return Some(pattern);
            }
        }
        None
    }

    /// Returns `true` when `arg_kind` is admitted; a row without `arg_kinds`
    /// admits every kind.
    #[must_use]
    pub fn admits_arg_kind(&self, arg_kind: SinkArgKind) -> bool {
        match &self.arg_kinds {
            None => true,
            Some(kinds) => kinds.contains(&arg_kind),
        }
    }

    /// Returns `true` when the row carries any literal/object/context
    /// predicate, or explicitly admits the `Literal` or `NoArg` argument kind.
    #[must_use]
    pub fn is_literal_aware(&self) -> bool {
        let has_value_predicate = !(self.literal_values.is_empty()
            && self.literal_contains.is_empty()
            && self.literal_integers.is_empty());
        let has_object_predicate = !(self.object_properties.is_empty()
            && self.object_missing_or_false.is_empty()
            && self.object_missing.is_empty());
        let has_context_predicate = !self.context_keywords.is_empty();
        let admits_literal_shape = self.arg_kinds.as_deref().is_some_and(|kinds| {
            kinds
                .iter()
                .any(|kind| matches!(kind, SinkArgKind::Literal | SinkArgKind::NoArg))
        });
        has_value_predicate || has_object_predicate || has_context_predicate || admits_literal_shape
    }

    /// Checks `literal` against the row's string and integer literal predicates.
    ///
    /// With no literal predicates configured the result is always `true`.
    /// String predicates require a string literal that equals one of
    /// `literal_values` (if any) and contains one of `literal_contains` (if
    /// any), both ASCII-case-insensitively. Integer predicates require an
    /// integer literal listed in `literal_integers`.
    // NOTE(review): a row that sets both string and integer predicates can
    // never be satisfied, because one literal cannot be both — confirm that
    // catalogue rows only ever use one family.
    #[must_use]
    pub fn literal_value_satisfied(&self, literal: Option<&SinkLiteralValue>) -> bool {
        let wants_string = !(self.literal_values.is_empty() && self.literal_contains.is_empty());
        let wants_integer = !self.literal_integers.is_empty();
        if !wants_string && !wants_integer {
            return true;
        }
        match literal {
            Some(SinkLiteralValue::String(value)) => {
                // A string literal can never meet an integer requirement.
                if wants_integer {
                    return false;
                }
                let exact_ok = self.literal_values.is_empty()
                    || self
                        .literal_values
                        .iter()
                        .any(|expected| value.eq_ignore_ascii_case(expected));
                if !exact_ok {
                    return false;
                }
                if self.literal_contains.is_empty() {
                    return true;
                }
                let lower = value.to_ascii_lowercase();
                self.literal_contains
                    .iter()
                    .any(|needle| lower.contains(&needle.to_ascii_lowercase()))
            }
            Some(SinkLiteralValue::Integer(value)) => {
                // An integer literal can never meet a string requirement.
                !wants_string
                    && (self.literal_integers.is_empty() || self.literal_integers.contains(value))
            }
            _ => false,
        }
    }

    /// Checks `properties` against `object_properties` and `object_missing_or_false`.
    ///
    /// Every `object_properties` predicate must find its key with a matching
    /// value. If `object_missing_or_false` is non-empty, at least one of its
    /// keys must be absent from `properties` or hold the boolean `false`.
    #[must_use]
    pub fn object_properties_satisfied(&self, properties: &[SinkObjectProperty]) -> bool {
        let required_ok = self.object_properties.iter().all(|predicate| {
            properties
                .iter()
                .find(|p| p.key == predicate.key)
                .is_some_and(|property| predicate.value.matches(&property.value))
        });
        if !required_ok {
            return false;
        }
        if self.object_missing_or_false.is_empty() {
            return true;
        }
        self.object_missing_or_false.iter().any(|key| {
            match properties.iter().find(|p| p.key == *key) {
                None => true,
                Some(property) => matches!(property.value, SinkLiteralValue::Boolean(false)),
            }
        })
    }

    /// Checks the `object_missing` predicate against the object's `keys`.
    ///
    /// Always `true` when `object_missing` is empty. Otherwise the key list
    /// must be complete (`keys_complete`) and lack at least one listed key.
    #[must_use]
    pub fn object_missing_satisfied(&self, keys: &[String], keys_complete: bool) -> bool {
        self.object_missing.is_empty()
            || (keys_complete && self.object_missing.iter().any(|key| !keys.contains(key)))
    }

    /// Returns `true` when no context keywords are configured, or when some
    /// context name contains some keyword (ASCII-case-insensitive).
    #[must_use]
    pub fn context_satisfied(&self, context_names: &[String]) -> bool {
        if self.context_keywords.is_empty() {
            return true;
        }
        for name in context_names {
            let lower = name.to_ascii_lowercase();
            for keyword in &self.context_keywords {
                if lower.contains(&keyword.to_ascii_lowercase()) {
                    return true;
                }
            }
        }
        false
    }

    /// Returns `true` when this row's enabler (if any) is met by `declared_deps`.
    #[must_use]
    pub fn enabler_satisfied(&self, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
        let gate = self.enabler.as_deref();
        enabler_satisfied(gate, declared_deps)
    }
}
/// Decides whether an optional enabler is met by the declared dependencies.
///
/// * `None` is always satisfied.
/// * An enabler ending in `/` is a prefix form: it is met by a dependency
///   equal to the enabler without the slash, or by any dependency starting
///   with the full enabler (slash included).
/// * Any other enabler must be present verbatim.
fn enabler_satisfied(enabler: Option<&str>, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
    match enabler {
        None => true,
        Some(spec) => match spec.strip_suffix('/') {
            Some(scope) => declared_deps
                .iter()
                .any(|dep| dep == scope || dep.starts_with(spec)),
            None => declared_deps.contains(spec),
        },
    }
}
impl LiteralPredicate {
    /// Returns `true` when `value` is the same kind of literal as this
    /// predicate and carries the expected content. Strings compare
    /// ASCII-case-insensitively; integers and booleans compare exactly.
    fn matches(&self, value: &SinkLiteralValue) -> bool {
        match self {
            Self::String(expected) => matches!(
                value,
                SinkLiteralValue::String(actual) if expected.eq_ignore_ascii_case(actual)
            ),
            Self::Integer(expected) => {
                matches!(value, SinkLiteralValue::Integer(actual) if expected == actual)
            }
            Self::Boolean(expected) => {
                matches!(value, SinkLiteralValue::Boolean(actual) if expected == actual)
            }
            Self::Null => matches!(value, SinkLiteralValue::Null),
        }
    }
}
impl Catalogue {
    /// All sink matcher rows, in catalogue order.
    #[must_use]
    pub fn matchers(&self) -> &[Matcher] {
        self.matchers.as_slice()
    }

    /// All source rows, in catalogue order (test-only accessor).
    #[cfg(test)]
    #[must_use]
    pub fn sources(&self) -> &[SourceMatcher] {
        self.sources.as_slice()
    }

    /// Test helper: `(id, title)` of the first source row matching
    /// `source_path`, ignoring enablers and with no extra receivers.
    #[cfg(test)]
    #[must_use]
    pub fn matching_source(&self, source_path: &str) -> Option<(&str, &str)> {
        let no_extra = FxHashSet::default();
        for source in &self.sources {
            if source.matches_with_extra_receivers(source_path, &no_extra) {
                return Some((source.id.as_str(), source.title.as_str()));
            }
        }
        None
    }

    /// Test helper: like [`Self::matching_source_for_deps_with_receivers`]
    /// with no request receivers.
    #[cfg(test)]
    #[must_use]
    pub fn matching_source_for_deps(
        &self,
        source_path: &str,
        declared_deps: &FxHashSet<String>,
    ) -> Option<(&str, &str)> {
        self.matching_source_for_deps_with_receivers(
            source_path,
            declared_deps,
            &FxHashSet::default(),
        )
    }

    /// Returns `(id, title)` of the first source row whose enabler is met by
    /// `declared_deps` and whose patterns match `source_path`.
    ///
    /// `request_receivers` is offered as extra receivers only to the row
    /// whose id is `http-request-input`; every other row gets none.
    #[must_use]
    pub fn matching_source_for_deps_with_receivers(
        &self,
        source_path: &str,
        declared_deps: &FxHashSet<String>,
        request_receivers: &FxHashSet<String>,
    ) -> Option<(&str, &str)> {
        let no_extra = FxHashSet::default();
        for source in &self.sources {
            if !source.enabler_satisfied(declared_deps) {
                continue;
            }
            let extra = if source.id == "http-request-input" {
                request_receivers
            } else {
                &no_extra
            };
            if source.matches_with_extra_receivers(source_path, extra) {
                return Some((source.id.as_str(), source.title.as_str()));
            }
        }
        None
    }

    /// Test helper: whether any source row matches `source_path`.
    #[cfg(test)]
    #[must_use]
    pub fn is_source_path(&self, source_path: &str) -> bool {
        let hit = self.matching_source(source_path);
        hit.is_some()
    }

    /// Title of the first matcher row whose id equals `id`.
    #[must_use]
    pub fn title_for(&self, id: &str) -> Option<&str> {
        self.matchers
            .iter()
            .find_map(|m| (m.id == id).then(|| m.title.as_str()))
    }
}
/// Looks up the title for matcher `id` in the process-wide embedded catalogue.
#[must_use]
pub fn catalogue_title(id: &str) -> Option<&'static str> {
    let embedded = catalogue();
    embedded.title_for(id)
}
/// Maps a catalogue `sink_shape` keyword to its [`SinkShape`]; `None` for
/// any unrecognised keyword. Matching is exact and case-sensitive.
fn parse_sink_shape(s: &str) -> Option<SinkShape> {
    let shape = match s {
        "call" => SinkShape::Call,
        "member-call" => SinkShape::MemberCall,
        "member-assign" => SinkShape::MemberAssign,
        "tagged-template" => SinkShape::TaggedTemplate,
        "jsx-attr" => SinkShape::JsxAttr,
        "new-expression" => SinkShape::NewExpression,
        _ => return None,
    };
    Some(shape)
}
/// Maps a catalogue `arg_kinds` keyword to its [`SinkArgKind`]; `None` for
/// any unrecognised keyword. Matching is exact and case-sensitive.
fn parse_arg_kind(s: &str) -> Option<SinkArgKind> {
    let kind = match s {
        "template-with-subst" => SinkArgKind::TemplateWithSubst,
        "concat" => SinkArgKind::Concat,
        "object" => SinkArgKind::Object,
        "call" => SinkArgKind::Call,
        "literal" => SinkArgKind::Literal,
        "no-arg" => SinkArgKind::NoArg,
        "other" => SinkArgKind::Other,
        _ => return None,
    };
    Some(kind)
}
/// Validates the raw `object_properties` list of matcher `id`.
///
/// An absent list yields an empty vector.
///
/// # Errors
///
/// Fails on the first predicate whose key is blank, or that does not set
/// exactly one of `string`, `boolean`, `integer`, or `null = true`.
fn parse_object_property_predicates(
    id: &str,
    raw: Option<Vec<RawObjectPropertyPredicate>>,
) -> Result<Vec<ObjectPropertyPredicate>, String> {
    raw.unwrap_or_default()
        .into_iter()
        .map(|predicate| {
            if predicate.key.trim().is_empty() {
                return Err(format!(
                    "matcher {id:?} has an object_properties predicate with an empty key"
                ));
            }
            // Only the four "exactly one value" shapes are accepted.
            let value = match (
                predicate.string,
                predicate.boolean,
                predicate.integer,
                predicate.null,
            ) {
                (Some(string), None, None, false) => LiteralPredicate::String(string),
                (None, Some(boolean), None, false) => LiteralPredicate::Boolean(boolean),
                (None, None, Some(integer), false) => LiteralPredicate::Integer(integer),
                (None, None, None, true) => LiteralPredicate::Null,
                _ => {
                    return Err(format!(
                        "matcher {id:?} object_properties predicate for {:?} must set exactly one of string | boolean | integer | null",
                        predicate.key
                    ));
                }
            };
            Ok(ObjectPropertyPredicate {
                key: predicate.key,
                value,
            })
        })
        .collect()
}
fn parse_catalogue(src: &str) -> Result<Catalogue, String> {
let raw: RawCatalogue =
toml::from_str(src).map_err(|e| format!("security_matchers.toml parse error: {e}"))?;
let mut matchers = Vec::with_capacity(raw.matcher.len());
for entry in raw.matcher {
matchers.push(parse_matcher_entry(entry)?);
}
if matchers.is_empty() {
return Err("security_matchers.toml has no [[matcher]] entries".to_string());
}
let sources = parse_source_catalogue(raw.source)?;
Ok(Catalogue { matchers, sources })
}
fn parse_matcher_entry(entry: RawMatcher) -> Result<Matcher, String> {
let (sink_shape, callee_patterns) = validate_matcher_core(&entry)?;
let arg_kinds = parse_matcher_arg_kinds(&entry.id, entry.arg_kinds.as_deref())?;
let enabler = validate_matcher_enabler(&entry.id, entry.enabler)?;
let object_properties = parse_object_property_predicates(&entry.id, entry.object_properties)?;
Ok(Matcher {
id: entry.id,
cwe: entry.cwe,
title: entry.title,
effect: entry.effect,
sink_shape,
callee_patterns,
arg_index: entry.arg_index,
evidence_template: entry.evidence_template,
import_provenance: entry.import_provenance,
enabler,
arg_kinds,
requires_source: entry.requires_source,
requires_source_kinds: entry.requires_source_kinds,
literal_values: entry.literal_values.unwrap_or_default(),
literal_contains: entry.literal_contains.unwrap_or_default(),
literal_integers: entry.literal_integers.unwrap_or_default(),
object_properties,
object_missing_or_false: entry.object_missing_or_false.unwrap_or_default(),
object_missing: entry.object_missing.unwrap_or_default(),
context_keywords: entry.context_keywords.unwrap_or_default(),
})
}
/// Runs the mandatory-field checks on a raw matcher row and returns its
/// parsed sink shape and callee patterns.
///
/// # Errors
///
/// Fails, in this order, on: a blank id, `cwe == 0`, an unknown
/// `sink_shape`, an empty `callee_patterns` list, a blank
/// `evidence_template`, or a blank callee pattern.
fn validate_matcher_core(entry: &RawMatcher) -> Result<(SinkShape, Vec<CalleePattern>), String> {
    if entry.id.trim().is_empty() {
        return Err("matcher id must be non-empty / non-whitespace".to_string());
    }
    if entry.cwe == 0 {
        return Err(format!("matcher {:?} has cwe 0; cwe must be > 0", entry.id));
    }
    let Some(sink_shape) = parse_sink_shape(&entry.sink_shape) else {
        return Err(format!(
            "matcher {:?} has unknown sink_shape {:?}; expected one of \
             call | member-call | member-assign | tagged-template | jsx-attr | new-expression",
            entry.id, entry.sink_shape
        ));
    };
    if entry.callee_patterns.is_empty() {
        return Err(format!(
            "matcher {:?} has no callee_patterns; at least one is required",
            entry.id
        ));
    }
    if entry.evidence_template.trim().is_empty() {
        return Err(format!(
            "matcher {:?} has an empty evidence_template",
            entry.id
        ));
    }
    let callee_patterns = entry
        .callee_patterns
        .iter()
        .map(|pat| {
            parse_callee_pattern(pat).ok_or_else(|| {
                format!(
                    "matcher {:?} has an empty / whitespace callee_pattern {pat:?}",
                    entry.id
                )
            })
        })
        .collect::<Result<Vec<_>, _>>()?;
    Ok((sink_shape, callee_patterns))
}
/// Passes `enabler` through unchanged unless it is present but blank.
///
/// # Errors
///
/// Fails when `enabler` is `Some` and empty or whitespace-only.
fn validate_matcher_enabler(id: &str, enabler: Option<String>) -> Result<Option<String>, String> {
    let is_blank = enabler.as_deref().is_some_and(|e| e.trim().is_empty());
    if is_blank {
        return Err(format!(
            "matcher {id:?} has an empty / whitespace enabler; omit the key for a global row"
        ));
    }
    Ok(enabler)
}
/// Parses the optional `arg_kinds` keyword list of matcher `id`.
///
/// An absent list yields `Ok(None)` (admit any kind).
///
/// # Errors
///
/// Fails when the list is present but empty, or on the first unknown keyword.
fn parse_matcher_arg_kinds(
    id: &str,
    raw_kinds: Option<&[String]>,
) -> Result<Option<Vec<SinkArgKind>>, String> {
    match raw_kinds {
        None => Ok(None),
        Some([]) => Err(format!(
            "matcher {id:?} has an empty arg_kinds list; omit the key to admit any shape"
        )),
        Some(listed) => listed
            .iter()
            .map(|raw| {
                parse_arg_kind(raw).ok_or_else(|| {
                    format!(
                        "matcher {id:?} has unknown arg_kind {raw:?}; expected one of \
                         template-with-subst | concat | object | call | literal | no-arg | other"
                    )
                })
            })
            .collect::<Result<Vec<_>, _>>()
            .map(Some),
    }
}
/// Validates every raw source row and converts it to a [`SourceMatcher`].
///
/// # Errors
///
/// Stops at the first invalid row. Per row the checks run in this order:
/// blank id, empty `path_patterns`, blank path pattern, blank
/// `receiver_allowlist` entry, blank enabler.
fn parse_source_catalogue(raw_sources: Vec<RawSource>) -> Result<Vec<SourceMatcher>, String> {
    raw_sources
        .into_iter()
        .map(|entry| {
            if entry.id.trim().is_empty() {
                return Err("source id must be non-empty / non-whitespace".to_string());
            }
            if entry.path_patterns.is_empty() {
                return Err(format!(
                    "source {:?} has no path_patterns; at least one is required",
                    entry.id
                ));
            }
            let path_patterns = parse_source_path_patterns(&entry)?;
            let receiver_allowlist = parse_source_receiver_allowlist(&entry)?;
            let blank_enabler = entry
                .enabler
                .as_deref()
                .is_some_and(|e| e.trim().is_empty());
            if blank_enabler {
                return Err(format!(
                    "source {:?} has an empty / whitespace enabler; omit the key for a global row",
                    entry.id
                ));
            }
            Ok(SourceMatcher {
                id: entry.id,
                title: entry.title,
                enabler: entry.enabler,
                path_patterns,
                receiver_allowlist,
            })
        })
        .collect()
}
/// Parses every `path_patterns` string of a source row.
///
/// # Errors
///
/// Fails on the first pattern that is empty or whitespace-only.
fn parse_source_path_patterns(entry: &RawSource) -> Result<Vec<CalleePattern>, String> {
    entry
        .path_patterns
        .iter()
        .map(|pattern| {
            parse_callee_pattern(pattern).ok_or_else(|| {
                format!(
                    "source {:?} has an empty / whitespace path_pattern {pattern:?}",
                    entry.id
                )
            })
        })
        .collect()
}
/// Validates a source row's `receiver_allowlist` and returns it ASCII-lower-cased.
///
/// # Errors
///
/// Fails on the first entry that is empty or whitespace-only.
fn parse_source_receiver_allowlist(entry: &RawSource) -> Result<Vec<String>, String> {
    entry
        .receiver_allowlist
        .iter()
        .map(|receiver| {
            if receiver.trim().is_empty() {
                Err(format!(
                    "source {:?} has an empty / whitespace receiver_allowlist entry; omit the key for an ungated row",
                    entry.id
                ))
            } else {
                Ok(receiver.to_ascii_lowercase())
            }
        })
        .collect()
}
/// Returns the process-wide catalogue parsed from the embedded TOML.
///
/// The catalogue is parsed on first use and cached in a `OnceLock`.
///
/// # Panics
///
/// Panics on first use if the embedded TOML fails to parse or validate.
#[expect(
    clippy::expect_used,
    reason = "compile-time-embedded catalogue pinned by security_catalogue_parses"
)]
pub fn catalogue() -> &'static Catalogue {
    static CATALOGUE: std::sync::OnceLock<Catalogue> = std::sync::OnceLock::new();

    /// Parses the embedded TOML; only ever invoked through the `OnceLock`.
    fn load_embedded() -> Catalogue {
        parse_catalogue(CATALOGUE_TOML).expect(
            "embedded crates/security/data/security_matchers.toml must parse; run \
             `cargo test -p fallow-security security_catalogue_parses` to see the error",
        )
    }

    CATALOGUE.get_or_init(load_embedded)
}
#[cfg(test)]
#[allow(
clippy::expect_used,
clippy::unwrap_used,
reason = "catalogue parser tests assert fixture invariants directly"
)]
mod tests {
use super::*;
use rustc_hash::FxHashSet;
#[test]
fn security_catalogue_parses() {
let cat = catalogue();
assert!(!cat.matchers().is_empty(), "catalogue must have matchers");
assert!(
cat.matchers().iter().any(|m| m.id == "dangerous-html"),
"catalogue must contain the dangerous-html seed"
);
}
#[test]
fn catalogue_rows_are_unique() {
    /// Joins an optional string list with `|`; an absent list becomes "".
    fn joined(values: Option<&Vec<String>>) -> String {
        values.map_or_else(String::new, |items| items.join("|"))
    }

    // Work on the raw rows so the key is built from exactly what the TOML says.
    let raw: RawCatalogue = toml::from_str(CATALOGUE_TOML).unwrap();
    let mut seen = FxHashSet::default();
    for m in &raw.matcher {
        let pats = m.callee_patterns.join("|");
        let enabler = m.enabler.as_deref().unwrap_or("");
        let import_provenance = m.import_provenance.as_deref().unwrap_or("");
        let arg_kinds = joined(m.arg_kinds.as_ref());
        let literal_values = joined(m.literal_values.as_ref());
        let literal_contains = joined(m.literal_contains.as_ref());
        let literal_integers = match &m.literal_integers {
            None => String::new(),
            Some(values) => values
                .iter()
                .map(i64::to_string)
                .collect::<Vec<_>>()
                .join("|"),
        };
        let object_properties = format!("{:?}", m.object_properties);
        let object_missing_or_false = joined(m.object_missing_or_false.as_ref());
        let object_missing = joined(m.object_missing.as_ref());
        let context_keywords = joined(m.context_keywords.as_ref());
        // Two rows are duplicates only when every discriminating field agrees.
        let key = format!(
            "{}::{}::{pats}::{enabler}::{import_provenance}::{}::{arg_kinds}::{literal_values}::{literal_contains}::{literal_integers}::{object_properties}::{object_missing_or_false}::{object_missing}::{context_keywords}",
            m.id, m.sink_shape, m.requires_source
        );
        assert!(seen.insert(key.clone()), "duplicate matcher row: {key}");
    }
}
#[test]
fn catalogue_ids_non_empty() {
for m in catalogue().matchers() {
assert!(
!m.id.trim().is_empty(),
"matcher id must be non-empty / non-whitespace"
);
}
}
#[test]
fn catalogue_cwe_valid() {
for m in catalogue().matchers() {
assert!(m.cwe > 0, "matcher {:?} has cwe 0", m.id);
}
}
#[test]
fn catalogue_sink_shapes_known() {
let raw: RawCatalogue = toml::from_str(CATALOGUE_TOML).unwrap();
for m in &raw.matcher {
assert!(
parse_sink_shape(&m.sink_shape).is_some(),
"matcher {:?} has unknown sink_shape {:?}",
m.id,
m.sink_shape
);
}
}
#[test]
fn catalogue_callee_patterns_non_empty() {
for m in catalogue().matchers() {
assert!(
!m.callee_patterns.is_empty(),
"matcher {:?} has no callee_patterns",
m.id
);
for p in &m.callee_patterns {
assert!(
!p.raw().trim().is_empty(),
"matcher {:?} has an empty callee_pattern",
m.id
);
}
}
}
#[test]
fn catalogue_evidence_templates_non_empty() {
for m in catalogue().matchers() {
assert!(
!m.evidence_template.trim().is_empty(),
"matcher {:?} has an empty evidence_template",
m.id
);
}
}
#[test]
fn parse_rejects_empty_id() {
let toml = r#"
[[matcher]]
id = ""
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("id must be non-empty"), "got: {err}");
}
#[test]
fn parse_rejects_zero_cwe() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 0
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("cwe"), "got: {err}");
}
#[test]
fn parse_rejects_missing_effect() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("missing field `effect`"), "got: {err}");
}
#[test]
fn parse_rejects_unknown_sink_shape() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "not-a-shape"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("unknown sink_shape"), "got: {err}");
}
#[test]
fn parse_rejects_empty_callee_patterns() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = []
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("callee_patterns"), "got: {err}");
}
#[test]
fn parse_rejects_empty_pattern_string() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = [" "]
arg_index = 0
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("empty"), "got: {err}");
}
#[test]
fn parse_rejects_empty_evidence_template() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = " "
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("evidence_template"), "got: {err}");
}
#[test]
fn parse_rejects_no_matchers() {
let err = parse_catalogue("").unwrap_err();
assert!(err.contains("no [[matcher]]"), "got: {err}");
}
#[test]
fn segment_match_is_not_substring() {
    // (pattern, paths that must match, paths that must not match)
    let table: [(&str, &[&str], &[&str]); 3] = [
        ("fetch", &["fetch"], &["myfetch", "fetcher"]),
        (
            "*.innerHTML",
            &["el.innerHTML", "this.node.innerHTML"],
            &["el.innerHTMLFoo", "innerHTML"],
        ),
        (
            "child_process.exec",
            &["child_process.exec"],
            &["exec", "child_process.execSync", "my_child_process.exec"],
        ),
    ];
    for (raw, hits, misses) in table {
        let pattern = parse_callee_pattern(raw).unwrap();
        for path in hits {
            assert!(pattern.matches(path));
        }
        for path in misses {
            assert!(!pattern.matches(path));
        }
    }
}
#[test]
fn wildcard_only_pattern_matches_nothing() {
    // A lone `*` parses, but has no literal segments left to anchor a match.
    let star = parse_callee_pattern("*").unwrap();
    for path in ["el.innerHTML", "anything"] {
        assert!(!star.matches(path));
    }
}
#[test]
fn trailing_wildcard_prefix_matches() {
    // `prefix.*` needs the exact prefix segments plus at least one more segment.
    let trailing = parse_callee_pattern("child_process.*").unwrap();
    for path in ["child_process.exec", "child_process.exec.call"] {
        assert!(trailing.matches(path));
    }
    for path in ["child_process", "my_child_process.exec", "exec"] {
        assert!(!trailing.matches(path));
    }

    let console = parse_callee_pattern("console.*").unwrap();
    assert!(console.matches("console.log"));
    assert!(!console.matches("myconsole.log"));
}
#[test]
fn double_wildcard_pattern_matches_nothing() {
    // Patterns with both a leading and a trailing `*` parse but never match.
    for (raw, path) in [("*.query.*", "db.query.run"), ("*.*", "a.b")] {
        let pattern = parse_callee_pattern(raw).unwrap();
        assert!(!pattern.matches(path));
    }
}
#[test]
fn arg_kinds_unset_admits_any_shape() {
let html = catalogue()
.matchers()
.iter()
.find(|m| m.id == "dangerous-html")
.expect("dangerous-html present");
for kind in [
SinkArgKind::TemplateWithSubst,
SinkArgKind::Concat,
SinkArgKind::Object,
SinkArgKind::Call,
SinkArgKind::Literal,
SinkArgKind::NoArg,
SinkArgKind::Other,
] {
assert!(html.admits_arg_kind(kind), "html admits {kind:?}");
}
}
#[test]
fn sql_injection_query_execute_excludes_object_arg_kind() {
let query_matchers: Vec<&Matcher> = catalogue()
.matchers()
.iter()
.filter(|m| {
m.id == "sql-injection"
&& m.callee_patterns
.iter()
.any(|p| p.raw() == "*.query" || p.raw() == "*.execute")
})
.collect();
assert!(
!query_matchers.is_empty(),
"sql-injection .query/.execute rows present"
);
for m in query_matchers {
let kinds = m
.arg_kinds
.as_ref()
.unwrap_or_else(|| panic!("sql-injection query/execute must constrain arg_kinds"));
assert!(
!kinds.contains(&SinkArgKind::Object),
"sql-injection .query/.execute must not admit the object (parameterized) form"
);
assert!(
!m.admits_arg_kind(SinkArgKind::Object),
"admits_arg_kind agrees: object excluded"
);
assert!(
m.admits_arg_kind(SinkArgKind::Concat),
"sql-injection .query/.execute admits the concat (unsafe) form"
);
}
}
#[test]
fn source_required_matchers_are_explicit() {
let mass_assignment = catalogue()
.matchers()
.iter()
.find(|m| m.id == "mass-assignment")
.expect("mass-assignment row present");
assert!(
mass_assignment.requires_source,
"mass-assignment should only fire for source-backed arguments"
);
}
#[test]
fn literal_integer_predicate_matches_integer_literals() {
let chmod = catalogue()
.matchers()
.iter()
.find(|m| m.id == "world-writable-permission" && m.sink_shape == SinkShape::MemberCall)
.expect("world-writable permission row present");
assert!(chmod.literal_value_satisfied(Some(&SinkLiteralValue::Integer(511))));
assert!(!chmod.literal_value_satisfied(Some(&SinkLiteralValue::Integer(420))));
assert!(
!chmod.literal_value_satisfied(Some(&SinkLiteralValue::String("0o777".to_string())))
);
}
#[test]
fn object_property_predicate_matches_nested_integer_values() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 732
title = "x"
effect = "unknown"
sink_shape = "member-call"
callee_patterns = ["fs.chmod"]
arg_index = 0
arg_kinds = ["object"]
object_properties = [{ key = "mode.value", integer = 511 }]
evidence_template = "x"
"#;
let cat = parse_catalogue(toml).expect("catalogue parses");
let matcher = cat.matchers().first().expect("matcher present");
let properties = vec![SinkObjectProperty {
key: "mode.value".to_string(),
value: SinkLiteralValue::Integer(511),
}];
assert!(matcher.object_properties_satisfied(&properties));
}
#[test]
fn object_missing_requires_complete_key_metadata() {
let jwt_verify = catalogue()
.matchers()
.iter()
.find(|m| m.id == "jwt-verify-missing-algorithms")
.expect("jwt verify missing algorithms row present");
assert!(
jwt_verify.is_literal_aware(),
"object_missing rows opt into literal-aware matching"
);
assert!(jwt_verify.object_missing_satisfied(&[], true));
assert!(jwt_verify.object_missing_satisfied(&["audience".to_string()], true));
assert!(!jwt_verify.object_missing_satisfied(&["algorithms".to_string()], true));
assert!(!jwt_verify.object_missing_satisfied(&["audience".to_string()], false));
}
#[test]
fn parse_rejects_unknown_arg_kind() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 89
title = "x"
effect = "unknown"
sink_shape = "member-call"
callee_patterns = ["*.query"]
arg_index = 0
arg_kinds = ["not-a-kind"]
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("unknown arg_kind"), "got: {err}");
}
#[test]
fn enabler_unset_is_global() {
let html = catalogue()
.matchers()
.iter()
.find(|m| m.id == "dangerous-html")
.expect("dangerous-html present");
assert!(html.enabler.is_none(), "dangerous-html is a global row");
assert!(html.enabler_satisfied(&FxHashSet::default()));
}
#[test]
fn enabler_satisfied_exact_and_prefix() {
let mut m = catalogue()
.matchers()
.iter()
.find(|m| m.id == "dangerous-html")
.cloned()
.expect("dangerous-html present");
m.enabler = Some("jquery".to_string());
let mut deps = FxHashSet::default();
assert!(!m.enabler_satisfied(&deps), "absent dep is not satisfied");
deps.insert("jquery".to_string());
assert!(m.enabler_satisfied(&deps), "present exact dep satisfies");
m.enabler = Some("@angular/".to_string());
let mut scoped = FxHashSet::default();
assert!(!m.enabler_satisfied(&scoped));
scoped.insert("@angular/platform-browser".to_string());
assert!(m.enabler_satisfied(&scoped), "prefix dep satisfies");
let mut bare_scope = FxHashSet::default();
bare_scope.insert("@angular".to_string());
assert!(
m.enabler_satisfied(&bare_scope),
"bare scope name satisfies the prefix form"
);
m.enabler = Some("react".to_string());
let mut reactish = FxHashSet::default();
reactish.insert("react-dom".to_string());
assert!(
!m.enabler_satisfied(&reactish),
"exact enabler must not prefix-match"
);
}
#[test]
fn framework_scoped_rows_are_present() {
let cat = catalogue();
let angular = cat
.matchers()
.iter()
.find(|m| m.id == "angular-trusted-html")
.expect("angular-trusted-html present");
assert_eq!(
angular.enabler.as_deref(),
Some("@angular/platform-browser")
);
assert!(
cat.matchers().iter().any(|m| m.id == "jquery-html"),
"jquery-html present"
);
assert!(
cat.matchers().iter().any(|m| m.id == "dom-document-write"),
"dom-document-write present"
);
}
#[test]
fn parse_rejects_empty_enabler() {
let toml = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-call"
callee_patterns = ["*.html"]
arg_index = 0
enabler = " "
evidence_template = "x"
"#;
let err = parse_catalogue(toml).unwrap_err();
assert!(err.contains("empty / whitespace enabler"), "got: {err}");
}
/// The shipped catalogue must contain at least one source row, and every
/// source row must have a non-blank id, a non-blank title and path patterns.
#[test]
fn catalogue_has_untrusted_sources() {
    let cat = catalogue();
    let sources = cat.sources();
    assert!(
        !sources.is_empty(),
        "catalogue must ship untrusted-source rows"
    );
    sources.iter().for_each(|source| {
        assert!(!source.id.trim().is_empty(), "source id non-empty");
        assert!(!source.title.trim().is_empty(), "source title non-empty");
        assert!(!source.path_patterns.is_empty(), "source has path patterns");
    });
}
/// Spot-checks `is_source_path` against the shipped catalogue: a set of
/// paths that must be recognised as sources, followed by look-alike paths
/// that must not be.
#[test]
fn source_paths_match_expected_request_inputs() {
    let catalog = catalogue();
    let is_source = |path: &str| catalog.is_source_path(path);

    // Paths expected to be classified as sources.
    assert!(is_source("req.query"));
    assert!(is_source("ctx.req.query"));
    assert!(is_source("request.body"));
    assert!(is_source("req.params"));
    assert!(is_source("process.argv"));
    assert!(is_source("event.data"));
    assert!(is_source("request.rawBody"));
    assert!(is_source("document.referrer"));
    assert!(is_source("window.name"));
    assert!(is_source("document.cookie"));

    // Paths sharing a trailing segment with the above but on other receivers.
    assert!(!is_source("config.value"));
    assert!(!is_source("user.name"));
    assert!(!is_source("profile.name"));
    assert!(!is_source("jar.cookie"));
}
/// Checks the per-source `matches` helper on the `http-request-input` row:
/// it accepts `req.query` and rejects `process.argv`.
#[test]
fn source_matcher_matches_helper() {
    let catalog = catalogue();
    let http_source = catalog
        .sources()
        .iter()
        .find(|source| source.id == "http-request-input")
        .expect("http-request-input source present");

    assert!(http_source.matches("req.query"));
    assert!(!http_source.matches("process.argv"));
}
/// `matched_receiver` on a `*.suffix` pattern yields the path segment
/// directly before the suffix, and `None` when the path does not match or
/// when the pattern has no wildcard.
#[test]
fn matched_receiver_returns_segment_before_suffix() {
    let suffix_pattern = parse_callee_pattern("*.query").expect("pattern parses");

    // Matching paths: the receiver is the segment immediately before `.query`.
    assert_eq!(suffix_pattern.matched_receiver("db.query"), Some("db"));
    assert_eq!(suffix_pattern.matched_receiver("req.query"), Some("req"));
    assert_eq!(suffix_pattern.matched_receiver("ctx.req.query"), Some("req"));

    // A different trailing segment yields no receiver.
    assert_eq!(suffix_pattern.matched_receiver("req.body"), None);

    // An exact (non-wildcard) pattern yields no receiver even on a full match.
    let exact_pattern = parse_callee_pattern("process.env").expect("pattern parses");
    assert_eq!(exact_pattern.matched_receiver("process.env"), None);
}
/// Receivers that look like database handles must not be treated as
/// sources, while request-like receivers with the same trailing segments
/// must still be recognised.
#[test]
fn receiver_allowlist_rejects_orm_query_builders_keeps_request_objects() {
    let catalog = catalogue();
    let is_source = |path: &str| catalog.is_source_path(path);

    // Database-style receivers: rejected.
    assert!(!is_source("db.query"), "Drizzle db.query");
    assert!(!is_source("prisma.query"), "Prisma prisma.query");
    assert!(!is_source("drizzle.query"));
    assert!(!is_source("knex.body"));
    assert!(!is_source("client.query"));
    assert!(!is_source("dbConn.query"));
    assert!(!is_source("database.params"));

    // Request-style receivers: accepted, including a capitalised receiver.
    assert!(is_source("req.query"), "Express req.query");
    assert!(is_source("request.body"));
    assert!(is_source("ctx.params"), "Koa/Elysia ctx.params");
    assert!(is_source("context.body"));
    assert!(is_source("event.query"), "SvelteKit event.query");
    assert!(is_source("ctx.req.query"));
    assert!(is_source("Req.query"));
}
/// Extra receiver names passed to `matching_source_for_deps_with_receivers`
/// make otherwise-unknown receivers resolve to a source, without dropping
/// `req` and without admitting `db`.
#[test]
fn configured_request_receivers_extend_http_request_source_allowlist() {
    let catalog = catalogue();
    let no_deps = FxHashSet::default();
    let extra_receivers = FxHashSet::from_iter(["h".to_string(), "httpreq".to_string()]);
    let resolves = |path: &str| {
        catalog
            .matching_source_for_deps_with_receivers(path, &no_deps, &extra_receivers)
            .is_some()
    };

    // Configured receivers; the second differs from its entry only in letter case.
    assert!(resolves("h.query"));
    assert!(resolves("HttpReq.body"));

    // A receiver that is not among the extra names still resolves.
    assert!(resolves("req.params"));

    // A receiver that is not among the extra names and must stay rejected.
    assert!(!resolves("db.query"));
}
/// `*.searchParams` paths are recognised as sources for each of the
/// receiver names tried here (`u`, `url`, `params`).
#[test]
fn search_params_source_stays_ungated() {
    let catalog = catalogue();

    assert!(catalog.is_source_path("u.searchParams"));
    assert!(catalog.is_source_path("url.searchParams"));
    assert!(catalog.is_source_path("params.searchParams"));
}
/// A source whose `receiver_allowlist` contains a whitespace-only entry must
/// make `parse_catalogue` fail with an error naming `receiver_allowlist`.
#[test]
fn parse_rejects_empty_receiver_allowlist_entry() {
    // Valid matcher row plus a source row with one blank allowlist entry.
    let blank_receiver_doc = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
[[source]]
id = "http"
title = "HTTP"
path_patterns = ["*.query"]
receiver_allowlist = ["req", " "]
"#;
    let message = parse_catalogue(blank_receiver_doc).unwrap_err();
    assert!(message.contains("receiver_allowlist"), "got: {message}");
}
/// The express-enabled `framework-handler-input` source matches
/// `framework.request` by pattern, but the catalogue lookup only returns it
/// once `express` is in the dependency set.
#[test]
fn source_enabler_gates_framework_param_sources() {
    let catalog = catalogue();
    let express_source = catalog
        .sources()
        .iter()
        .find(|source| {
            source.id == "framework-handler-input" && source.enabler.as_deref() == Some("express")
        })
        .expect("express handler source present");
    assert!(express_source.matches("framework.request"));

    // With no dependencies the enabler is unsatisfied and the lookup misses.
    let no_deps = FxHashSet::default();
    assert!(!express_source.enabler_satisfied(&no_deps));
    assert!(
        catalog
            .matching_source_for_deps("framework.request", &no_deps)
            .is_none(),
        "framework handler params require an enabler"
    );

    // With `express` present the lookup returns the source's id and title.
    let with_express = FxHashSet::from_iter(["express".to_string()]);
    assert!(express_source.enabler_satisfied(&with_express));
    assert_eq!(
        catalog.matching_source_for_deps("framework.request", &with_express),
        Some(("framework-handler-input", "Framework handler input"))
    );
}
/// `graphql.args` and `trpc.input` resolve to a source only when a matching
/// package (`@apollo/server` and `@trpc/server` here) is in the dependency set.
#[test]
fn source_enabler_gates_graphql_and_trpc_param_sources() {
    let catalog = catalogue();

    // No dependencies: both lookups must miss.
    let no_deps = FxHashSet::default();
    assert!(
        catalog
            .matching_source_for_deps("graphql.args", &no_deps)
            .is_none(),
        "GraphQL resolver args require a matching package"
    );
    assert!(
        catalog
            .matching_source_for_deps("trpc.input", &no_deps)
            .is_none(),
        "tRPC procedure input requires a matching package"
    );

    // Each enabling package unlocks its own source row.
    let apollo_deps = FxHashSet::from_iter(["@apollo/server".to_string()]);
    assert_eq!(
        catalog.matching_source_for_deps("graphql.args", &apollo_deps),
        Some(("graphql-resolver-args", "GraphQL resolver args"))
    );
    let trpc_server_deps = FxHashSet::from_iter(["@trpc/server".to_string()]);
    assert_eq!(
        catalog.matching_source_for_deps("trpc.input", &trpc_server_deps),
        Some(("trpc-procedure-input", "tRPC procedure input"))
    );
}
/// A source row with an empty `path_patterns` list must make
/// `parse_catalogue` fail with an error naming `path_patterns`.
#[test]
fn parse_rejects_source_without_patterns() {
    // Valid matcher row plus a source row that declares no patterns.
    let patternless_source_doc = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"
[[source]]
id = "bad"
title = "bad"
path_patterns = []
"#;
    let message = parse_catalogue(patternless_source_doc).unwrap_err();
    assert!(message.contains("path_patterns"), "got: {message}");
}
/// A matcher that sets `arg_kinds` to an empty list must make
/// `parse_catalogue` fail with an error mentioning the empty `arg_kinds`.
#[test]
fn parse_rejects_empty_arg_kinds() {
    // Otherwise complete matcher row; only `arg_kinds` is invalid.
    let empty_arg_kinds_doc = r#"
[[matcher]]
id = "x"
cwe = 89
title = "x"
effect = "unknown"
sink_shape = "member-call"
callee_patterns = ["*.query"]
arg_index = 0
arg_kinds = []
evidence_template = "x"
"#;
    let message = parse_catalogue(empty_arg_kinds_doc).unwrap_err();
    assert!(message.contains("empty arg_kinds"), "got: {message}");
}
}