Skip to main content

fallow_security/
lib.rs

1//! Data-driven catalogue of syntactic security-sink candidate matchers.
2//!
3//! The catalogue is community-maintainable: every matcher lives in
4//! `crates/security/data/security_matchers.toml`, embedded via `include_str!` and
5//! parsed once behind a `OnceLock`. There is NO regeneration step. Adding a
6//! category is a single `[[matcher]]` TOML edit plus ZERO Rust enum or
7//! discriminant churn (the `tainted_sink` detector matches captured
8//! category-blind `SinkSite`s against the loaded catalogue).
9//!
10//! Findings are CANDIDATES for downstream agent verification, NOT verified
11//! vulnerabilities: fallow is deterministic and syntactic, never taint-proof.
12//! Matchers default to non-literal arguments. A row can opt into narrowly
13//! captured literal or context predicates when the literal itself is the signal.
14
15use fallow_config::EffectKind;
16use fallow_types::extract::{SinkArgKind, SinkLiteralValue, SinkObjectProperty, SinkShape};
17use rustc_hash::FxHashSet;
18
/// Category id string for hardcoded-secret candidates.
/// NOTE(review): nothing in this part of the file reads it; presumably a
/// detector elsewhere in the workspace does. Confirm before renaming.
pub const HARDCODED_SECRET_CATEGORY_ID: &str = "hardcoded-secret";
/// Human-readable title that accompanies [`HARDCODED_SECRET_CATEGORY_ID`].
pub const HARDCODED_SECRET_CATEGORY_TITLE: &str = "Hardcoded secret candidate";

/// Embedded catalogue source. Because it is `include_str!`-embedded at compile
/// time, a green `security_catalogue_parses` test guarantees the released
/// binary parses. The path is resolved relative to this source file.
const CATALOGUE_TOML: &str = include_str!("../data/security_matchers.toml");
26
/// Top-level shape of the catalogue TOML as deserialized: repeated
/// `[[matcher]]` and `[[source]]` tables. Both lists default to empty when
/// absent from the document; `parse_catalogue` separately rejects a catalogue
/// that ends up with no matchers.
#[derive(serde::Deserialize)]
struct RawCatalogue {
    /// Unvalidated sink matcher rows (`[[matcher]]`).
    #[serde(default)]
    matcher: Vec<RawMatcher>,
    /// Unvalidated untrusted-source rows (`[[source]]`).
    #[serde(default)]
    source: Vec<RawSource>,
}
34
/// A raw untrusted-source row (issue #859). Names member-access paths that carry
/// attacker-controlled input; the analyze layer matches captured tainted-binding
/// source paths against these to mark source-tainted locals.
#[derive(serde::Deserialize)]
struct RawSource {
    /// Source id. Catalogue source lookups return it, and a matcher's
    /// `requires_source_kinds` names sources by this id.
    id: String,
    /// Human-readable title returned next to the id by source lookups.
    title: String,
    /// Optional framework enabler, same semantics as matcher enablers.
    #[serde(default)]
    enabler: Option<String>,
    /// Member-access path patterns, written in the same segment-aware syntax
    /// as callee patterns: an exact dotted path, or a leading `*.` wildcard
    /// that stands for any object prefix.
    path_patterns: Vec<String>,
    /// Optional allowlist of receiver names for leading-`*.` wildcard patterns
    /// (issue #1092). When non-empty, a wildcard pattern fires only if the
    /// matched member's receiver is one of these (case-insensitive), so
    /// `*.query` matches `req.query` but not `db.query`. Empty / absent leaves
    /// the row ungated (every receiver matches). Has no effect on exact
    /// patterns, whose receiver is fixed in the pattern itself.
    #[serde(default)]
    receiver_allowlist: Vec<String>,
}
55
/// A raw `[[matcher]]` row exactly as deserialized from the catalogue TOML,
/// before validation. String-typed fields such as `sink_shape` and `arg_kinds`
/// are resolved into typed values when the row is converted to a [`Matcher`].
#[derive(serde::Deserialize)]
struct RawMatcher {
    /// Category id of the row; `Catalogue::title_for` looks titles up by it.
    id: String,
    /// CWE number attached to the category. The `parse_catalogue` docs list a
    /// `cwe > 0` validation.
    cwe: u32,
    /// Human-readable title for the category.
    title: String,
    /// Effect classification, deserialized as `fallow_config::EffectKind`.
    /// NOTE(review): how the detector uses it is not visible in this part of
    /// the file.
    effect: EffectKind,
    /// Kebab-case shape string, validated into [`SinkShape`].
    sink_shape: String,
    /// Callee patterns in the segment-aware syntax parsed by
    /// [`CalleePattern`] (exact path, leading `*.`, or trailing `.*`).
    callee_patterns: Vec<String>,
    /// NOTE(review): presumably the zero-based position of the sink argument
    /// this row inspects; inferred from the name, confirm against the detector.
    arg_index: u32,
    /// Template used when rendering evidence for a finding; the matched
    /// pattern text is surfaced in it as `{pattern}`.
    evidence_template: String,
    /// NOTE(review): presumably limits the row to callees imported from this
    /// module specifier; the consumer is outside this view, so confirm.
    #[serde(default)]
    import_provenance: Option<String>,
    /// Optional framework enabler: a package name that gates this row on the
    /// active framework (issue #861). The plugin system already activates on the
    /// declared dependency set, so a row carrying `enabler = "@angular/platform-browser"`
    /// fires only when that package (or, with a trailing `/`, any package under
    /// that prefix) is present in the project's declared dependencies. Lets a
    /// framework-specific idiom (`bypassSecurityTrustHtml`, `dangerouslySetInnerHTML`)
    /// be recognized with higher precision without a new enum variant. Unset means
    /// the row is global (the prior behavior).
    #[serde(default)]
    enabler: Option<String>,
    /// Optional allowlist of argument shapes. When set, the captured sink site's
    /// `arg_kind` must be one of the listed kebab-case kinds for the matcher to
    /// fire. Lets a matcher require the unsafe SQL shapes (`concat`,
    /// `template-with-subst`) and exclude the safely-parameterized forms
    /// (`object` for `.execute({ sql, args })`, the bare `sql` tag). Unset means
    /// any non-literal argument shape matches (the prior behavior).
    #[serde(default)]
    arg_kinds: Option<Vec<String>>,
    /// Optional string-literal equality predicates for literal-aware rows.
    #[serde(default)]
    literal_values: Option<Vec<String>>,
    /// Optional string-literal substring predicates for literal-aware rows.
    #[serde(default)]
    literal_contains: Option<Vec<String>>,
    /// Optional integer-literal equality predicates for literal-aware rows.
    #[serde(default)]
    literal_integers: Option<Vec<i64>>,
    /// Optional object-literal property equality predicates.
    #[serde(default)]
    object_properties: Option<Vec<RawObjectPropertyPredicate>>,
    /// Optional object-literal flags that are unsafe when missing or `false`.
    #[serde(default)]
    object_missing_or_false: Option<Vec<String>>,
    /// Optional object-literal keys that are unsafe when absent. Unlike
    /// `object_missing_or_false`, this checks key presence only and refuses
    /// incomplete object shapes.
    #[serde(default)]
    object_missing: Option<Vec<String>>,
    /// Optional context-name keywords for zero-arg sinks like `Math.random()`.
    #[serde(default)]
    context_keywords: Option<Vec<String>>,
    /// Optional precision gate: require the captured sink argument to reference
    /// a local binding that came from a configured untrusted source.
    #[serde(default)]
    requires_source: bool,
    /// Optional precision gate narrowing `requires_source` to SPECIFIC source
    /// kinds by catalogue source id (issue #890). Empty (default) admits any
    /// matched source (the prior behavior); when set, the matched source's id
    /// must be one of these. Lets `secret-to-network` fire only when backed by a
    /// SECRET source (`process-env` / `import-meta-env`), not request input
    /// (which the `ssrf` rows already cover).
    #[serde(default)]
    requires_source_kinds: Vec<String>,
}
123
/// One raw `object_properties` entry: an object-literal key plus the literal
/// value it must carry. `parse_object_property_predicates` requires a
/// non-blank `key` and exactly one of `string` / `boolean` / `integer` /
/// `null` to be set.
#[derive(Debug, serde::Deserialize)]
struct RawObjectPropertyPredicate {
    /// Property name to look up in the captured object literal.
    key: String,
    /// Expected string value, when the predicate targets a string literal.
    #[serde(default)]
    string: Option<String>,
    /// Expected boolean value, when the predicate targets a boolean literal.
    #[serde(default)]
    boolean: Option<bool>,
    /// Expected integer value, when the predicate targets an integer literal.
    #[serde(default)]
    integer: Option<i64>,
    /// `true` when the predicate targets a `null` literal; defaults to `false`.
    #[serde(default)]
    null: bool,
}
136
/// The expected literal value of an object-property predicate. Compared
/// against a captured `SinkLiteralValue` by `LiteralPredicate::matches`; a
/// predicate only ever matches a captured value of the same kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LiteralPredicate {
    /// A string literal; `matches` compares it ASCII-case-insensitively.
    String(String),
    /// An integer literal, compared for equality.
    Integer(i64),
    /// A boolean literal, compared for equality.
    Boolean(bool),
    /// The `null` literal.
    Null,
}
144
/// A validated object-literal property predicate: the property named `key`
/// must be present in the captured object and carry a literal that satisfies
/// `value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectPropertyPredicate {
    /// Property name; validated as non-blank when parsed from the catalogue.
    key: String,
    /// The literal the property must hold.
    value: LiteralPredicate,
}
150
/// A pre-segmented callee pattern. Matching is segment-aware (NOT substring):
/// the pattern is split on `.`, a leading `*` segment means "any object"
/// (`*.innerHTML` matches `el.innerHTML` and `this.node.innerHTML` by
/// suffix-matching the trailing non-`*` segments), and a trailing `*` segment
/// means "any member" (`child_process.*` matches `child_process.exec` by
/// prefix-matching the leading non-`*` segments). The security catalogue uses
/// exact and leading-wildcard rows; the trailing form serves the boundary
/// forbidden-call detector.
///
/// A pattern that carries both wildcards, or that has no concrete segment at
/// all (for example a lone `*`), can still be constructed but matches nothing.
#[derive(Debug, Clone)]
pub struct CalleePattern {
    /// The literal source pattern (`"*.innerHTML"`, `"child_process.exec"`),
    /// surfaced in evidence rendering as `{pattern}`.
    raw: String,
    /// Segments between any leading and trailing `*` (e.g. `["innerHTML"]`
    /// for `*.innerHTML`, `["child_process"]` for `child_process.*`,
    /// `["child_process", "exec"]` for the exact dotted form). Despite the
    /// name, these act as a prefix when the pattern has a trailing wildcard.
    suffix_segments: Vec<String>,
    /// Whether the pattern began with a `*` wildcard object segment.
    leading_wildcard: bool,
    /// Whether the pattern ended with a `*` wildcard member segment.
    trailing_wildcard: bool,
}
173
174impl CalleePattern {
175    /// Parse a raw pattern string into its segmented form. Returns `None` for
176    /// an empty or whitespace-only pattern. Public constructor for non-security
177    /// reusers of the segment-aware matcher (the boundary forbidden-call
178    /// detector); the catalogue's own rows go through the same parser.
179    #[must_use]
180    pub fn parse(raw: &str) -> Option<Self> {
181        parse_callee_pattern(raw)
182    }
183
184    /// The original pattern text, for evidence templating.
185    #[must_use]
186    pub fn raw(&self) -> &str {
187        &self.raw
188    }
189
190    /// Segment-aware match against a captured dotted/bare callee path.
191    ///
192    /// With a leading `*`, the trailing segments must equal the tail of the
193    /// candidate's segments (suffix match), so `*.innerHTML` matches
194    /// `el.innerHTML` but not `el.innerHTMLFoo`. With a trailing `*`, the
195    /// leading segments must equal the head of the candidate's segments
196    /// (prefix match), so `child_process.*` matches `child_process.exec` but
197    /// not the bare `child_process`. Without either, the whole segment list
198    /// must match exactly, so `fetch` matches `fetch` but not `myfetch`.
199    /// Patterns carrying BOTH wildcards match nothing (rejected by the config
200    /// layer; never produced by catalogue rows).
201    #[must_use]
202    pub fn matches(&self, callee_path: &str) -> bool {
203        // With only wildcards and no concrete segments, match nothing.
204        if self.suffix_segments.is_empty() || (self.leading_wildcard && self.trailing_wildcard) {
205            return false;
206        }
207        let candidate: Vec<&str> = callee_path.split('.').collect();
208        if self.leading_wildcard {
209            // A leading `*.` requires at least one object segment before the
210            // suffix, so the candidate must have strictly more segments than
211            // the suffix (`*.innerHTML` matches `el.innerHTML`, not `innerHTML`).
212            if self.suffix_segments.len() >= candidate.len() {
213                return false;
214            }
215            let tail = &candidate[candidate.len() - self.suffix_segments.len()..];
216            self.suffix_segments
217                .iter()
218                .zip(tail)
219                .all(|(pat, seg)| pat == seg)
220        } else if self.trailing_wildcard {
221            // A trailing `.*` requires at least one member segment after the
222            // prefix (`child_process.*` matches `child_process.exec`, not the
223            // bare `child_process`).
224            if self.suffix_segments.len() >= candidate.len() {
225                return false;
226            }
227            let head = &candidate[..self.suffix_segments.len()];
228            self.suffix_segments
229                .iter()
230                .zip(head)
231                .all(|(pat, seg)| pat == seg)
232        } else {
233            self.suffix_segments.len() == candidate.len()
234                && self
235                    .suffix_segments
236                    .iter()
237                    .zip(&candidate)
238                    .all(|(pat, seg)| pat == seg)
239        }
240    }
241
242    /// The receiver segment immediately before this pattern's matched suffix,
243    /// for a leading-`*.` wildcard pattern: `*.query` against `db.query` returns
244    /// `Some("db")`, against `ctx.req.query` returns `Some("req")` (the segment
245    /// right before `query`, which is the receiver of the matched member). Used
246    /// by a source row's receiver allowlist to keep HTTP-input patterns from
247    /// firing on ORM / data-access receivers (issue #1092). Returns `None` for
248    /// an exact (non-wildcard) pattern, whose receiver is fixed in the pattern
249    /// itself, and for any `callee_path` this pattern does not match.
250    #[must_use]
251    pub fn matched_receiver<'p>(&self, callee_path: &'p str) -> Option<&'p str> {
252        if !self.leading_wildcard || !self.matches(callee_path) {
253            return None;
254        }
255        let candidate: Vec<&str> = callee_path.split('.').collect();
256        // `matches` guarantees `candidate.len() > suffix_segments.len()` for a
257        // leading-wildcard hit, so the receiver index is always in range.
258        let recv_idx = candidate.len() - self.suffix_segments.len() - 1;
259        candidate.get(recv_idx).copied()
260    }
261}
262
263/// Parse a raw pattern string into its segmented form. Returns `None` for an
264/// empty or whitespace-only pattern (rejected at parse time).
265fn parse_callee_pattern(raw: &str) -> Option<CalleePattern> {
266    if raw.trim().is_empty() {
267        return None;
268    }
269    let mut segments: Vec<&str> = raw.split('.').collect();
270    let leading_wildcard = segments.first() == Some(&"*");
271    if leading_wildcard {
272        segments.remove(0);
273    }
274    let trailing_wildcard = segments.last() == Some(&"*");
275    if trailing_wildcard {
276        segments.pop();
277    }
278    Some(CalleePattern {
279        raw: raw.to_string(),
280        suffix_segments: segments.into_iter().map(str::to_string).collect(),
281        leading_wildcard,
282        trailing_wildcard,
283    })
284}
285
/// A parsed, validated matcher with the sink shape resolved to the typed enum
/// and callee patterns pre-segmented for O(1)-ish matching.
#[derive(Debug, Clone)]
pub struct Matcher {
    /// Category id of the row; `Catalogue::title_for` looks titles up by it.
    pub id: String,
    /// CWE number attached to the category.
    pub cwe: u32,
    /// Human-readable title for the category.
    pub title: String,
    /// Effect classification from the catalogue row.
    /// NOTE(review): its use by the detector is not visible in this part of
    /// the file.
    pub effect: EffectKind,
    /// Typed sink shape resolved from the row's kebab-case `sink_shape` string.
    pub sink_shape: SinkShape,
    /// Pre-segmented callee patterns in declaration order; see
    /// `Matcher::first_matching_pattern`.
    pub callee_patterns: Vec<CalleePattern>,
    /// NOTE(review): presumably the zero-based position of the sink argument
    /// this row inspects; inferred from the name, confirm against the detector.
    pub arg_index: u32,
    /// Template used when rendering evidence; the matched pattern text is
    /// surfaced in it as `{pattern}`.
    pub evidence_template: String,
    /// NOTE(review): presumably limits the row to callees imported from this
    /// module specifier; the consumer is outside this view, so confirm.
    pub import_provenance: Option<String>,
    /// Framework enabler package gate (issue #861). `None` = global row.
    /// `Some("pkg")` requires an exact dependency match; `Some("@scope/")`
    /// (trailing slash) requires any dependency under that prefix.
    pub enabler: Option<String>,
    /// Resolved allowlist of admitted argument shapes. `None` admits any
    /// non-literal shape; `Some` requires the captured `arg_kind` to be listed.
    pub arg_kinds: Option<Vec<SinkArgKind>>,
    /// Whether this matcher only fires when the sink argument traces to a
    /// configured untrusted source binding.
    pub requires_source: bool,
    /// When non-empty, narrows `requires_source` to these catalogue source ids
    /// (issue #890): the matched source's id must be one of these. Empty admits
    /// any matched source.
    pub requires_source_kinds: Vec<String>,
    /// String-literal values admitted by this row.
    pub literal_values: Vec<String>,
    /// String fragments admitted by this row.
    pub literal_contains: Vec<String>,
    /// Integer literal values admitted by this row.
    pub literal_integers: Vec<i64>,
    /// Required literal object properties.
    pub object_properties: Vec<ObjectPropertyPredicate>,
    /// Object properties whose absence or boolean `false` makes the row match.
    pub object_missing_or_false: Vec<String>,
    /// Object keys whose absence makes the row match.
    pub object_missing: Vec<String>,
    /// Context-name keywords admitted by this row.
    pub context_keywords: Vec<String>,
}
328
/// A parsed, validated untrusted-source matcher (issue #859). Its
/// `path_patterns` reuse the segment-aware [`CalleePattern`] engine: a leading
/// `*.` matches any object prefix (`*.query` matches `req.query` and
/// `ctx.req.query`); a bare path matches exactly.
#[derive(Debug, Clone)]
pub struct SourceMatcher {
    /// Source id, returned by the catalogue's source lookups.
    pub id: String,
    /// Human-readable title returned next to the id.
    pub title: String,
    /// Optional framework enabler package gate; `None` means the row is
    /// global. Evaluated by `SourceMatcher::enabler_satisfied`.
    pub enabler: Option<String>,
    /// Pre-segmented member-access path patterns for this source.
    pub path_patterns: Vec<CalleePattern>,
    /// Lowercased receiver allowlist for leading-wildcard patterns (issue
    /// #1092). Empty leaves the row ungated.
    pub receiver_allowlist: Vec<String>,
}
343
344impl SourceMatcher {
345    /// Whether any of this source's path patterns match the given flattened
346    /// member-access path, subject to the built-in receiver allowlist.
347    #[cfg(test)]
348    #[must_use]
349    pub fn matches(&self, source_path: &str) -> bool {
350        let extra_receivers = FxHashSet::default();
351        self.matches_with_extra_receivers(source_path, &extra_receivers)
352    }
353
354    #[must_use]
355    pub fn matches_with_extra_receivers(
356        &self,
357        source_path: &str,
358        extra_receivers: &FxHashSet<String>,
359    ) -> bool {
360        self.path_patterns.iter().any(|p| {
361            p.matches(source_path) && self.receiver_allowed(p, source_path, extra_receivers)
362        })
363    }
364
365    /// Whether `pattern`'s match on `source_path` is admitted by the receiver
366    /// allowlist. An empty allowlist admits everything. For a leading-wildcard
367    /// pattern the matched receiver must be in the allowlist (case-insensitive);
368    /// an exact pattern (receiver fixed in the pattern) is always admitted.
369    fn receiver_allowed(
370        &self,
371        pattern: &CalleePattern,
372        source_path: &str,
373        extra_receivers: &FxHashSet<String>,
374    ) -> bool {
375        if self.receiver_allowlist.is_empty() {
376            return true;
377        }
378        match pattern.matched_receiver(source_path) {
379            Some(receiver) => {
380                self.receiver_allowlist
381                    .iter()
382                    .any(|allowed| allowed.eq_ignore_ascii_case(receiver))
383                    || extra_receivers.contains(&receiver.to_ascii_lowercase())
384            }
385            None => true,
386        }
387    }
388
389    /// Whether this source row's framework enabler is satisfied by the
390    /// project's declared dependency set. Unset means global.
391    #[must_use]
392    pub fn enabler_satisfied(&self, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
393        enabler_satisfied(self.enabler.as_deref(), declared_deps)
394    }
395}
396
/// The parsed catalogue: an ordered list of sink matchers plus untrusted-source
/// matchers. Order is preserved from the TOML so the detector can break on the
/// first match deterministically.
#[derive(Debug)]
pub struct Catalogue {
    /// Validated sink matchers, in TOML declaration order.
    matchers: Vec<Matcher>,
    /// Validated untrusted-source matchers, in TOML declaration order; source
    /// lookups return the first one that matches.
    sources: Vec<SourceMatcher>,
}
405
406impl Matcher {
407    /// The first callee pattern that matches the given path, if any. The first
408    /// match wins, matching the deterministic declaration order.
409    #[must_use]
410    pub fn first_matching_pattern(&self, callee_path: &str) -> Option<&CalleePattern> {
411        self.callee_patterns.iter().find(|p| p.matches(callee_path))
412    }
413
414    /// Whether a captured argument shape is admitted by this matcher. `None`
415    /// `arg_kinds` admits any shape; `Some` requires the kind to be listed.
416    #[must_use]
417    pub fn admits_arg_kind(&self, arg_kind: SinkArgKind) -> bool {
418        self.arg_kinds
419            .as_ref()
420            .is_none_or(|kinds| kinds.contains(&arg_kind))
421    }
422
423    /// Whether this row has opted into matching a literal, object-property, or
424    /// context-only sink that is not covered by the default non-literal model.
425    #[must_use]
426    pub fn is_literal_aware(&self) -> bool {
427        !self.literal_values.is_empty()
428            || !self.literal_contains.is_empty()
429            || !self.literal_integers.is_empty()
430            || !self.object_properties.is_empty()
431            || !self.object_missing_or_false.is_empty()
432            || !self.object_missing.is_empty()
433            || !self.context_keywords.is_empty()
434            || self.arg_kinds.as_ref().is_some_and(|kinds| {
435                kinds
436                    .iter()
437                    .any(|kind| matches!(kind, SinkArgKind::Literal | SinkArgKind::NoArg))
438            })
439    }
440
441    /// Whether captured literal metadata satisfies this row's literal gates.
442    #[must_use]
443    pub fn literal_value_satisfied(&self, literal: Option<&SinkLiteralValue>) -> bool {
444        if self.literal_values.is_empty()
445            && self.literal_contains.is_empty()
446            && self.literal_integers.is_empty()
447        {
448            return true;
449        }
450        let string_satisfied = (self.literal_values.is_empty() && self.literal_contains.is_empty())
451            || match literal {
452                Some(SinkLiteralValue::String(value)) => {
453                    let lower = value.to_ascii_lowercase();
454                    (self.literal_values.is_empty()
455                        || self
456                            .literal_values
457                            .iter()
458                            .any(|expected| lower == expected.to_ascii_lowercase()))
459                        && (self.literal_contains.is_empty()
460                            || self
461                                .literal_contains
462                                .iter()
463                                .any(|needle| lower.contains(&needle.to_ascii_lowercase())))
464                }
465                _ => false,
466            };
467        let integer_satisfied = self.literal_integers.is_empty()
468            || match literal {
469                Some(SinkLiteralValue::Integer(value)) => self.literal_integers.contains(value),
470                _ => false,
471            };
472        string_satisfied && integer_satisfied
473    }
474
475    /// Whether captured object-literal metadata satisfies this row's object
476    /// property gates.
477    #[must_use]
478    pub fn object_properties_satisfied(&self, properties: &[SinkObjectProperty]) -> bool {
479        if self.object_properties.is_empty() && self.object_missing_or_false.is_empty() {
480            return true;
481        }
482        for predicate in &self.object_properties {
483            let Some(property) = properties.iter().find(|p| p.key == predicate.key) else {
484                return false;
485            };
486            if !predicate.value.matches(&property.value) {
487                return false;
488            }
489        }
490        if self.object_missing_or_false.is_empty() {
491            return true;
492        }
493        self.object_missing_or_false.iter().any(|key| {
494            properties
495                .iter()
496                .find(|p| p.key == *key)
497                .is_none_or(|property| matches!(property.value, SinkLiteralValue::Boolean(false)))
498        })
499    }
500
501    /// Whether missing-key predicates are satisfied by complete static object
502    /// key metadata.
503    #[must_use]
504    pub fn object_missing_satisfied(&self, keys: &[String], keys_complete: bool) -> bool {
505        if self.object_missing.is_empty() {
506            return true;
507        }
508        keys_complete && self.object_missing.iter().any(|key| !keys.contains(key))
509    }
510
511    /// Whether captured context names satisfy this row's context keyword gate.
512    #[must_use]
513    pub fn context_satisfied(&self, context_names: &[String]) -> bool {
514        if self.context_keywords.is_empty() {
515            return true;
516        }
517        context_names.iter().any(|name| {
518            let lower = name.to_ascii_lowercase();
519            self.context_keywords
520                .iter()
521                .any(|keyword| lower.contains(&keyword.to_ascii_lowercase()))
522        })
523    }
524
525    /// Whether this matcher's framework enabler is satisfied by the project's
526    /// declared dependency set (issue #861). `None` enabler is always satisfied
527    /// (a global row). A `Some` enabler matches by exact package name, or, when
528    /// it ends with `/`, by prefix (`@angular/` matches `@angular/platform-browser`),
529    /// mirroring the plugin-system `enablers()` semantics so framework rows
530    /// activate on exactly the dependency universe the plugins do.
531    #[must_use]
532    pub fn enabler_satisfied(&self, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
533        enabler_satisfied(self.enabler.as_deref(), declared_deps)
534    }
535}
536
537fn enabler_satisfied(enabler: Option<&str>, declared_deps: &rustc_hash::FxHashSet<String>) -> bool {
538    let Some(enabler) = enabler else {
539        return true;
540    };
541    if let Some(prefix) = enabler.strip_suffix('/') {
542        // Trailing-slash prefix match, e.g. `@fastify/` -> `@fastify/static`.
543        // Also admit the bare scope name itself (`@fastify`).
544        declared_deps
545            .iter()
546            .any(|d| d == prefix || d.starts_with(enabler))
547    } else {
548        declared_deps.contains(enabler)
549    }
550}
551
552impl LiteralPredicate {
553    fn matches(&self, value: &SinkLiteralValue) -> bool {
554        match (self, value) {
555            (Self::String(expected), SinkLiteralValue::String(actual)) => {
556                expected.eq_ignore_ascii_case(actual)
557            }
558            (Self::Integer(expected), SinkLiteralValue::Integer(actual)) => expected == actual,
559            (Self::Boolean(expected), SinkLiteralValue::Boolean(actual)) => expected == actual,
560            (Self::Null, SinkLiteralValue::Null) => true,
561            _ => false,
562        }
563    }
564}
565
566impl Catalogue {
567    /// All matchers in declaration order.
568    #[must_use]
569    pub fn matchers(&self) -> &[Matcher] {
570        &self.matchers
571    }
572
573    /// All untrusted-source matchers in declaration order. Test-only inspection.
574    #[cfg(test)]
575    #[must_use]
576    pub fn sources(&self) -> &[SourceMatcher] {
577        &self.sources
578    }
579
580    /// The id + human title of the first untrusted-source matcher whose pattern
581    /// matches the given flattened member-access path, if any (issue #859).
582    #[cfg(test)]
583    #[must_use]
584    pub fn matching_source(&self, source_path: &str) -> Option<(&str, &str)> {
585        let request_receivers = FxHashSet::default();
586        self.sources
587            .iter()
588            .find(|s| s.matches_with_extra_receivers(source_path, &request_receivers))
589            .map(|s| (s.id.as_str(), s.title.as_str()))
590    }
591
592    /// The id + human title of the first untrusted-source matcher whose pattern
593    /// and optional framework enabler match the given source path.
594    #[cfg(test)]
595    #[must_use]
596    pub fn matching_source_for_deps(
597        &self,
598        source_path: &str,
599        declared_deps: &FxHashSet<String>,
600    ) -> Option<(&str, &str)> {
601        let request_receivers = FxHashSet::default();
602        self.matching_source_for_deps_with_receivers(source_path, declared_deps, &request_receivers)
603    }
604
605    /// The id + human title of the first untrusted-source matcher whose pattern,
606    /// optional framework enabler, and configured request-receiver extension
607    /// match the given source path.
608    #[must_use]
609    pub fn matching_source_for_deps_with_receivers(
610        &self,
611        source_path: &str,
612        declared_deps: &FxHashSet<String>,
613        request_receivers: &FxHashSet<String>,
614    ) -> Option<(&str, &str)> {
615        let empty_receivers = FxHashSet::default();
616        self.sources
617            .iter()
618            .find(|s| {
619                let extra_receivers = if s.id == "http-request-input" {
620                    request_receivers
621                } else {
622                    &empty_receivers
623                };
624                s.enabler_satisfied(declared_deps)
625                    && s.matches_with_extra_receivers(source_path, extra_receivers)
626            })
627            .map(|s| (s.id.as_str(), s.title.as_str()))
628    }
629
630    /// Whether the given flattened member-access path matches any untrusted
631    /// source pattern (issue #859). Test-only convenience over `matching_source`.
632    #[cfg(test)]
633    #[must_use]
634    pub fn is_source_path(&self, source_path: &str) -> bool {
635        self.matching_source(source_path).is_some()
636    }
637
638    /// The human-readable title for a category id, if any matcher declares it.
639    #[must_use]
640    pub fn title_for(&self, id: &str) -> Option<&str> {
641        self.matchers
642            .iter()
643            .find(|m| m.id == id)
644            .map(|m| m.title.as_str())
645    }
646}
647
648/// The human-readable title for a category id, used by the CLI renderer.
649#[must_use]
650pub fn catalogue_title(id: &str) -> Option<&'static str> {
651    catalogue().title_for(id)
652}
653
654/// Resolve a kebab-case sink-shape string into the typed [`SinkShape`].
655fn parse_sink_shape(s: &str) -> Option<SinkShape> {
656    match s {
657        "call" => Some(SinkShape::Call),
658        "member-call" => Some(SinkShape::MemberCall),
659        "member-assign" => Some(SinkShape::MemberAssign),
660        "tagged-template" => Some(SinkShape::TaggedTemplate),
661        "jsx-attr" => Some(SinkShape::JsxAttr),
662        "new-expression" => Some(SinkShape::NewExpression),
663        _ => None,
664    }
665}
666
667/// Resolve a kebab-case arg-kind string into the typed [`SinkArgKind`].
668fn parse_arg_kind(s: &str) -> Option<SinkArgKind> {
669    match s {
670        "template-with-subst" => Some(SinkArgKind::TemplateWithSubst),
671        "concat" => Some(SinkArgKind::Concat),
672        "object" => Some(SinkArgKind::Object),
673        "call" => Some(SinkArgKind::Call),
674        "literal" => Some(SinkArgKind::Literal),
675        "no-arg" => Some(SinkArgKind::NoArg),
676        "other" => Some(SinkArgKind::Other),
677        _ => None,
678    }
679}
680
681fn parse_object_property_predicates(
682    id: &str,
683    raw: Option<Vec<RawObjectPropertyPredicate>>,
684) -> Result<Vec<ObjectPropertyPredicate>, String> {
685    let Some(raw_predicates) = raw else {
686        return Ok(Vec::new());
687    };
688    let mut predicates = Vec::with_capacity(raw_predicates.len());
689    for predicate in raw_predicates {
690        if predicate.key.trim().is_empty() {
691            return Err(format!(
692                "matcher {id:?} has an object_properties predicate with an empty key"
693            ));
694        }
695        let value_count = usize::from(predicate.string.is_some())
696            + usize::from(predicate.boolean.is_some())
697            + usize::from(predicate.integer.is_some())
698            + usize::from(predicate.null);
699        if value_count != 1 {
700            return Err(format!(
701                "matcher {id:?} object_properties predicate for {:?} must set exactly one of string | boolean | integer | null",
702                predicate.key
703            ));
704        }
705        let value = if let Some(string) = predicate.string {
706            LiteralPredicate::String(string)
707        } else if let Some(boolean) = predicate.boolean {
708            LiteralPredicate::Boolean(boolean)
709        } else if let Some(integer) = predicate.integer {
710            LiteralPredicate::Integer(integer)
711        } else {
712            LiteralPredicate::Null
713        };
714        predicates.push(ObjectPropertyPredicate {
715            key: predicate.key,
716            value,
717        });
718    }
719    Ok(predicates)
720}
721
722/// Parse + validate the catalogue source. Returns a `Result` (NOT a panic) so
723/// the validation tests can assert on error messages; `catalogue()` unwraps it.
724///
725/// Validates: non-empty id; cwe > 0; sink_shape resolves; callee_patterns
726/// non-empty and every pattern non-empty/non-whitespace; non-empty
727/// evidence_template.
728fn parse_catalogue(src: &str) -> Result<Catalogue, String> {
729    let raw: RawCatalogue =
730        toml::from_str(src).map_err(|e| format!("security_matchers.toml parse error: {e}"))?;
731
732    let mut matchers = Vec::with_capacity(raw.matcher.len());
733    for entry in raw.matcher {
734        matchers.push(parse_matcher_entry(entry)?);
735    }
736
737    if matchers.is_empty() {
738        return Err("security_matchers.toml has no [[matcher]] entries".to_string());
739    }
740
741    let sources = parse_source_catalogue(raw.source)?;
742
743    Ok(Catalogue { matchers, sources })
744}
745
746/// Validate one raw matcher entry and convert it to a `Matcher`. Validates a
747/// non-empty id, cwe > 0, a resolvable sink_shape, non-empty callee_patterns /
748/// arg_kinds / evidence_template, and a non-empty enabler when present.
749fn parse_matcher_entry(entry: RawMatcher) -> Result<Matcher, String> {
750    let (sink_shape, callee_patterns) = validate_matcher_core(&entry)?;
751    let arg_kinds = parse_matcher_arg_kinds(&entry.id, entry.arg_kinds.as_deref())?;
752    let enabler = validate_matcher_enabler(&entry.id, entry.enabler)?;
753    let object_properties = parse_object_property_predicates(&entry.id, entry.object_properties)?;
754    Ok(Matcher {
755        id: entry.id,
756        cwe: entry.cwe,
757        title: entry.title,
758        effect: entry.effect,
759        sink_shape,
760        callee_patterns,
761        arg_index: entry.arg_index,
762        evidence_template: entry.evidence_template,
763        import_provenance: entry.import_provenance,
764        enabler,
765        arg_kinds,
766        requires_source: entry.requires_source,
767        requires_source_kinds: entry.requires_source_kinds,
768        literal_values: entry.literal_values.unwrap_or_default(),
769        literal_contains: entry.literal_contains.unwrap_or_default(),
770        literal_integers: entry.literal_integers.unwrap_or_default(),
771        object_properties,
772        object_missing_or_false: entry.object_missing_or_false.unwrap_or_default(),
773        object_missing: entry.object_missing.unwrap_or_default(),
774        context_keywords: entry.context_keywords.unwrap_or_default(),
775    })
776}
777
778/// Validate a matcher's scalar fields (id, cwe, evidence_template) and parse its
779/// sink_shape plus non-empty callee_patterns.
780fn validate_matcher_core(entry: &RawMatcher) -> Result<(SinkShape, Vec<CalleePattern>), String> {
781    if entry.id.trim().is_empty() {
782        return Err("matcher id must be non-empty / non-whitespace".to_string());
783    }
784    if entry.cwe == 0 {
785        return Err(format!("matcher {:?} has cwe 0; cwe must be > 0", entry.id));
786    }
787    let sink_shape = parse_sink_shape(&entry.sink_shape).ok_or_else(|| {
788        format!(
789            "matcher {:?} has unknown sink_shape {:?}; expected one of \
790             call | member-call | member-assign | tagged-template | jsx-attr | new-expression",
791            entry.id, entry.sink_shape
792        )
793    })?;
794    if entry.callee_patterns.is_empty() {
795        return Err(format!(
796            "matcher {:?} has no callee_patterns; at least one is required",
797            entry.id
798        ));
799    }
800    if entry.evidence_template.trim().is_empty() {
801        return Err(format!(
802            "matcher {:?} has an empty evidence_template",
803            entry.id
804        ));
805    }
806    let mut callee_patterns = Vec::with_capacity(entry.callee_patterns.len());
807    for pat in &entry.callee_patterns {
808        let parsed = parse_callee_pattern(pat).ok_or_else(|| {
809            format!(
810                "matcher {:?} has an empty / whitespace callee_pattern {pat:?}",
811                entry.id
812            )
813        })?;
814        callee_patterns.push(parsed);
815    }
816    Ok((sink_shape, callee_patterns))
817}
818
/// Check the optional `enabler` of matcher `id`.
///
/// A present value that is empty or whitespace-only is an error; an absent
/// value or any other string is handed back untouched.
fn validate_matcher_enabler(id: &str, enabler: Option<String>) -> Result<Option<String>, String> {
    let is_blank = enabler
        .as_deref()
        .is_some_and(|value| value.trim().is_empty());
    if is_blank {
        return Err(format!(
            "matcher {id:?} has an empty / whitespace enabler; omit the key for a global row"
        ));
    }
    Ok(enabler)
}
829
830/// Parse the optional `arg_kinds` list: `None` admits any shape, an empty list
831/// is rejected, and each entry must resolve to a known `ArgKind`.
832fn parse_matcher_arg_kinds(
833    id: &str,
834    raw_kinds: Option<&[String]>,
835) -> Result<Option<Vec<SinkArgKind>>, String> {
836    let Some(raw_kinds) = raw_kinds else {
837        return Ok(None);
838    };
839    if raw_kinds.is_empty() {
840        return Err(format!(
841            "matcher {id:?} has an empty arg_kinds list; omit the key to admit any shape"
842        ));
843    }
844    let mut kinds = Vec::with_capacity(raw_kinds.len());
845    for raw in raw_kinds {
846        let kind = parse_arg_kind(raw).ok_or_else(|| {
847            format!(
848                "matcher {id:?} has unknown arg_kind {raw:?}; expected one of \
849                 template-with-subst | concat | object | call | literal | no-arg | other"
850            )
851        })?;
852        kinds.push(kind);
853    }
854    Ok(Some(kinds))
855}
856
857fn parse_source_catalogue(raw_sources: Vec<RawSource>) -> Result<Vec<SourceMatcher>, String> {
858    let mut sources = Vec::with_capacity(raw_sources.len());
859    for entry in raw_sources {
860        if entry.id.trim().is_empty() {
861            return Err("source id must be non-empty / non-whitespace".to_string());
862        }
863        if entry.path_patterns.is_empty() {
864            return Err(format!(
865                "source {:?} has no path_patterns; at least one is required",
866                entry.id
867            ));
868        }
869        let path_patterns = parse_source_path_patterns(&entry)?;
870        let receiver_allowlist = parse_source_receiver_allowlist(&entry)?;
871        let enabler = match entry.enabler {
872            Some(e) if e.trim().is_empty() => {
873                return Err(format!(
874                    "source {:?} has an empty / whitespace enabler; omit the key for a global row",
875                    entry.id
876                ));
877            }
878            other => other,
879        };
880        sources.push(SourceMatcher {
881            id: entry.id,
882            title: entry.title,
883            enabler,
884            path_patterns,
885            receiver_allowlist,
886        });
887    }
888    Ok(sources)
889}
890
891fn parse_source_path_patterns(entry: &RawSource) -> Result<Vec<CalleePattern>, String> {
892    let mut path_patterns = Vec::with_capacity(entry.path_patterns.len());
893    for pattern in &entry.path_patterns {
894        let parsed = parse_callee_pattern(pattern).ok_or_else(|| {
895            format!(
896                "source {:?} has an empty / whitespace path_pattern {pattern:?}",
897                entry.id
898            )
899        })?;
900        path_patterns.push(parsed);
901    }
902    Ok(path_patterns)
903}
904
905fn parse_source_receiver_allowlist(entry: &RawSource) -> Result<Vec<String>, String> {
906    let mut receiver_allowlist = Vec::with_capacity(entry.receiver_allowlist.len());
907    for receiver in &entry.receiver_allowlist {
908        if receiver.trim().is_empty() {
909            return Err(format!(
910                "source {:?} has an empty / whitespace receiver_allowlist entry; omit the key for an ungated row",
911                entry.id
912            ));
913        }
914        receiver_allowlist.push(receiver.to_ascii_lowercase());
915    }
916    Ok(receiver_allowlist)
917}
918
919/// Parse and cache the embedded catalogue once. Unwraps the parse `Result`; in
920/// a released binary this is unreachable because the bytes are compile-time
921/// embedded and gated by `security_catalogue_parses`.
922#[expect(
923    clippy::expect_used,
924    reason = "compile-time-embedded catalogue pinned by security_catalogue_parses"
925)]
926pub fn catalogue() -> &'static Catalogue {
927    static CATALOGUE: std::sync::OnceLock<Catalogue> = std::sync::OnceLock::new();
928    CATALOGUE.get_or_init(|| {
929        parse_catalogue(CATALOGUE_TOML).expect(
930            "embedded crates/security/data/security_matchers.toml must parse; run \
931             `cargo test -p fallow-security security_catalogue_parses` to see the error",
932        )
933    })
934}
935
936#[cfg(test)]
937#[allow(
938    clippy::expect_used,
939    clippy::unwrap_used,
940    reason = "catalogue parser tests assert fixture invariants directly"
941)]
942mod tests {
943    use super::*;
944    use rustc_hash::FxHashSet;
945
946    #[test]
947    fn security_catalogue_parses() {
948        let cat = catalogue();
949        assert!(!cat.matchers().is_empty(), "catalogue must have matchers");
950        assert!(
951            cat.matchers().iter().any(|m| m.id == "dangerous-html"),
952            "catalogue must contain the dangerous-html seed"
953        );
954    }
955
956    #[test]
957    fn catalogue_rows_are_unique() {
958        // Multiple rows legitimately share an `id` (dangerous-html spans three
959        // shapes), so uniqueness is keyed on the FULL row: id + sink_shape +
960        // callee_patterns + gates. No two identical matcher rows. Keyed off the
961        // raw source so the test does not require `SinkShape: Hash`.
962        let raw: RawCatalogue = toml::from_str(CATALOGUE_TOML).unwrap();
963        let mut seen = FxHashSet::default();
964        for m in &raw.matcher {
965            let pats = m.callee_patterns.join("|");
966            // Uniqueness includes the enabler: framework-scoped rows (#861) may
967            // legitimately share id + shape + patterns and differ only by their
968            // framework gate (e.g. one `route-send-file` row per framework).
969            let enabler = m.enabler.as_deref().unwrap_or("");
970            let import_provenance = m.import_provenance.as_deref().unwrap_or("");
971            let arg_kinds = m
972                .arg_kinds
973                .as_ref()
974                .map_or_else(String::new, |kinds| kinds.join("|"));
975            let literal_values = m
976                .literal_values
977                .as_ref()
978                .map_or_else(String::new, |values| values.join("|"));
979            let literal_contains = m
980                .literal_contains
981                .as_ref()
982                .map_or_else(String::new, |values| values.join("|"));
983            let literal_integers = m
984                .literal_integers
985                .as_ref()
986                .map_or_else(String::new, |values| {
987                    values
988                        .iter()
989                        .map(i64::to_string)
990                        .collect::<Vec<_>>()
991                        .join("|")
992                });
993            let object_properties = format!("{:?}", m.object_properties);
994            let object_missing_or_false = m
995                .object_missing_or_false
996                .as_ref()
997                .map_or_else(String::new, |keys| keys.join("|"));
998            let object_missing = m
999                .object_missing
1000                .as_ref()
1001                .map_or_else(String::new, |keys| keys.join("|"));
1002            let context_keywords = m
1003                .context_keywords
1004                .as_ref()
1005                .map_or_else(String::new, |keywords| keywords.join("|"));
1006            let key = format!(
1007                "{}::{}::{pats}::{enabler}::{import_provenance}::{}::{arg_kinds}::{literal_values}::{literal_contains}::{literal_integers}::{object_properties}::{object_missing_or_false}::{object_missing}::{context_keywords}",
1008                m.id, m.sink_shape, m.requires_source
1009            );
1010            assert!(seen.insert(key.clone()), "duplicate matcher row: {key}");
1011        }
1012    }
1013
1014    #[test]
1015    fn catalogue_ids_non_empty() {
1016        for m in catalogue().matchers() {
1017            assert!(
1018                !m.id.trim().is_empty(),
1019                "matcher id must be non-empty / non-whitespace"
1020            );
1021        }
1022    }
1023
1024    #[test]
1025    fn catalogue_cwe_valid() {
1026        for m in catalogue().matchers() {
1027            assert!(m.cwe > 0, "matcher {:?} has cwe 0", m.id);
1028        }
1029    }
1030
1031    #[test]
1032    fn catalogue_sink_shapes_known() {
1033        // Every parsed matcher already carries a typed SinkShape, so re-parse
1034        // the raw source to assert the kebab strings all resolve.
1035        let raw: RawCatalogue = toml::from_str(CATALOGUE_TOML).unwrap();
1036        for m in &raw.matcher {
1037            assert!(
1038                parse_sink_shape(&m.sink_shape).is_some(),
1039                "matcher {:?} has unknown sink_shape {:?}",
1040                m.id,
1041                m.sink_shape
1042            );
1043        }
1044    }
1045
1046    #[test]
1047    fn catalogue_callee_patterns_non_empty() {
1048        for m in catalogue().matchers() {
1049            assert!(
1050                !m.callee_patterns.is_empty(),
1051                "matcher {:?} has no callee_patterns",
1052                m.id
1053            );
1054            for p in &m.callee_patterns {
1055                assert!(
1056                    !p.raw().trim().is_empty(),
1057                    "matcher {:?} has an empty callee_pattern",
1058                    m.id
1059                );
1060            }
1061        }
1062    }
1063
1064    #[test]
1065    fn catalogue_evidence_templates_non_empty() {
1066        for m in catalogue().matchers() {
1067            assert!(
1068                !m.evidence_template.trim().is_empty(),
1069                "matcher {:?} has an empty evidence_template",
1070                m.id
1071            );
1072        }
1073    }
1074
1075    #[test]
1076    fn parse_rejects_empty_id() {
1077        let toml = r#"
1078[[matcher]]
1079id = ""
1080cwe = 79
1081title = "x"
1082effect = "unknown"
1083sink_shape = "member-assign"
1084callee_patterns = ["*.innerHTML"]
1085arg_index = 0
1086evidence_template = "x"
1087"#;
1088        let err = parse_catalogue(toml).unwrap_err();
1089        assert!(err.contains("id must be non-empty"), "got: {err}");
1090    }
1091
1092    #[test]
1093    fn parse_rejects_zero_cwe() {
1094        let toml = r#"
1095[[matcher]]
1096id = "x"
1097cwe = 0
1098title = "x"
1099effect = "unknown"
1100sink_shape = "member-assign"
1101callee_patterns = ["*.innerHTML"]
1102arg_index = 0
1103evidence_template = "x"
1104"#;
1105        let err = parse_catalogue(toml).unwrap_err();
1106        assert!(err.contains("cwe"), "got: {err}");
1107    }
1108
1109    #[test]
1110    fn parse_rejects_missing_effect() {
1111        let toml = r#"
1112[[matcher]]
1113id = "x"
1114cwe = 79
1115title = "x"
1116sink_shape = "member-assign"
1117callee_patterns = ["*.innerHTML"]
1118arg_index = 0
1119evidence_template = "x"
1120"#;
1121        let err = parse_catalogue(toml).unwrap_err();
1122        assert!(err.contains("missing field `effect`"), "got: {err}");
1123    }
1124
1125    #[test]
1126    fn parse_rejects_unknown_sink_shape() {
1127        let toml = r#"
1128[[matcher]]
1129id = "x"
1130cwe = 79
1131title = "x"
1132effect = "unknown"
1133sink_shape = "not-a-shape"
1134callee_patterns = ["*.innerHTML"]
1135arg_index = 0
1136evidence_template = "x"
1137"#;
1138        let err = parse_catalogue(toml).unwrap_err();
1139        assert!(err.contains("unknown sink_shape"), "got: {err}");
1140    }
1141
1142    #[test]
1143    fn parse_rejects_empty_callee_patterns() {
1144        let toml = r#"
1145[[matcher]]
1146id = "x"
1147cwe = 79
1148title = "x"
1149effect = "unknown"
1150sink_shape = "member-assign"
1151callee_patterns = []
1152arg_index = 0
1153evidence_template = "x"
1154"#;
1155        let err = parse_catalogue(toml).unwrap_err();
1156        assert!(err.contains("callee_patterns"), "got: {err}");
1157    }
1158
1159    #[test]
1160    fn parse_rejects_empty_pattern_string() {
1161        let toml = r#"
1162[[matcher]]
1163id = "x"
1164cwe = 79
1165title = "x"
1166effect = "unknown"
1167sink_shape = "member-assign"
1168callee_patterns = ["   "]
1169arg_index = 0
1170evidence_template = "x"
1171"#;
1172        let err = parse_catalogue(toml).unwrap_err();
1173        assert!(err.contains("empty"), "got: {err}");
1174    }
1175
1176    #[test]
1177    fn parse_rejects_empty_evidence_template() {
1178        let toml = r#"
1179[[matcher]]
1180id = "x"
1181cwe = 79
1182title = "x"
1183effect = "unknown"
1184sink_shape = "member-assign"
1185callee_patterns = ["*.innerHTML"]
1186arg_index = 0
1187evidence_template = "   "
1188"#;
1189        let err = parse_catalogue(toml).unwrap_err();
1190        assert!(err.contains("evidence_template"), "got: {err}");
1191    }
1192
1193    #[test]
1194    fn parse_rejects_no_matchers() {
1195        let err = parse_catalogue("").unwrap_err();
1196        assert!(err.contains("no [[matcher]]"), "got: {err}");
1197    }
1198
1199    #[test]
1200    fn segment_match_is_not_substring() {
1201        let bare = parse_callee_pattern("fetch").unwrap();
1202        assert!(bare.matches("fetch"));
1203        assert!(!bare.matches("myfetch"));
1204        assert!(!bare.matches("fetcher"));
1205
1206        let wildcard = parse_callee_pattern("*.innerHTML").unwrap();
1207        assert!(wildcard.matches("el.innerHTML"));
1208        assert!(wildcard.matches("this.node.innerHTML"));
1209        assert!(!wildcard.matches("el.innerHTMLFoo"));
1210        assert!(!wildcard.matches("innerHTML")); // wildcard requires an object
1211
1212        let dotted = parse_callee_pattern("child_process.exec").unwrap();
1213        assert!(dotted.matches("child_process.exec"));
1214        assert!(!dotted.matches("exec"));
1215        assert!(!dotted.matches("child_process.execSync"));
1216        assert!(!dotted.matches("my_child_process.exec"));
1217    }
1218
1219    #[test]
1220    fn wildcard_only_pattern_matches_nothing() {
1221        // Guard against a degenerate `*` pattern matching every callee.
1222        let star = parse_callee_pattern("*").unwrap();
1223        assert!(!star.matches("el.innerHTML"));
1224        assert!(!star.matches("anything"));
1225    }
1226
1227    #[test]
1228    fn trailing_wildcard_prefix_matches() {
1229        let trailing = parse_callee_pattern("child_process.*").unwrap();
1230        assert!(trailing.matches("child_process.exec"));
1231        assert!(trailing.matches("child_process.exec.call"));
1232        assert!(!trailing.matches("child_process")); // requires a member
1233        assert!(!trailing.matches("my_child_process.exec"));
1234        assert!(!trailing.matches("exec"));
1235
1236        let console = parse_callee_pattern("console.*").unwrap();
1237        assert!(console.matches("console.log"));
1238        assert!(!console.matches("myconsole.log"));
1239    }
1240
1241    #[test]
1242    fn double_wildcard_pattern_matches_nothing() {
1243        // `*.x.*` and `*.*` are rejected by config validation; the matcher
1244        // guards against them anyway.
1245        let both = parse_callee_pattern("*.query.*").unwrap();
1246        assert!(!both.matches("db.query.run"));
1247        let stars = parse_callee_pattern("*.*").unwrap();
1248        assert!(!stars.matches("a.b"));
1249    }
1250
1251    #[test]
1252    fn arg_kinds_unset_admits_any_shape() {
1253        // A matcher with no arg_kinds (e.g. dangerous-html) admits every shape.
1254        let html = catalogue()
1255            .matchers()
1256            .iter()
1257            .find(|m| m.id == "dangerous-html")
1258            .expect("dangerous-html present");
1259        for kind in [
1260            SinkArgKind::TemplateWithSubst,
1261            SinkArgKind::Concat,
1262            SinkArgKind::Object,
1263            SinkArgKind::Call,
1264            SinkArgKind::Literal,
1265            SinkArgKind::NoArg,
1266            SinkArgKind::Other,
1267        ] {
1268            assert!(html.admits_arg_kind(kind), "html admits {kind:?}");
1269        }
1270    }
1271
1272    #[test]
1273    fn sql_injection_query_execute_excludes_object_arg_kind() {
1274        // The `.query` / `.execute` matchers must require unsafe shapes (concat /
1275        // interpolated template) and reject the parameterized object-literal form
1276        // (`.execute({ sql, args })`). The separate `sql.raw` escape-hatch row is
1277        // intentionally shape-agnostic and is excluded from this check.
1278        let query_matchers: Vec<&Matcher> = catalogue()
1279            .matchers()
1280            .iter()
1281            .filter(|m| {
1282                m.id == "sql-injection"
1283                    && m.callee_patterns
1284                        .iter()
1285                        .any(|p| p.raw() == "*.query" || p.raw() == "*.execute")
1286            })
1287            .collect();
1288        assert!(
1289            !query_matchers.is_empty(),
1290            "sql-injection .query/.execute rows present"
1291        );
1292        for m in query_matchers {
1293            let kinds = m
1294                .arg_kinds
1295                .as_ref()
1296                .unwrap_or_else(|| panic!("sql-injection query/execute must constrain arg_kinds"));
1297            assert!(
1298                !kinds.contains(&SinkArgKind::Object),
1299                "sql-injection .query/.execute must not admit the object (parameterized) form"
1300            );
1301            assert!(
1302                !m.admits_arg_kind(SinkArgKind::Object),
1303                "admits_arg_kind agrees: object excluded"
1304            );
1305            assert!(
1306                m.admits_arg_kind(SinkArgKind::Concat),
1307                "sql-injection .query/.execute admits the concat (unsafe) form"
1308            );
1309        }
1310    }
1311
1312    #[test]
1313    fn source_required_matchers_are_explicit() {
1314        let mass_assignment = catalogue()
1315            .matchers()
1316            .iter()
1317            .find(|m| m.id == "mass-assignment")
1318            .expect("mass-assignment row present");
1319        assert!(
1320            mass_assignment.requires_source,
1321            "mass-assignment should only fire for source-backed arguments"
1322        );
1323    }
1324
1325    #[test]
1326    fn literal_integer_predicate_matches_integer_literals() {
1327        let chmod = catalogue()
1328            .matchers()
1329            .iter()
1330            .find(|m| m.id == "world-writable-permission" && m.sink_shape == SinkShape::MemberCall)
1331            .expect("world-writable permission row present");
1332
1333        assert!(chmod.literal_value_satisfied(Some(&SinkLiteralValue::Integer(511))));
1334        assert!(!chmod.literal_value_satisfied(Some(&SinkLiteralValue::Integer(420))));
1335        assert!(
1336            !chmod.literal_value_satisfied(Some(&SinkLiteralValue::String("0o777".to_string())))
1337        );
1338    }
1339
1340    #[test]
1341    fn object_property_predicate_matches_nested_integer_values() {
1342        let toml = r#"
1343[[matcher]]
1344id = "x"
1345cwe = 732
1346title = "x"
1347effect = "unknown"
1348sink_shape = "member-call"
1349callee_patterns = ["fs.chmod"]
1350arg_index = 0
1351arg_kinds = ["object"]
1352object_properties = [{ key = "mode.value", integer = 511 }]
1353evidence_template = "x"
1354"#;
1355        let cat = parse_catalogue(toml).expect("catalogue parses");
1356        let matcher = cat.matchers().first().expect("matcher present");
1357        let properties = vec![SinkObjectProperty {
1358            key: "mode.value".to_string(),
1359            value: SinkLiteralValue::Integer(511),
1360        }];
1361
1362        assert!(matcher.object_properties_satisfied(&properties));
1363    }
1364
1365    #[test]
1366    fn object_missing_requires_complete_key_metadata() {
1367        let jwt_verify = catalogue()
1368            .matchers()
1369            .iter()
1370            .find(|m| m.id == "jwt-verify-missing-algorithms")
1371            .expect("jwt verify missing algorithms row present");
1372
1373        assert!(
1374            jwt_verify.is_literal_aware(),
1375            "object_missing rows opt into literal-aware matching"
1376        );
1377        assert!(jwt_verify.object_missing_satisfied(&[], true));
1378        assert!(jwt_verify.object_missing_satisfied(&["audience".to_string()], true));
1379        assert!(!jwt_verify.object_missing_satisfied(&["algorithms".to_string()], true));
1380        assert!(!jwt_verify.object_missing_satisfied(&["audience".to_string()], false));
1381    }
1382
1383    #[test]
1384    fn parse_rejects_unknown_arg_kind() {
1385        let toml = r#"
1386[[matcher]]
1387id = "x"
1388cwe = 89
1389title = "x"
1390effect = "unknown"
1391sink_shape = "member-call"
1392callee_patterns = ["*.query"]
1393arg_index = 0
1394arg_kinds = ["not-a-kind"]
1395evidence_template = "x"
1396"#;
1397        let err = parse_catalogue(toml).unwrap_err();
1398        assert!(err.contains("unknown arg_kind"), "got: {err}");
1399    }
1400
1401    #[test]
1402    fn enabler_unset_is_global() {
1403        // A matcher with no enabler is satisfied by ANY (even empty) dep set.
1404        let html = catalogue()
1405            .matchers()
1406            .iter()
1407            .find(|m| m.id == "dangerous-html")
1408            .expect("dangerous-html present");
1409        assert!(html.enabler.is_none(), "dangerous-html is a global row");
1410        assert!(html.enabler_satisfied(&FxHashSet::default()));
1411    }
1412
1413    #[test]
1414    fn enabler_satisfied_exact_and_prefix() {
1415        let mut m = catalogue()
1416            .matchers()
1417            .iter()
1418            .find(|m| m.id == "dangerous-html")
1419            .cloned()
1420            .expect("dangerous-html present");
1421
1422        // Exact match.
1423        m.enabler = Some("jquery".to_string());
1424        let mut deps = FxHashSet::default();
1425        assert!(!m.enabler_satisfied(&deps), "absent dep is not satisfied");
1426        deps.insert("jquery".to_string());
1427        assert!(m.enabler_satisfied(&deps), "present exact dep satisfies");
1428
1429        // Trailing-slash prefix match, plus the bare scope name.
1430        m.enabler = Some("@angular/".to_string());
1431        let mut scoped = FxHashSet::default();
1432        assert!(!m.enabler_satisfied(&scoped));
1433        scoped.insert("@angular/platform-browser".to_string());
1434        assert!(m.enabler_satisfied(&scoped), "prefix dep satisfies");
1435        let mut bare_scope = FxHashSet::default();
1436        bare_scope.insert("@angular".to_string());
1437        assert!(
1438            m.enabler_satisfied(&bare_scope),
1439            "bare scope name satisfies the prefix form"
1440        );
1441
1442        // A near-miss exact name does not satisfy a prefix-less enabler.
1443        m.enabler = Some("react".to_string());
1444        let mut reactish = FxHashSet::default();
1445        reactish.insert("react-dom".to_string());
1446        assert!(
1447            !m.enabler_satisfied(&reactish),
1448            "exact enabler must not prefix-match"
1449        );
1450    }
1451
1452    #[test]
1453    fn framework_scoped_rows_are_present() {
1454        // The framework-scoped rows added in #861 carry an enabler.
1455        let cat = catalogue();
1456        let angular = cat
1457            .matchers()
1458            .iter()
1459            .find(|m| m.id == "angular-trusted-html")
1460            .expect("angular-trusted-html present");
1461        assert_eq!(
1462            angular.enabler.as_deref(),
1463            Some("@angular/platform-browser")
1464        );
1465        assert!(
1466            cat.matchers().iter().any(|m| m.id == "jquery-html"),
1467            "jquery-html present"
1468        );
1469        assert!(
1470            cat.matchers().iter().any(|m| m.id == "dom-document-write"),
1471            "dom-document-write present"
1472        );
1473    }
1474
1475    #[test]
1476    fn parse_rejects_empty_enabler() {
1477        let toml = r#"
1478[[matcher]]
1479id = "x"
1480cwe = 79
1481title = "x"
1482effect = "unknown"
1483sink_shape = "member-call"
1484callee_patterns = ["*.html"]
1485arg_index = 0
1486enabler = "   "
1487evidence_template = "x"
1488"#;
1489        let err = parse_catalogue(toml).unwrap_err();
1490        assert!(err.contains("empty / whitespace enabler"), "got: {err}");
1491    }
1492
1493    #[test]
1494    fn catalogue_has_untrusted_sources() {
1495        // Issue #859: the embedded catalogue ships at least one [[source]] row,
1496        // each with a non-empty id, title, and path_patterns.
1497        let cat = catalogue();
1498        assert!(
1499            !cat.sources().is_empty(),
1500            "catalogue must ship untrusted-source rows"
1501        );
1502        for s in cat.sources() {
1503            assert!(!s.id.trim().is_empty(), "source id non-empty");
1504            assert!(!s.title.trim().is_empty(), "source title non-empty");
1505            assert!(!s.path_patterns.is_empty(), "source has path patterns");
1506        }
1507    }
1508
1509    #[test]
1510    fn source_paths_match_expected_request_inputs() {
1511        let cat = catalogue();
1512        // Wildcard object prefix matches common framework request accessors.
1513        assert!(cat.is_source_path("req.query"));
1514        assert!(cat.is_source_path("ctx.req.query"));
1515        assert!(cat.is_source_path("request.body"));
1516        assert!(cat.is_source_path("req.params"));
1517        assert!(cat.is_source_path("process.argv"));
1518        assert!(cat.is_source_path("event.data"));
1519        assert!(cat.is_source_path("request.rawBody"));
1520        assert!(cat.is_source_path("document.referrer"));
1521        assert!(cat.is_source_path("window.name"));
1522        assert!(cat.is_source_path("document.cookie"));
1523        // A plain object path that is not an untrusted source does not match.
1524        assert!(!cat.is_source_path("config.value"));
1525        assert!(!cat.is_source_path("user.name"));
1526        assert!(!cat.is_source_path("profile.name"));
1527        assert!(!cat.is_source_path("jar.cookie"));
1528    }
1529
1530    #[test]
1531    fn source_matcher_matches_helper() {
1532        let cat = catalogue();
1533        let http = cat
1534            .sources()
1535            .iter()
1536            .find(|s| s.id == "http-request-input")
1537            .expect("http-request-input source present");
1538        assert!(http.matches("req.query"));
1539        assert!(!http.matches("process.argv"));
1540    }
1541
/// Pins what `matched_receiver` reports for a leading-wildcard pattern and for
/// an exact pattern.
#[test]
fn matched_receiver_returns_segment_before_suffix() {
    // With a leading wildcard (`*.query`) the receiver is whichever segment sits
    // directly in front of the matched `query`, no matter how many object
    // segments come before it.
    let wildcard = parse_callee_pattern("*.query").expect("pattern parses");
    let expectations = [
        ("db.query", Some("db")),
        ("req.query", Some("req")),
        // Hono `c.req.query` flattens, so `.query` is received by `req`.
        ("ctx.req.query", Some("req")),
        // A path that does not match has no receiver at all.
        ("req.body", None),
    ];
    for (path, receiver) in expectations {
        assert_eq!(wildcard.matched_receiver(path), receiver);
    }

    // An exact (non-wildcard) pattern already fixes its receiver in the pattern
    // text, so `matched_receiver` yields `None` even when the path matches.
    let literal = parse_callee_pattern("process.env").expect("pattern parses");
    assert_eq!(literal.matched_receiver("process.env"), None);
}
1558
/// Issue #1092: the global HTTP-input source row is gated on its receiver, so
/// data-access objects are rejected while request-like objects still match.
#[test]
fn receiver_allowlist_rejects_orm_query_builders_keeps_request_objects() {
    let cat = catalogue();
    let is_source = |path: &str| cat.is_source_path(path);

    // ORM / data-access receivers must not be classified as sources.
    assert!(!is_source("db.query"), "Drizzle db.query");
    assert!(!is_source("prisma.query"), "Prisma prisma.query");
    assert!(!is_source("drizzle.query"));
    assert!(!is_source("knex.body"));
    assert!(!is_source("client.query"));

    // Neither must a non-request receiver that merely exposes a `.query`
    // member (sibling-collision check: `dbConn` is not `db`).
    assert!(!is_source("dbConn.query"));
    assert!(!is_source("database.params"));

    // Genuine request receivers keep classifying as sources.
    assert!(is_source("req.query"), "Express req.query");
    assert!(is_source("request.body"));
    assert!(is_source("ctx.params"), "Koa/Elysia ctx.params");
    assert!(is_source("context.body"));
    assert!(is_source("event.query"), "SvelteKit event.query");

    // Hono `c.req.query`: the matched receiver is `req`, which is allowed.
    assert!(is_source("ctx.req.query"));

    // Receiver comparison ignores case.
    assert!(is_source("Req.query"));
}
1584
/// Caller-supplied receiver names are honoured in addition to the built-in
/// request receivers, and do not open the gate for unrelated receivers.
#[test]
fn configured_request_receivers_extend_http_request_source_allowlist() {
    let cat = catalogue();
    let no_deps = FxHashSet::default();
    let extra_receivers: FxHashSet<String> = ["h", "httpreq"]
        .iter()
        .map(|name| String::from(*name))
        .collect();

    // True when `path` resolves to some source under the extra receivers.
    let resolves = |path: &str| {
        cat.matching_source_for_deps_with_receivers(path, &no_deps, &extra_receivers)
            .is_some()
    };

    // Configured receivers match, including a mixed-case spelling in the path.
    assert!(resolves("h.query"));
    assert!(resolves("HttpReq.body"));
    // A built-in request receiver keeps working alongside the configured ones.
    assert!(resolves("req.params"));
    // A receiver that is neither built-in nor configured stays rejected.
    assert!(!resolves("db.query"));
}
1608
/// Issue #1092: `*.searchParams` is deliberately left without a receiver gate,
/// so a `new URL(...).searchParams` binding on an arbitrary local still counts
/// as a source.
#[test]
fn search_params_source_stays_ungated() {
    let cat = catalogue();
    let arbitrary_locals = ["u.searchParams", "url.searchParams", "params.searchParams"];
    for path in arbitrary_locals {
        assert!(cat.is_source_path(path));
    }
}
1618
/// A whitespace-only `receiver_allowlist` entry must make catalogue parsing
/// fail with an error that mentions `receiver_allowlist`.
#[test]
fn parse_rejects_empty_receiver_allowlist_entry() {
    // One matcher row plus a source row whose allowlist carries a blank entry.
    const BLANK_RECEIVER_CATALOGUE: &str = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"

[[source]]
id = "http"
title = "HTTP"
path_patterns = ["*.query"]
receiver_allowlist = ["req", "  "]
"#;

    let err = parse_catalogue(BLANK_RECEIVER_CATALOGUE).unwrap_err();
    assert!(err.contains("receiver_allowlist"), "got: {err}");
}
1641
/// The Express `framework-handler-input` source only applies when its enabler
/// package is among the dependencies.
#[test]
fn source_enabler_gates_framework_param_sources() {
    let cat = catalogue();
    let express_row = cat
        .sources()
        .iter()
        .find(|row| {
            row.id == "framework-handler-input"
                && matches!(row.enabler.as_deref(), Some("express"))
        })
        .expect("express handler source present");
    // The row's path pattern matches on its own, independent of the enabler.
    assert!(express_row.matches("framework.request"));

    // Without any dependency the enabler is unsatisfied and the lookup is empty.
    let no_deps = FxHashSet::default();
    assert!(!express_row.enabler_satisfied(&no_deps));
    let gated = cat.matching_source_for_deps("framework.request", &no_deps);
    assert!(gated.is_none(), "framework handler params require an enabler");

    // With `express` present the same path resolves to the row's id and title.
    let with_express = FxHashSet::from_iter(["express".to_string()]);
    assert!(express_row.enabler_satisfied(&with_express));
    let resolved = cat.matching_source_for_deps("framework.request", &with_express);
    assert_eq!(
        resolved,
        Some(("framework-handler-input", "Framework handler input"))
    );
}
1668
/// GraphQL resolver args and tRPC procedure input are only treated as sources
/// when a matching package is among the dependencies.
#[test]
fn source_enabler_gates_graphql_and_trpc_param_sources() {
    let cat = catalogue();

    // No dependencies: neither path resolves to a source.
    let no_deps = FxHashSet::default();
    let graphql_unenabled = cat.matching_source_for_deps("graphql.args", &no_deps);
    assert!(
        graphql_unenabled.is_none(),
        "GraphQL resolver args require a matching package"
    );
    let trpc_unenabled = cat.matching_source_for_deps("trpc.input", &no_deps);
    assert!(
        trpc_unenabled.is_none(),
        "tRPC procedure input requires a matching package"
    );

    // `@apollo/server` enables the GraphQL resolver-args source.
    let apollo_deps = FxHashSet::from_iter(["@apollo/server".to_string()]);
    let graphql_enabled = cat.matching_source_for_deps("graphql.args", &apollo_deps);
    assert_eq!(
        graphql_enabled,
        Some(("graphql-resolver-args", "GraphQL resolver args"))
    );

    // `@trpc/server` enables the tRPC procedure-input source.
    let trpc_server_deps = FxHashSet::from_iter(["@trpc/server".to_string()]);
    let trpc_enabled = cat.matching_source_for_deps("trpc.input", &trpc_server_deps);
    assert_eq!(
        trpc_enabled,
        Some(("trpc-procedure-input", "tRPC procedure input"))
    );
}
1697
/// A `[[source]]` row with an empty `path_patterns` list must make catalogue
/// parsing fail with an error that mentions `path_patterns`.
#[test]
fn parse_rejects_source_without_patterns() {
    // One matcher row plus a source row that lists no path patterns.
    const PATTERNLESS_SOURCE_CATALOGUE: &str = r#"
[[matcher]]
id = "x"
cwe = 79
title = "x"
effect = "unknown"
sink_shape = "member-assign"
callee_patterns = ["*.innerHTML"]
arg_index = 0
evidence_template = "x"

[[source]]
id = "bad"
title = "bad"
path_patterns = []
"#;

    let err = parse_catalogue(PATTERNLESS_SOURCE_CATALOGUE).unwrap_err();
    assert!(err.contains("path_patterns"), "got: {err}");
}
1719
/// A matcher that spells out `arg_kinds = []` must make catalogue parsing fail
/// with an error that contains `empty arg_kinds`.
#[test]
fn parse_rejects_empty_arg_kinds() {
    // A single matcher row whose `arg_kinds` list is present but empty.
    const EMPTY_ARG_KINDS_CATALOGUE: &str = r#"
[[matcher]]
id = "x"
cwe = 89
title = "x"
effect = "unknown"
sink_shape = "member-call"
callee_patterns = ["*.query"]
arg_index = 0
arg_kinds = []
evidence_template = "x"
"#;

    let err = parse_catalogue(EMPTY_ARG_KINDS_CATALOGUE).unwrap_err();
    assert!(err.contains("empty arg_kinds"), "got: {err}");
}
1737}