Skip to main content

skill_veil_core/adapters/
pattern_helpers.rs

1//! Composition seam between the [`PatternMatcher`] port and the default
2//! [`RegexPatternMatcher`] adapter.
3//!
4//! The hexagonal contract sealed in [`crate::ports`] requires the domain
5//! to depend only on [`PatternMatcher`]; concrete regex usage is confined
6//! to [`RegexPatternMatcher`]. Heuristics that rely on literal patterns
7//! (e.g. instruction-bait detection in [`crate::analyzer::assessment`])
8//! still need a one-shot compilation against the default adapter so they
9//! can sit in `LazyLock` statics without taking on `regex::Regex`
10//! themselves.
11//!
12//! This module lives under [`crate::adapters`] precisely because it is
13//! the *only* place inside the library that legitimately names the
14//! concrete default matcher: it is the composition seam, not domain
15//! code. Domain modules consume the [`lazy_pattern!`] macro abstraction
16//! and never import [`RegexPatternMatcher`] directly.
17//!
18//! Three forms are exported for domain code:
19//!
20//! * [`lazy_pattern!`] — module-scoped `LazyLock<CompiledPattern>` for a
21//!   single hardcoded literal. The default and most common shape.
22//! * [`compile_patterns`] — bulk compile a slice of hardcoded literals
23//!   into a `Vec<CompiledPattern>`. For modules that group several
24//!   compiled patterns under a single static (e.g. injection-pattern
25//!   tables keyed by rule id). Panics on a malformed literal, matching
26//!   the contract of [`lazy_pattern!`].
27//! * [`try_compile`] — single-shot compile that returns a `Result`.
28//!   For boundary code that validates patterns supplied by users
29//!   (e.g. YAML rule packs) where compilation failure must propagate
30//!   instead of panicking.
31//!
32//! [`default_matcher`] is the implementation seam these helpers wrap;
33//! it stays `pub` because the [`lazy_pattern!`] macro expansion names
34//! it directly. Domain modules MUST go through one of the three forms
35//! above.
36
37use crate::adapters::RegexPatternMatcher;
38use crate::ports::{CompiledPattern, PatternError, PatternMatcher};
39use std::sync::OnceLock;
40
41/// Shared adapter used by [`lazy_pattern!`] for hardcoded domain
42/// patterns. Tests that need a different matcher inject one through
43/// `Scanner::with_custom_adapters` rather than swapping this default.
44#[must_use]
45pub fn default_matcher() -> &'static (dyn PatternMatcher + 'static) {
46    static MATCHER: OnceLock<RegexPatternMatcher> = OnceLock::new();
47    MATCHER.get_or_init(RegexPatternMatcher::new)
48}
49
50/// Declare a `LazyLock<CompiledPattern>` over a hardcoded pattern.
51///
52/// The pattern is compiled lazily through [`default_matcher`]. Compile
53/// failures panic at first use because hardcoded patterns are part of
54/// the binary contract — a malformed literal is a build-time bug, not
55/// runtime data. Tests cover the patterns directly so the panic only
56/// fires when a developer hand-edits an invalid literal.
57///
58/// The macro expansion uses `unwrap_or_else(|err| panic!(...))` rather
59/// than `.expect(...)` so the surfaced diagnostic carries both the
60/// static name and the underlying [`PatternError`]. Matches the idiom
61/// used by [`compile_patterns`] below — keep both call sites aligned
62/// when editing one.
63///
64/// # Examples
65/// ```ignore
66/// lazy_pattern!(MY_RE, r"(?i)\bfoo\b");
67/// // ...
68/// if MY_RE.is_match(text) { /* ... */ }
69/// ```
70/// Compile a slice of hardcoded patterns through the default matcher.
71///
72/// Mirrors the contract of [`lazy_pattern!`]: every pattern is a binary
73/// literal, so a compilation failure is a build-time bug and panics
74/// with a diagnostic naming the offending pattern. Use this when a
75/// module groups several compiled patterns under one static (e.g. an
76/// injection-pattern table keyed by rule id) and the per-pattern
77/// `LazyLock` ergonomics of [`lazy_pattern!`] would force one static
78/// per id.
79#[must_use]
80pub(crate) fn compile_patterns(patterns: &[&str]) -> Vec<CompiledPattern> {
81    let matcher = default_matcher();
82    patterns
83        .iter()
84        .map(|pattern| {
85            matcher
86                .compile(pattern)
87                .unwrap_or_else(|err| panic!("hardcoded pattern must compile: {pattern}: {err}"))
88        })
89        .collect()
90}
91
92/// Compile a single pattern that may have been supplied by user input.
93///
94/// Returns the same `Result` shape as [`PatternMatcher::compile`], so
95/// boundary code (rule loaders, validators) propagates the error
96/// rather than panicking. Domain modules use this instead of
97/// [`default_matcher`] so the only place that names the singleton
98/// adapter directly is this composition seam.
99///
100/// # Errors
101/// Returns the matcher's [`PatternError`] when the pattern is invalid.
102pub(crate) fn try_compile(pattern: &str) -> Result<CompiledPattern, PatternError> {
103    default_matcher().compile(pattern)
104}
105
106#[macro_export]
107macro_rules! lazy_pattern {
108    ($name:ident, $pattern:expr $(,)?) => {
109        $crate::lazy_pattern!(@build (), $name, $pattern);
110    };
111    ($vis:vis $name:ident, $pattern:expr $(,)?) => {
112        $crate::lazy_pattern!(@build ($vis), $name, $pattern);
113    };
114    (@build ($($vis:tt)*), $name:ident, $pattern:expr) => {
115        $($vis)* static $name: std::sync::LazyLock<$crate::ports::CompiledPattern> =
116            std::sync::LazyLock::new(|| {
117                // Use `unwrap_or_else(|err| panic!(...))` instead of
118                // `.expect(...)` so the diagnostic surfaces both the
119                // static name AND the underlying PatternError. Matches
120                // the `compile_patterns` idiom and keeps the codebase
121                // free of `.expect()` in library code.
122                $crate::adapters::pattern_helpers::default_matcher()
123                    .compile($pattern)
124                    .unwrap_or_else(|err| panic!(
125                        "hardcoded pattern must compile: {}: {}",
126                        stringify!($name),
127                        err,
128                    ))
129            });
130    };
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// # Contract
    /// Successive `default_matcher` calls must resolve to the same
    /// address: the default adapter is a single `'static` instance
    /// that every hardcoded pattern compiles through.
    #[test]
    fn default_matcher_returns_stable_singleton() {
        let first: *const dyn PatternMatcher = default_matcher();
        let second: *const dyn PatternMatcher = default_matcher();
        assert!(std::ptr::addr_eq(first, second));
    }

    lazy_pattern!(LAZY_DIGITS, r"\d+");

    /// # Contract
    /// A static declared with `lazy_pattern!` must answer `is_match`,
    /// `find_matches`, and `captures_iter` consistently for the same
    /// haystack.
    #[test]
    fn lazy_pattern_macro_drives_all_three_operations() {
        let two_numbers = "a 1 b 2 c";

        assert!(LAZY_DIGITS.is_match("abc 42"));
        assert!(!LAZY_DIGITS.is_match("no digits here"));
        assert_eq!(LAZY_DIGITS.find_matches(two_numbers).len(), 2);
        assert_eq!(LAZY_DIGITS.captures_iter(two_numbers).len(), 2);
    }

    /// # Contract
    /// `compile_patterns` MUST return one compiled pattern per input,
    /// positioned exactly as in the input slice. Callers such as
    /// injection-pattern tables keyed by rule id depend on that
    /// positional alignment; a reordering would silently pair a rule
    /// id with the wrong pattern.
    #[test]
    fn compile_patterns_compiles_every_input_in_order() {
        let literals = [r"\bfoo\b", r"\d+", r"(?i)bar"];
        let patterns = compile_patterns(&literals);

        assert_eq!(patterns.len(), literals.len());

        // Slot 0: word-bounded `foo`.
        assert!(patterns[0].is_match("say foo here"));
        assert!(!patterns[0].is_match("foobar only"));
        // Slot 1: digits.
        assert!(patterns[1].is_match("answer 42"));
        // Slot 2: case-insensitive `bar`.
        assert!(patterns[2].is_match("BAR"));
    }

    /// # Contract (negative)
    /// A hardcoded literal that fails to compile MUST make
    /// `compile_patterns` panic with the documented diagnostic. The
    /// literal is part of the binary contract, so returning a `Result`
    /// would only push the same panic onto every caller.
    #[test]
    #[should_panic(expected = "hardcoded pattern must compile")]
    fn compile_patterns_panics_on_invalid_literal() {
        let _ = compile_patterns(&[r"[unterminated"]);
    }

    /// # Contract
    /// For a pattern the underlying matcher accepts, `try_compile` MUST
    /// yield a usable `CompiledPattern`. Rule-pack loaders validate
    /// user-supplied patterns through this seam so that
    /// `RegexPatternMatcher` stays out of domain code.
    #[test]
    fn try_compile_returns_compiled_pattern_for_valid_input() {
        let pattern = try_compile(r"^hello\s+world$").expect("valid pattern must compile");

        assert!(pattern.is_match("hello world"));
        assert!(!pattern.is_match("hello  there"));
    }

    /// # Contract (negative)
    /// `try_compile` MUST report a malformed pattern as `Err` rather
    /// than panicking: YAML rule packs and other boundary inputs may
    /// legitimately carry broken regex written by users, and the
    /// loader needs the error to reject the pack with a readable
    /// diagnostic.
    #[test]
    fn try_compile_returns_pattern_error_for_invalid_input() {
        assert!(
            try_compile(r"[unterminated").is_err(),
            "malformed pattern must surface as Result::Err, not panic"
        );
    }
}