skill_veil_core/adapters/pattern_helpers.rs
1//! Composition seam between the [`PatternMatcher`] port and the default
2//! [`RegexPatternMatcher`] adapter.
3//!
4//! The hexagonal contract sealed in [`crate::ports`] requires the domain
5//! to depend only on [`PatternMatcher`]; concrete regex usage is confined
6//! to [`RegexPatternMatcher`]. Heuristics that rely on literal patterns
7//! (e.g. instruction-bait detection in [`crate::analyzer::assessment`])
8//! still need a one-shot compilation against the default adapter so they
9//! can sit in `LazyLock` statics without taking on `regex::Regex`
10//! themselves.
11//!
12//! This module lives under [`crate::adapters`] precisely because it is
13//! the *only* place inside the library that legitimately names the
14//! concrete default matcher: it is the composition seam, not domain
15//! code. Domain modules consume the [`lazy_pattern!`] macro abstraction
16//! and never import [`RegexPatternMatcher`] directly.
17//!
18//! Three forms are exported for domain code:
19//!
20//! * [`lazy_pattern!`] — module-scoped `LazyLock<CompiledPattern>` for a
21//! single hardcoded literal. The default and most common shape.
22//! * [`compile_patterns`] — bulk compile a slice of hardcoded literals
23//! into a `Vec<CompiledPattern>`. For modules that group several
24//! compiled patterns under a single static (e.g. injection-pattern
25//! tables keyed by rule id). Panics on a malformed literal, matching
26//! the contract of [`lazy_pattern!`].
27//! * [`try_compile`] — single-shot compile that returns a `Result`.
28//! For boundary code that validates patterns supplied by users
29//! (e.g. YAML rule packs) where compilation failure must propagate
30//! instead of panicking.
31//!
32//! [`default_matcher`] is the implementation seam these helpers wrap;
33//! it stays `pub` because the [`lazy_pattern!`] macro expansion names
34//! it directly. Domain modules MUST go through one of the three forms
35//! above.
36
37use crate::adapters::RegexPatternMatcher;
38use crate::ports::{CompiledPattern, PatternError, PatternMatcher};
39use std::sync::OnceLock;
40
41/// Shared adapter used by [`lazy_pattern!`] for hardcoded domain
42/// patterns. Tests that need a different matcher inject one through
43/// `Scanner::with_custom_adapters` rather than swapping this default.
44#[must_use]
45pub fn default_matcher() -> &'static (dyn PatternMatcher + 'static) {
46 static MATCHER: OnceLock<RegexPatternMatcher> = OnceLock::new();
47 MATCHER.get_or_init(RegexPatternMatcher::new)
48}
49
50/// Declare a `LazyLock<CompiledPattern>` over a hardcoded pattern.
51///
52/// The pattern is compiled lazily through [`default_matcher`]. Compile
53/// failures panic at first use because hardcoded patterns are part of
54/// the binary contract — a malformed literal is a build-time bug, not
55/// runtime data. Tests cover the patterns directly so the panic only
56/// fires when a developer hand-edits an invalid literal.
57///
58/// The macro expansion uses `unwrap_or_else(|err| panic!(...))` rather
59/// than `.expect(...)` so the surfaced diagnostic carries both the
60/// static name and the underlying [`PatternError`]. Matches the idiom
61/// used by [`compile_patterns`] below — keep both call sites aligned
62/// when editing one.
63///
64/// # Examples
65/// ```ignore
66/// lazy_pattern!(MY_RE, r"(?i)\bfoo\b");
67/// // ...
68/// if MY_RE.is_match(text) { /* ... */ }
69/// ```
70/// Compile a slice of hardcoded patterns through the default matcher.
71///
72/// Mirrors the contract of [`lazy_pattern!`]: every pattern is a binary
73/// literal, so a compilation failure is a build-time bug and panics
74/// with a diagnostic naming the offending pattern. Use this when a
75/// module groups several compiled patterns under one static (e.g. an
76/// injection-pattern table keyed by rule id) and the per-pattern
77/// `LazyLock` ergonomics of [`lazy_pattern!`] would force one static
78/// per id.
79#[must_use]
80pub(crate) fn compile_patterns(patterns: &[&str]) -> Vec<CompiledPattern> {
81 let matcher = default_matcher();
82 patterns
83 .iter()
84 .map(|pattern| {
85 matcher
86 .compile(pattern)
87 .unwrap_or_else(|err| panic!("hardcoded pattern must compile: {pattern}: {err}"))
88 })
89 .collect()
90}
91
92/// Compile a single pattern that may have been supplied by user input.
93///
94/// Returns the same `Result` shape as [`PatternMatcher::compile`], so
95/// boundary code (rule loaders, validators) propagates the error
96/// rather than panicking. Domain modules use this instead of
97/// [`default_matcher`] so the only place that names the singleton
98/// adapter directly is this composition seam.
99///
100/// # Errors
101/// Returns the matcher's [`PatternError`] when the pattern is invalid.
102pub(crate) fn try_compile(pattern: &str) -> Result<CompiledPattern, PatternError> {
103 default_matcher().compile(pattern)
104}
105
/// Declare a `LazyLock<CompiledPattern>` static over a hardcoded pattern.
///
/// Two call shapes are accepted, each with an optional trailing comma:
///
/// * `lazy_pattern!(NAME, pattern)` — static with no visibility
///   qualifier.
/// * `lazy_pattern!(pub(crate) NAME, pattern)` — static carrying the
///   given visibility.
///
/// The pattern is compiled on first dereference through
/// [`default_matcher`](crate::adapters::pattern_helpers::default_matcher).
/// Hardcoded patterns are part of the binary contract — a malformed
/// literal is a build-time bug, not runtime data — so a compile failure
/// is not surfaced as a `Result`.
///
/// The `@build` arm is an implementation detail and is not meant to be
/// invoked by callers.
///
/// # Panics
/// The generated static panics on first use with
/// `hardcoded pattern must compile: <NAME>: <error>` when the literal
/// does not compile.
///
/// # Examples
/// ```ignore
/// lazy_pattern!(MY_RE, r"(?i)\bfoo\b");
/// // ...
/// if MY_RE.is_match(text) { /* ... */ }
/// ```
#[macro_export]
macro_rules! lazy_pattern {
    // NOTE(review): the bare-name arm looks redundant with `$vis:vis`
    // (which may match nothing); presumably it exists so an empty
    // visibility is never forwarded to `@build` — confirm before
    // merging or reordering the arms.
    ($name:ident, $pattern:expr $(,)?) => {
        $crate::lazy_pattern!(@build (), $name, $pattern);
    };
    ($vis:vis $name:ident, $pattern:expr $(,)?) => {
        $crate::lazy_pattern!(@build ($vis), $name, $pattern);
    };
    (@build ($($vis:tt)*), $name:ident, $pattern:expr) => {
        $($vis)* static $name: std::sync::LazyLock<$crate::ports::CompiledPattern> =
            std::sync::LazyLock::new(|| {
                // Use `unwrap_or_else(|err| panic!(...))` instead of
                // `.expect(...)` so the diagnostic surfaces both the
                // static name AND the underlying PatternError. Matches
                // the `compile_patterns` idiom and keeps the codebase
                // free of `.expect()` in library code.
                $crate::adapters::pattern_helpers::default_matcher()
                    .compile($pattern)
                    .unwrap_or_else(|err| panic!(
                        "hardcoded pattern must compile: {}: {}",
                        stringify!($name),
                        err,
                    ))
            });
    };
}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// # Contract
    /// `default_matcher` returns a stable `'static` reference; repeated
    /// calls reuse the same instance so `LazyLock<CompiledPattern>`
    /// statics all compile through one shared adapter.
    #[test]
    fn default_matcher_returns_stable_singleton() {
        let a: *const dyn PatternMatcher = default_matcher();
        let b: *const dyn PatternMatcher = default_matcher();
        assert!(std::ptr::addr_eq(a, b));
    }

    lazy_pattern!(LAZY_DIGITS, r"\d+");

    /// # Contract
    /// `lazy_pattern!` produces a `LazyLock<CompiledPattern>` that
    /// drives `find_matches`, `is_match`, and `captures_iter` in lockstep.
    #[test]
    fn lazy_pattern_macro_drives_all_three_operations() {
        assert!(LAZY_DIGITS.is_match("abc 42"));
        assert!(!LAZY_DIGITS.is_match("no digits here"));
        assert_eq!(LAZY_DIGITS.find_matches("a 1 b 2 c").len(), 2);
        assert_eq!(LAZY_DIGITS.captures_iter("a 1 b 2 c").len(), 2);
    }

    lazy_pattern!(pub(crate) LAZY_SCOPED_WORD, r"\bfoo\b",);

    /// # Contract
    /// `lazy_pattern!` MUST accept a leading visibility qualifier and an
    /// optional trailing comma; both forms expand to the same lazily
    /// compiled static as the bare `NAME, pattern` shape.
    #[test]
    fn lazy_pattern_macro_accepts_visibility_and_trailing_comma() {
        assert!(LAZY_SCOPED_WORD.is_match("say foo here"));
        assert!(!LAZY_SCOPED_WORD.is_match("foobar only"));
    }

    lazy_pattern!(LAZY_BROKEN, r"[unterminated");

    /// # Contract (negative)
    /// A malformed literal behind `lazy_pattern!` MUST panic on first
    /// use, and the diagnostic MUST name the static so the developer
    /// can locate the offending declaration.
    #[test]
    #[should_panic(expected = "hardcoded pattern must compile: LAZY_BROKEN")]
    fn lazy_pattern_macro_panics_naming_the_static() {
        let _ = LAZY_BROKEN.is_match("anything");
    }

    /// # Contract
    /// `compile_patterns` MUST compile every input literal in the order
    /// it was passed. Callers (e.g. injection-pattern tables keyed by
    /// rule id) rely on positional alignment between the input slice
    /// and the returned `Vec<CompiledPattern>`; reordering would silently
    /// associate the wrong rule id with the wrong pattern.
    #[test]
    fn compile_patterns_compiles_every_input_in_order() {
        let inputs = [r"\bfoo\b", r"\d+", r"(?i)bar"];
        let compiled = compile_patterns(&inputs);
        assert_eq!(compiled.len(), inputs.len());
        assert!(compiled[0].is_match("say foo here"));
        assert!(!compiled[0].is_match("foobar only"));
        assert!(compiled[1].is_match("answer 42"));
        assert!(compiled[2].is_match("BAR"));
    }

    /// # Contract (negative)
    /// `compile_patterns` MUST panic with the documented diagnostic,
    /// including the offending pattern, when a hardcoded literal fails
    /// to compile. The literal is part of the binary contract —
    /// surfacing a `Result` here would only let callers re-panic on the
    /// same invariant.
    #[test]
    #[should_panic(expected = "hardcoded pattern must compile: [unterminated")]
    fn compile_patterns_panics_on_invalid_literal() {
        let inputs = [r"[unterminated"];
        let _ = compile_patterns(&inputs);
    }

    /// # Contract
    /// `try_compile` MUST return a usable `CompiledPattern` for any
    /// pattern accepted by the underlying matcher; this is the seam
    /// rule-pack loaders use to validate user-supplied patterns without
    /// pulling `RegexPatternMatcher` into domain code.
    #[test]
    fn try_compile_returns_compiled_pattern_for_valid_input() {
        let compiled = try_compile(r"^hello\s+world$").expect("valid pattern must compile");
        assert!(compiled.is_match("hello world"));
        assert!(!compiled.is_match("hello there"));
    }

    /// # Contract (negative)
    /// `try_compile` MUST surface the matcher's `PatternError` instead
    /// of panicking, because YAML rule packs and other boundary inputs
    /// can legitimately contain malformed regex authored by users.
    /// Propagating the error lets the loader reject the pack with a
    /// human-readable diagnostic.
    #[test]
    fn try_compile_returns_pattern_error_for_invalid_input() {
        let result = try_compile(r"[unterminated");
        assert!(
            result.is_err(),
            "malformed pattern must surface as Result::Err, not panic"
        );
    }
}