axon/shield_registry.rs
1//! §Fase 40.b — Shield scanner extension point.
2//!
3//! # Why this exists
4//!
5//! Per the OSS / ENTERPRISE / SPLIT charter, OSS axon ships the shield
6//! *framework* (the `shield apply` algebraic-effect handler + wire shape)
7//! but **no scanners** — the OSS default is an identity passthrough. The
8//! vertical scanner *implementations* (HIPAA / legal / AML) are enterprise
9//! R&D and live in the BSL `axon-enterprise` workspace.
10//!
11//! Before Fase 40.b there was no clean way for an external crate to inject
12//! a scanner: the apply helper was a hardcoded identity. This module is the
13//! **public registration hook** the enterprise vertical crate uses. It is a
14//! deliberate language extension point — axon-for-axon: it makes axon a
15//! better host language for privileged downstream layers, independent of
16//! who registers scanners.
17//!
18//! # Model
19//!
20//! A [`ShieldScanner`] is registered under a shield *name* (the same name
21//! used in `shield apply <name> to <target>`). At dispatch time the
22//! `shield apply` handler looks the name up:
23//!
24//! - **registered** → run the scanner, which returns a [`ShieldVerdict`]
25//! (`Pass` with possibly-redacted content, or `Reject` with a stable
26//! blame code + adopter-facing reason);
27//! - **not registered** → OSS identity passthrough (backwards-compatible;
28//! adopters with no enterprise layer see their data unmodified).
29//!
30//! # Thread-safety / lifecycle
31//!
32//! The registry is a process-global behind an `RwLock`. Enterprise
33//! registers its scanners once at server boot (mirroring the pre-v2.0.0
34//! Python `default_registry`). Registration is `last-wins` per name, so a
35//! deployment can override a scanner deterministically.
36
37use std::collections::HashMap;
38use std::sync::{Arc, LazyLock, RwLock};
39
/// Per-invocation context passed to [`ShieldScanner::scan`].
///
/// Kept deliberately small in 40.b: it carries only the one field every
/// scanner needs. Vertical scanners arriving in 40.c are expected to grow
/// through their own internal state rather than by widening this struct,
/// which keeps the scanner trait stable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShieldScanContext {
    /// Shield name exactly as written in `shield apply <name> ...`.
    pub shield_name: String,
}

impl ShieldScanContext {
    /// Build a context for the shield called `shield_name`.
    ///
    /// Accepts anything convertible into a `String` (`&str`, `String`, ...).
    pub fn new(shield_name: impl Into<String>) -> Self {
        let shield_name: String = shield_name.into();
        ShieldScanContext { shield_name }
    }
}
59
/// Outcome of running a scanner over a target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ShieldVerdict {
    /// The content may proceed, possibly transformed or redacted. The
    /// carried `String` becomes the output bound to the shield step.
    Pass(String),
    /// The content is refused by policy. `code` is a stable slug used for
    /// blame attribution (e.g. `"hipaa.phi_unredacted"`) and `reason` is
    /// the adopter-facing message. The dispatcher surfaces this as a
    /// `DispatchError::BackendError { name: "shield:<name>", ... }`.
    Reject {
        /// Stable machine-readable slug for blame attribution / metrics.
        code: String,
        /// Human-readable rejection reason shown to the adopter.
        reason: String,
    },
}

impl ShieldVerdict {
    /// Shorthand for building a [`ShieldVerdict::Pass`] carrying `content`.
    pub fn pass(content: impl Into<String>) -> Self {
        let content: String = content.into();
        ShieldVerdict::Pass(content)
    }

    /// Shorthand for building a [`ShieldVerdict::Reject`] from a blame
    /// `code` and an adopter-facing `reason`.
    pub fn reject(code: impl Into<String>, reason: impl Into<String>) -> Self {
        let code: String = code.into();
        let reason: String = reason.into();
        ShieldVerdict::Reject { code, reason }
    }

    /// Returns `true` only for [`ShieldVerdict::Pass`].
    pub fn is_pass(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            ShieldVerdict::Pass(_) => true,
            ShieldVerdict::Reject { .. } => false,
        }
    }
}
97
98/// Implemented by enterprise vertical scanners (HIPAA / legal / AML).
99///
100/// OSS ships **no** implementations. A scanner is pure-ish from the
101/// dispatcher's perspective: given a target string + context it returns a
102/// verdict. Scanners must be `Send + Sync` (the registry is shared across
103/// the async runtime's worker threads).
104pub trait ShieldScanner: Send + Sync {
105 /// Scan `target` and return a [`ShieldVerdict`].
106 fn scan(&self, target: &str, ctx: &ShieldScanContext) -> ShieldVerdict;
107}
108
109// ────────────────────────────────────────────────────────────────────────
110// Process-global registry
111// ────────────────────────────────────────────────────────────────────────
112
113static REGISTRY: LazyLock<RwLock<HashMap<String, Arc<dyn ShieldScanner>>>> =
114 LazyLock::new(|| RwLock::new(HashMap::new()));
115
116/// Register `scanner` under `shield_name`. Returns the previously
117/// registered scanner for that name, if any (last-wins). Safe to call from
118/// any thread; intended to run once per name at startup.
119pub fn register_shield_scanner(
120 shield_name: impl Into<String>,
121 scanner: Arc<dyn ShieldScanner>,
122) -> Option<Arc<dyn ShieldScanner>> {
123 REGISTRY
124 .write()
125 .expect("shield registry RwLock poisoned")
126 .insert(shield_name.into(), scanner)
127}
128
129/// Look up the scanner registered under `shield_name`, if any.
130pub fn lookup_shield_scanner(shield_name: &str) -> Option<Arc<dyn ShieldScanner>> {
131 REGISTRY
132 .read()
133 .expect("shield registry RwLock poisoned")
134 .get(shield_name)
135 .cloned()
136}
137
138/// True when at least one scanner is registered. Cheap O(1)-ish guard so
139/// the dispatcher can skip the lookup entirely in the common OSS case (no
140/// enterprise layer present).
141pub fn has_registered_scanners() -> bool {
142 !REGISTRY
143 .read()
144 .expect("shield registry RwLock poisoned")
145 .is_empty()
146}
147
148/// All registered shield names, sorted (for discovery endpoints + audit
149/// diagnostics). Deterministic ordering so wire/log output is stable.
150pub fn registered_shield_names() -> Vec<String> {
151 let mut names: Vec<String> = REGISTRY
152 .read()
153 .expect("shield registry RwLock poisoned")
154 .keys()
155 .cloned()
156 .collect();
157 names.sort();
158 names
159}
160
161/// Remove the scanner registered under `shield_name`, returning it if
162/// present. Mainly for deployments that hot-swap scanners + for tests.
163pub fn unregister_shield_scanner(shield_name: &str) -> Option<Arc<dyn ShieldScanner>> {
164 REGISTRY
165 .write()
166 .expect("shield registry RwLock poisoned")
167 .remove(shield_name)
168}
169
170/// Clear the entire registry. Test-support + clean-shutdown helper.
171#[doc(hidden)]
172pub fn clear_shield_registry() {
173 REGISTRY
174 .write()
175 .expect("shield registry RwLock poisoned")
176 .clear();
177}
178
179// ────────────────────────────────────────────────────────────────────────
180// §Fase 53.e — NO PHANTOM GUARDRAILS (founder refinement C)
181// ────────────────────────────────────────────────────────────────────────
182
183/// Every `(shield_name, category)` where a shield declares an
184/// EXTENSION-introduced scan category (one declared via an
185/// `extension { category: scan }` block) but has **no registered
186/// scanner** — i.e. a guardrail the operator believes is active that is
187/// actually a silent no-op.
188///
189/// Canonical scan categories are intentionally NOT gated: they carry a
190/// documented framework meaning, and the OSS identity passthrough (no
191/// scanner) is the backwards-compatible default. Only adopter-introduced
192/// extension categories — which have NO default semantics — require an
193/// explicit scanner; serving one unscanned is a false sense of security.
194pub fn unscanned_extension_scan_categories(
195 ir: &crate::ir_nodes::IRProgram,
196) -> Vec<(String, String)> {
197 let mut ext_cats: std::collections::HashSet<&str> = std::collections::HashSet::new();
198 for ext in &ir.extensions {
199 if ext.category == "scan" {
200 for m in &ext.members {
201 ext_cats.insert(m.name.as_str());
202 }
203 }
204 }
205 if ext_cats.is_empty() {
206 return Vec::new();
207 }
208 let mut violations = Vec::new();
209 for shield in &ir.shields {
210 // A registered scanner owns the shield: it is responsible for the
211 // declared categories. Only a shield with NO scanner can leave an
212 // extension category as a ghost guardrail.
213 if lookup_shield_scanner(&shield.name).is_some() {
214 continue;
215 }
216 for cat in &shield.scan {
217 if ext_cats.contains(cat.as_str()) {
218 violations.push((shield.name.clone(), cat.clone()));
219 }
220 }
221 }
222 violations
223}
224
225/// §Fase 53.e — the boot gate. `Ok(())` when every extension scan
226/// category used by a shield has a registered scanner; `Err(blame)` (a
227/// Server-Blame message) otherwise. The boot sequence MUST treat `Err`
228/// as FATAL — refuse to serve rather than present a ghost guardrail
229/// (founder refinement C: no silent no-op, fail loud).
230pub fn check_extension_scan_coverage(ir: &crate::ir_nodes::IRProgram) -> Result<(), String> {
231 let violations = unscanned_extension_scan_categories(ir);
232 if violations.is_empty() {
233 return Ok(());
234 }
235 let detail = violations
236 .iter()
237 .map(|(s, c)| format!("shield '{s}' → scan category '{c}'"))
238 .collect::<Vec<_>>()
239 .join("; ");
240 Err(format!(
241 "§Fase 53.e refusing to boot — extension scan categor(ies) declared but \
242 UNSCANNED (no scanner registered): {detail}. An `extension` scan category \
243 has no default meaning; serving it as a silent no-op would be a phantom \
244 guardrail. Register a scanner for the shield(s) or remove the category."
245 ))
246}
247
248#[cfg(test)]
249mod tests {
250 use super::*;
251
252 // NOTE on test isolation: the registry is a process-global and cargo
253 // runs tests in parallel. These tests therefore use UNIQUE shield
254 // names (disjoint keys never collide under the RwLock), clean up after
255 // themselves with `unregister_shield_scanner`, and never assert on
256 // GLOBAL state (emptiness / the full name list) — only on the keys they
257 // own. `clear_shield_registry` is deliberately NOT used here (it would
258 // nuke a concurrent test's registration).
259
260 struct UppercaseScanner;
261 impl ShieldScanner for UppercaseScanner {
262 fn scan(&self, target: &str, _ctx: &ShieldScanContext) -> ShieldVerdict {
263 ShieldVerdict::pass(target.to_uppercase())
264 }
265 }
266
267 struct AlwaysReject;
268 impl ShieldScanner for AlwaysReject {
269 fn scan(&self, _target: &str, ctx: &ShieldScanContext) -> ShieldVerdict {
270 ShieldVerdict::reject(
271 format!("{}.blocked", ctx.shield_name),
272 "policy rejection (test)",
273 )
274 }
275 }
276
277 // ── §Fase 53.e — phantom-guardrail boot gate ───────────────────
278
279 fn ir_from(src: &str) -> crate::ir_nodes::IRProgram {
280 let tokens = crate::lexer::Lexer::new(src, "<test>")
281 .tokenize()
282 .expect("lex");
283 let program = crate::parser::Parser::new(tokens).parse().expect("parse");
284 crate::ir_generator::IRGenerator::new().generate(&program)
285 }
286
287 struct PassScanner;
288 impl ShieldScanner for PassScanner {
289 fn scan(&self, target: &str, _ctx: &ShieldScanContext) -> ShieldVerdict {
290 ShieldVerdict::pass(target.to_string())
291 }
292 }
293
294 /// A shield using ONLY canonical scan categories (no scanner) is NOT
295 /// a violation — the canonical passthrough is the documented default.
296 #[test]
297 fn canonical_category_without_scanner_is_not_a_violation() {
298 let ir = ir_from(
299 "shield T53e_canon { scan: [code_injection] strategy: pattern on_breach: halt }",
300 );
301 assert!(unscanned_extension_scan_categories(&ir).is_empty());
302 assert!(check_extension_scan_coverage(&ir).is_ok());
303 }
304
305 /// A shield using an EXTENSION scan category with NO registered
306 /// scanner is a phantom guardrail → reported + boot refused.
307 #[test]
308 fn extension_category_without_scanner_is_a_violation() {
309 let ir = ir_from(
310 "extension t53e_x { category: scan members: [ \"dunning_pressure\" ] }\n\
311 shield T53e_ghost { scan: [dunning_pressure] strategy: pattern on_breach: halt }",
312 );
313 let v = unscanned_extension_scan_categories(&ir);
314 assert_eq!(
315 v,
316 vec![("T53e_ghost".to_string(), "dunning_pressure".to_string())]
317 );
318 let err = check_extension_scan_coverage(&ir).expect_err("must refuse boot");
319 assert!(err.contains("phantom guardrail"), "got: {err}");
320 assert!(err.contains("dunning_pressure"), "got: {err}");
321 }
322
323 /// Same source, but a scanner registered under the shield name → the
324 /// extension category is covered → no violation.
325 #[test]
326 fn extension_category_with_scanner_is_ok() {
327 const SHIELD: &str = "T53e_covered";
328 let _prev = register_shield_scanner(SHIELD, Arc::new(PassScanner));
329 let ir = ir_from(&format!(
330 "extension t53e_y {{ category: scan members: [ \"dunning_pressure\" ] }}\n\
331 shield {SHIELD} {{ scan: [dunning_pressure] strategy: pattern on_breach: halt }}"
332 ));
333 let ok = check_extension_scan_coverage(&ir);
334 // Clean up BEFORE asserting so a failure doesn't leak the scanner.
335 unregister_shield_scanner(SHIELD);
336 assert!(ok.is_ok(), "a registered scanner must cover the category: {ok:?}");
337 }
338
339 #[test]
340 fn register_lookup_roundtrip() {
341 const NAME: &str = "t_reg_roundtrip_upper";
342 assert!(lookup_shield_scanner(NAME).is_none());
343
344 let prev = register_shield_scanner(NAME, Arc::new(UppercaseScanner));
345 assert!(prev.is_none(), "first registration has no predecessor");
346 assert!(has_registered_scanners(), "at least our scanner is present");
347
348 let s = lookup_shield_scanner(NAME).expect("registered");
349 let v = s.scan("phi data", &ShieldScanContext::new(NAME));
350 assert_eq!(v, ShieldVerdict::Pass("PHI DATA".to_string()));
351
352 unregister_shield_scanner(NAME);
353 assert!(lookup_shield_scanner(NAME).is_none());
354 }
355
356 #[test]
357 fn last_wins_and_unregister() {
358 const NAME: &str = "t_reg_last_wins";
359 register_shield_scanner(NAME, Arc::new(UppercaseScanner));
360 let prev = register_shield_scanner(NAME, Arc::new(AlwaysReject));
361 assert!(prev.is_some(), "second registration returns the predecessor");
362
363 let s = lookup_shield_scanner(NAME).unwrap();
364 assert!(matches!(
365 s.scan("x", &ShieldScanContext::new(NAME)),
366 ShieldVerdict::Reject { .. }
367 ));
368
369 let removed = unregister_shield_scanner(NAME);
370 assert!(removed.is_some());
371 assert!(lookup_shield_scanner(NAME).is_none());
372 }
373
374 #[test]
375 fn registered_names_includes_own_in_sorted_order() {
376 // Unique prefix so we can filter out any concurrently-registered
377 // scanners and assert only on the keys this test owns.
378 let names = ["t_names_zeta", "t_names_alpha", "t_names_mu"];
379 for n in names {
380 register_shield_scanner(n, Arc::new(UppercaseScanner));
381 }
382 let mut mine: Vec<String> = registered_shield_names()
383 .into_iter()
384 .filter(|n| n.starts_with("t_names_"))
385 .collect();
386 // `registered_shield_names` is documented sorted; filtering
387 // preserves order, so `mine` must already be sorted ascending.
388 let mut expected = mine.clone();
389 expected.sort();
390 assert_eq!(mine, expected, "registered names must be returned sorted");
391 mine.sort();
392 assert_eq!(
393 mine,
394 vec![
395 "t_names_alpha".to_string(),
396 "t_names_mu".to_string(),
397 "t_names_zeta".to_string()
398 ]
399 );
400 for n in names {
401 unregister_shield_scanner(n);
402 }
403 }
404
405 #[test]
406 fn verdict_constructors() {
407 assert!(ShieldVerdict::pass("ok").is_pass());
408 assert!(!ShieldVerdict::reject("c", "r").is_pass());
409 }
410}