Skip to main content

axon/
shield_registry.rs

1//! §Fase 40.b — Shield scanner extension point.
2//!
3//! # Why this exists
4//!
5//! Per the OSS / ENTERPRISE / SPLIT charter, OSS axon ships the shield
6//! *framework* (the `shield apply` algebraic-effect handler + wire shape)
7//! but **no scanners** — the OSS default is an identity passthrough. The
8//! vertical scanner *implementations* (HIPAA / legal / AML) are enterprise
9//! R&D and live in the BSL `axon-enterprise` workspace.
10//!
11//! Before Fase 40.b there was no clean way for an external crate to inject
12//! a scanner: the apply helper was a hardcoded identity. This module is the
13//! **public registration hook** the enterprise vertical crate uses. It is a
14//! deliberate language extension point — axon-for-axon: it makes axon a
15//! better host language for privileged downstream layers, independent of
16//! who registers scanners.
17//!
18//! # Model
19//!
20//! A [`ShieldScanner`] is registered under a shield *name* (the same name
21//! used in `shield apply <name> to <target>`). At dispatch time the
22//! `shield apply` handler looks the name up:
23//!
24//! - **registered** → run the scanner, which returns a [`ShieldVerdict`]
25//!   (`Pass` with possibly-redacted content, or `Reject` with a stable
26//!   blame code + adopter-facing reason);
27//! - **not registered** → OSS identity passthrough (backwards-compatible;
28//!   adopters with no enterprise layer see their data unmodified).
29//!
30//! # Thread-safety / lifecycle
31//!
32//! The registry is a process-global behind an `RwLock`. Enterprise
33//! registers its scanners once at server boot (mirroring the pre-v2.0.0
34//! Python `default_registry`). Registration is `last-wins` per name, so a
35//! deployment can override a scanner deterministically.
36
37use std::collections::HashMap;
38use std::sync::{Arc, LazyLock, RwLock};
39
/// Per-invocation context passed to [`ShieldScanner::scan`].
///
/// Kept deliberately small in 40.b: it carries only the shield name. The
/// vertical scanners arriving in 40.c are expected to hold any additional
/// behaviour in their own state instead of widening this struct, which
/// keeps the trait signature stable.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ShieldScanContext {
    /// Shield name exactly as written in `shield apply <name> ...`.
    pub shield_name: String,
}

impl ShieldScanContext {
    /// Build a context for the shield called `shield_name`.
    ///
    /// Accepts anything convertible into a `String` (`&str`, `String`, ...).
    pub fn new(shield_name: impl Into<String>) -> Self {
        let shield_name: String = shield_name.into();
        Self { shield_name }
    }
}
59
/// Outcome of running a scanner against a target.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ShieldVerdict {
    /// The content may proceed, possibly transformed or redacted. The
    /// carried `String` becomes the output bound to the shield step.
    Pass(String),
    /// Policy rejected the content. `code` is a stable slug used for blame
    /// attribution (e.g. `"hipaa.phi_unredacted"`) and `reason` is the
    /// message shown to the adopter. The dispatcher surfaces this as a
    /// `DispatchError::BackendError { name: "shield:<name>", ... }`.
    Reject {
        /// Stable machine-readable slug (blame attribution / metrics).
        code: String,
        /// Human-readable rejection reason aimed at the adopter.
        reason: String,
    },
}

impl ShieldVerdict {
    /// Shorthand for building a [`ShieldVerdict::Pass`] from any
    /// string-like `content`.
    pub fn pass(content: impl Into<String>) -> Self {
        let content: String = content.into();
        Self::Pass(content)
    }

    /// Shorthand for building a [`ShieldVerdict::Reject`] from a string-like
    /// `code` and `reason`.
    pub fn reject(code: impl Into<String>, reason: impl Into<String>) -> Self {
        let (code, reason): (String, String) = (code.into(), reason.into());
        Self::Reject { code, reason }
    }

    /// Returns `true` for [`ShieldVerdict::Pass`] and `false` for
    /// [`ShieldVerdict::Reject`].
    pub fn is_pass(&self) -> bool {
        match self {
            Self::Pass(_) => true,
            Self::Reject { .. } => false,
        }
    }
}
97
98/// Implemented by enterprise vertical scanners (HIPAA / legal / AML).
99///
100/// OSS ships **no** implementations. A scanner is pure-ish from the
101/// dispatcher's perspective: given a target string + context it returns a
102/// verdict. Scanners must be `Send + Sync` (the registry is shared across
103/// the async runtime's worker threads).
104pub trait ShieldScanner: Send + Sync {
105    /// Scan `target` and return a [`ShieldVerdict`].
106    fn scan(&self, target: &str, ctx: &ShieldScanContext) -> ShieldVerdict;
107}
108
109// ────────────────────────────────────────────────────────────────────────
110//  Process-global registry
111// ────────────────────────────────────────────────────────────────────────
112
113static REGISTRY: LazyLock<RwLock<HashMap<String, Arc<dyn ShieldScanner>>>> =
114    LazyLock::new(|| RwLock::new(HashMap::new()));
115
116/// Register `scanner` under `shield_name`. Returns the previously
117/// registered scanner for that name, if any (last-wins). Safe to call from
118/// any thread; intended to run once per name at startup.
119pub fn register_shield_scanner(
120    shield_name: impl Into<String>,
121    scanner: Arc<dyn ShieldScanner>,
122) -> Option<Arc<dyn ShieldScanner>> {
123    REGISTRY
124        .write()
125        .expect("shield registry RwLock poisoned")
126        .insert(shield_name.into(), scanner)
127}
128
129/// Look up the scanner registered under `shield_name`, if any.
130pub fn lookup_shield_scanner(shield_name: &str) -> Option<Arc<dyn ShieldScanner>> {
131    REGISTRY
132        .read()
133        .expect("shield registry RwLock poisoned")
134        .get(shield_name)
135        .cloned()
136}
137
138/// True when at least one scanner is registered. Cheap O(1)-ish guard so
139/// the dispatcher can skip the lookup entirely in the common OSS case (no
140/// enterprise layer present).
141pub fn has_registered_scanners() -> bool {
142    !REGISTRY
143        .read()
144        .expect("shield registry RwLock poisoned")
145        .is_empty()
146}
147
148/// All registered shield names, sorted (for discovery endpoints + audit
149/// diagnostics). Deterministic ordering so wire/log output is stable.
150pub fn registered_shield_names() -> Vec<String> {
151    let mut names: Vec<String> = REGISTRY
152        .read()
153        .expect("shield registry RwLock poisoned")
154        .keys()
155        .cloned()
156        .collect();
157    names.sort();
158    names
159}
160
161/// Remove the scanner registered under `shield_name`, returning it if
162/// present. Mainly for deployments that hot-swap scanners + for tests.
163pub fn unregister_shield_scanner(shield_name: &str) -> Option<Arc<dyn ShieldScanner>> {
164    REGISTRY
165        .write()
166        .expect("shield registry RwLock poisoned")
167        .remove(shield_name)
168}
169
170/// Clear the entire registry. Test-support + clean-shutdown helper.
171#[doc(hidden)]
172pub fn clear_shield_registry() {
173    REGISTRY
174        .write()
175        .expect("shield registry RwLock poisoned")
176        .clear();
177}
178
179// ────────────────────────────────────────────────────────────────────────
180//  §Fase 53.e — NO PHANTOM GUARDRAILS (founder refinement C)
181// ────────────────────────────────────────────────────────────────────────
182
183/// Every `(shield_name, category)` where a shield declares an
184/// EXTENSION-introduced scan category (one declared via an
185/// `extension { category: scan }` block) but has **no registered
186/// scanner** — i.e. a guardrail the operator believes is active that is
187/// actually a silent no-op.
188///
189/// Canonical scan categories are intentionally NOT gated: they carry a
190/// documented framework meaning, and the OSS identity passthrough (no
191/// scanner) is the backwards-compatible default. Only adopter-introduced
192/// extension categories — which have NO default semantics — require an
193/// explicit scanner; serving one unscanned is a false sense of security.
194pub fn unscanned_extension_scan_categories(
195    ir: &crate::ir_nodes::IRProgram,
196) -> Vec<(String, String)> {
197    let mut ext_cats: std::collections::HashSet<&str> = std::collections::HashSet::new();
198    for ext in &ir.extensions {
199        if ext.category == "scan" {
200            for m in &ext.members {
201                ext_cats.insert(m.name.as_str());
202            }
203        }
204    }
205    if ext_cats.is_empty() {
206        return Vec::new();
207    }
208    let mut violations = Vec::new();
209    for shield in &ir.shields {
210        // A registered scanner owns the shield: it is responsible for the
211        // declared categories. Only a shield with NO scanner can leave an
212        // extension category as a ghost guardrail.
213        if lookup_shield_scanner(&shield.name).is_some() {
214            continue;
215        }
216        for cat in &shield.scan {
217            if ext_cats.contains(cat.as_str()) {
218                violations.push((shield.name.clone(), cat.clone()));
219            }
220        }
221    }
222    violations
223}
224
225/// §Fase 53.e — the boot gate. `Ok(())` when every extension scan
226/// category used by a shield has a registered scanner; `Err(blame)` (a
227/// Server-Blame message) otherwise. The boot sequence MUST treat `Err`
228/// as FATAL — refuse to serve rather than present a ghost guardrail
229/// (founder refinement C: no silent no-op, fail loud).
230pub fn check_extension_scan_coverage(ir: &crate::ir_nodes::IRProgram) -> Result<(), String> {
231    let violations = unscanned_extension_scan_categories(ir);
232    if violations.is_empty() {
233        return Ok(());
234    }
235    let detail = violations
236        .iter()
237        .map(|(s, c)| format!("shield '{s}' → scan category '{c}'"))
238        .collect::<Vec<_>>()
239        .join("; ");
240    Err(format!(
241        "§Fase 53.e refusing to boot — extension scan categor(ies) declared but \
242         UNSCANNED (no scanner registered): {detail}. An `extension` scan category \
243         has no default meaning; serving it as a silent no-op would be a phantom \
244         guardrail. Register a scanner for the shield(s) or remove the category."
245    ))
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251
252    // NOTE on test isolation: the registry is a process-global and cargo
253    // runs tests in parallel. These tests therefore use UNIQUE shield
254    // names (disjoint keys never collide under the RwLock), clean up after
255    // themselves with `unregister_shield_scanner`, and never assert on
256    // GLOBAL state (emptiness / the full name list) — only on the keys they
257    // own. `clear_shield_registry` is deliberately NOT used here (it would
258    // nuke a concurrent test's registration).
259
260    struct UppercaseScanner;
261    impl ShieldScanner for UppercaseScanner {
262        fn scan(&self, target: &str, _ctx: &ShieldScanContext) -> ShieldVerdict {
263            ShieldVerdict::pass(target.to_uppercase())
264        }
265    }
266
267    struct AlwaysReject;
268    impl ShieldScanner for AlwaysReject {
269        fn scan(&self, _target: &str, ctx: &ShieldScanContext) -> ShieldVerdict {
270            ShieldVerdict::reject(
271                format!("{}.blocked", ctx.shield_name),
272                "policy rejection (test)",
273            )
274        }
275    }
276
277    // ── §Fase 53.e — phantom-guardrail boot gate ───────────────────
278
279    fn ir_from(src: &str) -> crate::ir_nodes::IRProgram {
280        let tokens = crate::lexer::Lexer::new(src, "<test>")
281            .tokenize()
282            .expect("lex");
283        let program = crate::parser::Parser::new(tokens).parse().expect("parse");
284        crate::ir_generator::IRGenerator::new().generate(&program)
285    }
286
287    struct PassScanner;
288    impl ShieldScanner for PassScanner {
289        fn scan(&self, target: &str, _ctx: &ShieldScanContext) -> ShieldVerdict {
290            ShieldVerdict::pass(target.to_string())
291        }
292    }
293
294    /// A shield using ONLY canonical scan categories (no scanner) is NOT
295    /// a violation — the canonical passthrough is the documented default.
296    #[test]
297    fn canonical_category_without_scanner_is_not_a_violation() {
298        let ir = ir_from(
299            "shield T53e_canon { scan: [code_injection] strategy: pattern on_breach: halt }",
300        );
301        assert!(unscanned_extension_scan_categories(&ir).is_empty());
302        assert!(check_extension_scan_coverage(&ir).is_ok());
303    }
304
305    /// A shield using an EXTENSION scan category with NO registered
306    /// scanner is a phantom guardrail → reported + boot refused.
307    #[test]
308    fn extension_category_without_scanner_is_a_violation() {
309        let ir = ir_from(
310            "extension t53e_x { category: scan members: [ \"dunning_pressure\" ] }\n\
311             shield T53e_ghost { scan: [dunning_pressure] strategy: pattern on_breach: halt }",
312        );
313        let v = unscanned_extension_scan_categories(&ir);
314        assert_eq!(
315            v,
316            vec![("T53e_ghost".to_string(), "dunning_pressure".to_string())]
317        );
318        let err = check_extension_scan_coverage(&ir).expect_err("must refuse boot");
319        assert!(err.contains("phantom guardrail"), "got: {err}");
320        assert!(err.contains("dunning_pressure"), "got: {err}");
321    }
322
323    /// Same source, but a scanner registered under the shield name → the
324    /// extension category is covered → no violation.
325    #[test]
326    fn extension_category_with_scanner_is_ok() {
327        const SHIELD: &str = "T53e_covered";
328        let _prev = register_shield_scanner(SHIELD, Arc::new(PassScanner));
329        let ir = ir_from(&format!(
330            "extension t53e_y {{ category: scan members: [ \"dunning_pressure\" ] }}\n\
331             shield {SHIELD} {{ scan: [dunning_pressure] strategy: pattern on_breach: halt }}"
332        ));
333        let ok = check_extension_scan_coverage(&ir);
334        // Clean up BEFORE asserting so a failure doesn't leak the scanner.
335        unregister_shield_scanner(SHIELD);
336        assert!(ok.is_ok(), "a registered scanner must cover the category: {ok:?}");
337    }
338
339    #[test]
340    fn register_lookup_roundtrip() {
341        const NAME: &str = "t_reg_roundtrip_upper";
342        assert!(lookup_shield_scanner(NAME).is_none());
343
344        let prev = register_shield_scanner(NAME, Arc::new(UppercaseScanner));
345        assert!(prev.is_none(), "first registration has no predecessor");
346        assert!(has_registered_scanners(), "at least our scanner is present");
347
348        let s = lookup_shield_scanner(NAME).expect("registered");
349        let v = s.scan("phi data", &ShieldScanContext::new(NAME));
350        assert_eq!(v, ShieldVerdict::Pass("PHI DATA".to_string()));
351
352        unregister_shield_scanner(NAME);
353        assert!(lookup_shield_scanner(NAME).is_none());
354    }
355
356    #[test]
357    fn last_wins_and_unregister() {
358        const NAME: &str = "t_reg_last_wins";
359        register_shield_scanner(NAME, Arc::new(UppercaseScanner));
360        let prev = register_shield_scanner(NAME, Arc::new(AlwaysReject));
361        assert!(prev.is_some(), "second registration returns the predecessor");
362
363        let s = lookup_shield_scanner(NAME).unwrap();
364        assert!(matches!(
365            s.scan("x", &ShieldScanContext::new(NAME)),
366            ShieldVerdict::Reject { .. }
367        ));
368
369        let removed = unregister_shield_scanner(NAME);
370        assert!(removed.is_some());
371        assert!(lookup_shield_scanner(NAME).is_none());
372    }
373
374    #[test]
375    fn registered_names_includes_own_in_sorted_order() {
376        // Unique prefix so we can filter out any concurrently-registered
377        // scanners and assert only on the keys this test owns.
378        let names = ["t_names_zeta", "t_names_alpha", "t_names_mu"];
379        for n in names {
380            register_shield_scanner(n, Arc::new(UppercaseScanner));
381        }
382        let mut mine: Vec<String> = registered_shield_names()
383            .into_iter()
384            .filter(|n| n.starts_with("t_names_"))
385            .collect();
386        // `registered_shield_names` is documented sorted; filtering
387        // preserves order, so `mine` must already be sorted ascending.
388        let mut expected = mine.clone();
389        expected.sort();
390        assert_eq!(mine, expected, "registered names must be returned sorted");
391        mine.sort();
392        assert_eq!(
393            mine,
394            vec![
395                "t_names_alpha".to_string(),
396                "t_names_mu".to_string(),
397                "t_names_zeta".to_string()
398            ]
399        );
400        for n in names {
401            unregister_shield_scanner(n);
402        }
403    }
404
405    #[test]
406    fn verdict_constructors() {
407        assert!(ShieldVerdict::pass("ok").is_pass());
408        assert!(!ShieldVerdict::reject("c", "r").is_pass());
409    }
410}