Skip to main content

arcly_http/compliance/
masking.rs

1//! PII masking — classification-driven redaction at every data sink.
2//!
3//! ## Why this is load-bearing for the rest of the framework
4//!
5//! The compliance machinery shipped so far makes data **durable on purpose**:
6//! the audit trail is hash-chained append-only, outbox rows are committed,
7//! idempotency caches replay responses for hours, and the DLQ parks full
8//! payloads. Every one of those becomes a *permanent* PII leak if a card
9//! number or patient name reaches it raw — you cannot delete from a sealed
10//! ledger without destroying the very tamper-evidence it exists to provide.
11//! Masking therefore happens **at the sink**, before durability.
12//!
13//! ## Zero-lock mechanics
14//!
15//! The rule set lives behind one `ArcSwap<MaskingPolicy>` (the proven
16//! pattern from secrets / tenants / policies): applying masks is one atomic
17//! pointer load plus a pure walk over the JSON tree against paths that were
18//! parsed **at load time** — no regex, no I/O, no locks on any hot path.
19//!
20//! ## Usage
21//!
22//! ```ignore
23//! // boot — global rules by data classification:
24//! ctx.provide(Masker::new(
25//!     MaskingPolicy::new(1)
26//!         .field("email")                      // Redact (default)
27//!         .field("payment.card.number:last4")  // PCI PAN
28//!         .field("ssn:drop")
29//!         .field("items.*.patient_name:hash"), // joinable, unreadable
30//! ));
31//!
32//! // route — extra fields for this endpoint only:
33//! #[Get("/:id", security("bearer"))]
34//! #[MaskFields("notes.*.body", "phone:last4")]
35//! async fn get_patient(/* ... */) -> Result<Json<Patient>, HttpException> { /* ... */ }
36//! ```
37//!
38//! No `Masker` in the DI container → every sink passes data through
39//! untouched (dev mode needs zero configuration).
40
41use arc_swap::ArcSwap;
42use serde_json::Value;
43use sha2::{Digest, Sha256};
44use std::sync::Arc;
45
46// ─── Strategies ───────────────────────────────────────────────────────────────
47
/// How a matched field is transformed.
///
/// [`MaskStrategy::Redact`] is the default: it is what a rule spec without a
/// `:strategy` suffix resolves to, and what [`Default::default`] returns.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum MaskStrategy {
    /// Shape-preserving redaction: `"jane@example.com"` → `"j***@e***.com"`,
    /// other strings → first char + `***`. Default.
    #[default]
    Redact,
    /// SHA-256 hex of the UTF-8 value — correlatable, unreadable.
    Hash,
    /// Keep the final 4 characters: `"************4242"` (PCI PAN style).
    Last4,
    /// Replace with JSON `null` — when no residue may remain.
    Drop,
}

impl MaskStrategy {
    /// Resolve the strategy suffix of a rule spec (the text after the last
    /// `:`) to a variant.
    ///
    /// Matching is exact and case-sensitive; anything other than `redact`,
    /// `hash`, `last4` or `drop` — including the empty string — yields `None`
    /// so the caller can reject the whole spec.
    fn parse(s: &str) -> Option<Self> {
        match s {
            "redact" => Some(Self::Redact),
            "hash" => Some(Self::Hash),
            "last4" => Some(Self::Last4),
            "drop" => Some(Self::Drop),
            _ => None,
        }
    }
}
73
74// ─── Path engine ──────────────────────────────────────────────────────────────
75
/// One segment of a compiled field path. `*` matches every array element
/// (or every object value), enabling rules like `items.*.email`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum PathSeg {
    /// A literal object key, looked up by exact string equality.
    Key(String),
    /// The `*` wildcard: descends into every element of an array or every
    /// value of an object.
    Any,
}
83
84/// A field rule: where + how.
85#[derive(Clone, Debug)]
86pub struct MaskRule {
87    pub path: Vec<PathSeg>,
88    pub strategy: MaskStrategy,
89}
90
91impl MaskRule {
92    /// Parse `"payment.card.number:last4"` / `"email"` (strategy defaults to
93    /// Redact). Returns `None` for an empty path or unknown strategy.
94    pub fn parse(spec: &str) -> Option<Self> {
95        let (path_str, strategy) = match spec.rsplit_once(':') {
96            Some((p, s)) => (p, MaskStrategy::parse(s)?),
97            None => (spec, MaskStrategy::Redact),
98        };
99        if path_str.is_empty() {
100            return None;
101        }
102        let path = path_str
103            .split('.')
104            .map(|seg| {
105                if seg == "*" {
106                    PathSeg::Any
107                } else {
108                    PathSeg::Key(seg.to_owned())
109                }
110            })
111            .collect();
112        Some(Self { path, strategy })
113    }
114}
115
116// ─── Policy & masker ──────────────────────────────────────────────────────────
117
118/// Versioned, immutable rule set (hot-swapped whole, like `PolicySet`).
119pub struct MaskingPolicy {
120    pub version: u64,
121    rules: Vec<MaskRule>,
122}
123
124impl MaskingPolicy {
125    pub fn new(version: u64) -> Self {
126        Self {
127            version,
128            rules: Vec::new(),
129        }
130    }
131
132    /// Add a rule from `"path[:strategy]"` syntax. Invalid specs are a boot
133    /// configuration error — panic loudly rather than silently not masking.
134    pub fn field(mut self, spec: &str) -> Self {
135        let rule =
136            MaskRule::parse(spec).unwrap_or_else(|| panic!("invalid mask field spec: {spec:?}"));
137        self.rules.push(rule);
138        self
139    }
140
141    pub fn rules(&self) -> &[MaskRule] {
142        &self.rules
143    }
144}
145
146/// Hot-swappable redaction point. Provide via `ctx.provide(Masker::new(…))`.
147pub struct Masker {
148    policy: ArcSwap<MaskingPolicy>,
149}
150
151impl Masker {
152    pub fn new(initial: MaskingPolicy) -> Self {
153        Self {
154            policy: ArcSwap::from_pointee(initial),
155        }
156    }
157
158    /// Swap in a new rule set — effective on the very next request/sink
159    /// write. Stale (≤ current) versions are ignored.
160    pub fn reload(&self, next: MaskingPolicy) {
161        let current = self.policy.load().version;
162        if next.version <= current {
163            tracing::warn!(
164                current,
165                offered = next.version,
166                "ignoring stale masking policy reload"
167            );
168            return;
169        }
170        tracing::info!(version = next.version, "masking policy reloaded (live)");
171        self.policy.store(Arc::new(next));
172    }
173
174    pub fn version(&self) -> u64 {
175        self.policy.load().version
176    }
177
178    /// Apply the global rules in place. Returns `true` when anything matched
179    /// (sinks use it to annotate "redacted" in their own metadata).
180    pub fn apply(&self, value: &mut Value) -> bool {
181        let policy = self.policy.load();
182        let mut touched = false;
183        for rule in &policy.rules {
184            touched |= apply_rule(value, &rule.path, rule.strategy);
185        }
186        touched
187    }
188
189    /// Global rules + route-local extras (from `#[MaskFields]`).
190    pub fn apply_with(&self, value: &mut Value, extra: &[MaskRule]) -> bool {
191        let mut touched = self.apply(value);
192        for rule in extra {
193            touched |= apply_rule(value, &rule.path, rule.strategy);
194        }
195        touched
196    }
197}
198
199// ─── Pure tree walk ───────────────────────────────────────────────────────────
200
201fn apply_rule(v: &mut Value, path: &[PathSeg], strategy: MaskStrategy) -> bool {
202    match path.split_first() {
203        None => {
204            if v.is_null() {
205                return false;
206            }
207            *v = mask_leaf(v, strategy);
208            true
209        }
210        Some((PathSeg::Key(k), rest)) => match v {
211            Value::Object(map) => map
212                .get_mut(k)
213                .map(|child| apply_rule(child, rest, strategy))
214                .unwrap_or(false),
215            // A keyed segment looks *through* arrays: `items.email` behaves
216            // like `items.*.email`, which is what rule authors expect.
217            Value::Array(items) => items
218                .iter_mut()
219                .fold(false, |acc, item| acc | apply_rule(item, path, strategy)),
220            _ => false,
221        },
222        Some((PathSeg::Any, rest)) => match v {
223            Value::Array(items) => items
224                .iter_mut()
225                .fold(false, |acc, item| acc | apply_rule(item, rest, strategy)),
226            Value::Object(map) => map
227                .values_mut()
228                .fold(false, |acc, child| acc | apply_rule(child, rest, strategy)),
229            _ => false,
230        },
231    }
232}
233
234fn mask_leaf(v: &Value, strategy: MaskStrategy) -> Value {
235    match strategy {
236        MaskStrategy::Drop => Value::Null,
237        MaskStrategy::Hash => {
238            let raw = match v {
239                Value::String(s) => s.clone(),
240                other => other.to_string(),
241            };
242            let digest = Sha256::digest(raw.as_bytes());
243            Value::String(digest.iter().map(|b| format!("{b:02x}")).collect())
244        }
245        MaskStrategy::Last4 => {
246            let raw = match v {
247                Value::String(s) => s.clone(),
248                other => other.to_string(),
249            };
250            let chars: Vec<char> = raw.chars().collect();
251            let keep = chars.len().min(4);
252            let masked: String = std::iter::repeat_n('*', chars.len().saturating_sub(keep))
253                .chain(chars[chars.len() - keep..].iter().copied())
254                .collect();
255            Value::String(masked)
256        }
257        MaskStrategy::Redact => Value::String(redact_string(&match v {
258            Value::String(s) => s.clone(),
259            other => other.to_string(),
260        })),
261    }
262}
263
/// `jane@example.com` → `j***@e***.com`; other strings → first char + `***`.
///
/// The email shape is only used when the text after the last `.` looks like
/// a DNS label: 1–24 ASCII letters, digits or hyphens. That tail is emitted
/// verbatim, so without the check any free text containing an `@` and a
/// later `.` would leak everything after that final dot. Anything that fails
/// the check — including Unicode-form TLDs — falls back to the generic form.
///
/// An empty input yields `***`. An empty local part or host is rendered as
/// `*` in place of its first character.
fn redact_string(s: &str) -> String {
    // NOTE(review): 24 is meant to cover the longest TLD currently delegated —
    // confirm against the IANA list if this ever rejects a real address.
    const MAX_TLD_LEN: usize = 24;

    if let Some((local, domain)) = s.split_once('@') {
        if let Some((host, tld)) = domain.rsplit_once('.') {
            let tld_is_label = !tld.is_empty()
                && tld.len() <= MAX_TLD_LEN
                && tld.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-');
            if tld_is_label {
                return format!(
                    "{}***@{}***.{}",
                    local.chars().next().unwrap_or('*'),
                    host.chars().next().unwrap_or('*'),
                    tld,
                );
            }
        }
    }
    match s.chars().next() {
        Some(c) => format!("{c}***"),
        None => "***".to_owned(),
    }
}
281
282// ─── Macro support: response-sink wrapper ─────────────────────────────────────
283
284/// Cap on response bodies eligible for masking (mirrors the idempotency cap).
285const MAX_MASKED_BODY: usize = 256 * 1024;
286
287/// Called by the `#[MaskFields]` expansion — sits *inside* `#[Idempotent]`,
288/// so replay caches only ever store masked bodies. Non-JSON or oversized
289/// bodies pass through untouched (masking never breaks a response).
290#[doc(hidden)]
291pub async fn mask_response(
292    ctx: &crate::web::context::RequestContext,
293    fields: &'static [&'static str],
294    resp: axum::response::Response,
295) -> axum::response::Response {
296    let Some(masker) = ctx.try_inject::<Masker>() else {
297        return resp;
298    };
299
300    let is_json = resp
301        .headers()
302        .get("content-type")
303        .and_then(|v| v.to_str().ok())
304        .map(|ct| ct.starts_with("application/json"))
305        .unwrap_or(false);
306    if !is_json {
307        return resp;
308    }
309
310    let (parts, body) = resp.into_parts();
311    let bytes = match axum::body::to_bytes(body, MAX_MASKED_BODY).await {
312        Ok(b) => b,
313        Err(_) => return axum::response::Response::from_parts(parts, axum::body::Body::empty()),
314    };
315
316    let Ok(mut value) = serde_json::from_slice::<Value>(&bytes) else {
317        return axum::response::Response::from_parts(parts, axum::body::Body::from(bytes));
318    };
319
320    let extra: Vec<MaskRule> = fields.iter().filter_map(|f| MaskRule::parse(f)).collect();
321    if masker.apply_with(&mut value, &extra) {
322        metrics::counter!("masked_responses_total").increment(1);
323    }
324
325    let masked = serde_json::to_vec(&value).unwrap_or_else(|_| bytes.to_vec());
326    let mut parts = parts;
327    parts.headers.remove("content-length"); // body length changed
328    axum::response::Response::from_parts(parts, axum::body::Body::from(masked))
329}