arcly-http 0.1.0

Enterprise-grade NestJS-inspired web framework on axum: zero-lock DI, declarative controllers, multi-tenant data routing, transactional outbox, ABAC, and a self-documenting OpenAPI surface
Documentation
//! PII masking — classification-driven redaction at every data sink.
//!
//! ## Why this is load-bearing for the rest of the framework
//!
//! The compliance machinery shipped so far makes data **durable on purpose**:
//! the audit trail is hash-chained append-only, outbox rows are committed,
//! idempotency caches replay responses for hours, and the DLQ parks full
//! payloads. Every one of those becomes a *permanent* PII leak if a card
//! number or patient name reaches it raw — you cannot delete from a sealed
//! ledger without destroying the very tamper-evidence it exists to provide.
//! Masking therefore happens **at the sink**, before durability.
//!
//! ## Zero-lock mechanics
//!
//! The rule set lives behind one `ArcSwap<MaskingPolicy>` (the proven
//! pattern from secrets / tenants / policies): applying masks is one atomic
//! pointer load plus a pure walk over the JSON tree against paths that were
//! parsed **at load time** — no regex, no I/O, no locks on any hot path.
//!
//! ## Usage
//!
//! ```ignore
//! // boot — global rules by data classification:
//! ctx.provide(Masker::new(
//!     MaskingPolicy::new(1)
//!         .field("email")                      // Redact (default)
//!         .field("payment.card.number:last4")  // PCI PAN
//!         .field("ssn:drop")
//!         .field("items.*.patient_name:hash"), // joinable, unreadable
//! ));
//!
//! // route — extra fields for this endpoint only:
//! #[Get("/:id", security("bearer"))]
//! #[MaskFields("notes.*.body", "phone:last4")]
//! async fn get_patient(/* ... */) -> Result<Json<Patient>, HttpException> { /* ... */ }
//! ```
//!
//! No `Masker` in the DI container → every sink passes data through
//! untouched (dev mode needs zero configuration).

use arc_swap::ArcSwap;
use serde_json::Value;
use sha2::{Digest, Sha256};
use std::sync::Arc;

// ─── Strategies ───────────────────────────────────────────────────────────────

/// The transformation applied to a value once a rule's path has reached it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MaskStrategy {
    /// Shape-preserving redaction, used when a spec names no strategy:
    /// `"jane@example.com"` → `"j***@e***.com"`; any other string keeps its
    /// first character followed by `***`.
    Redact,
    /// Lowercase SHA-256 hex digest of the value's UTF-8 bytes — values stay
    /// correlatable without being readable.
    Hash,
    /// Keeps only the final 4 characters: `"************4242"` (PCI PAN style).
    Last4,
    /// Replaces the value with JSON `null` — for when no residue may remain.
    Drop,
}

impl MaskStrategy {
    /// Spec-suffix keywords and the strategy each one selects.
    const KEYWORDS: [(&'static str, MaskStrategy); 4] = [
        ("redact", MaskStrategy::Redact),
        ("hash", MaskStrategy::Hash),
        ("last4", MaskStrategy::Last4),
        ("drop", MaskStrategy::Drop),
    ];

    /// Resolves a strategy keyword (the part after `:` in a field spec).
    ///
    /// The match is exact and case-sensitive; anything else yields `None`.
    fn parse(s: &str) -> Option<Self> {
        Self::KEYWORDS
            .iter()
            .find(|(keyword, _)| *keyword == s)
            .map(|&(_, strategy)| strategy)
    }
}

// ─── Path engine ──────────────────────────────────────────────────────────────

/// One segment of a compiled field path.
///
/// `Key` names an object member. `Any` (written `*` in a spec) matches every
/// array element or every object value, enabling rules like `items.*.email`.
#[derive(Clone, Debug)]
pub enum PathSeg {
    Key(String),
    Any,
}

/// A field rule: where (`path`) + how (`strategy`).
#[derive(Clone, Debug)]
pub struct MaskRule {
    pub path: Vec<PathSeg>,
    pub strategy: MaskStrategy,
}

impl MaskRule {
    /// Parse `"payment.card.number:last4"` / `"email"` (strategy defaults to
    /// Redact when no `:suffix` is given).
    ///
    /// Returns `None` for an unknown strategy, an empty path, or a path with
    /// an empty segment (`"a..b"`, `".a"`, `"a."`). An empty segment would
    /// compile to a key that never matches real data, i.e. a rule that
    /// silently masks nothing — rejecting it lets callers surface the typo.
    pub fn parse(spec: &str) -> Option<Self> {
        // The strategy is whatever follows the *last* `:`; no `:` means Redact.
        let (path_str, strategy) = match spec.rsplit_once(':') {
            Some((p, s)) => (p, MaskStrategy::parse(s)?),
            None => (spec, MaskStrategy::Redact),
        };
        // Splitting an empty string yields one empty segment, so the
        // empty-path case is rejected by the same arm as `a..b`.
        let path = path_str
            .split('.')
            .map(|seg| match seg {
                "" => None,
                "*" => Some(PathSeg::Any),
                key => Some(PathSeg::Key(key.to_owned())),
            })
            .collect::<Option<Vec<_>>>()?;
        Some(Self { path, strategy })
    }
}

// ─── Policy & masker ──────────────────────────────────────────────────────────

/// A versioned set of masking rules.
///
/// Built once with the chained [`MaskingPolicy::field`] calls and then
/// handed to a `Masker`, which replaces the whole set on reload rather than
/// editing it in place.
pub struct MaskingPolicy {
    pub version: u64,
    rules: Vec<MaskRule>,
}

impl MaskingPolicy {
    /// Creates an empty policy tagged with `version`.
    pub fn new(version: u64) -> Self {
        let rules = Vec::new();
        Self { version, rules }
    }

    /// Appends the rule described by `spec` (`"path[:strategy]"`) and returns
    /// the policy for further chaining.
    ///
    /// # Panics
    ///
    /// Panics when `spec` does not parse. A bad spec is a boot-time
    /// configuration error, and failing loudly beats silently not masking.
    pub fn field(mut self, spec: &str) -> Self {
        match MaskRule::parse(spec) {
            Some(rule) => self.rules.push(rule),
            None => panic!("invalid mask field spec: {spec:?}"),
        }
        self
    }

    /// The rules in the order they were added.
    pub fn rules(&self) -> &[MaskRule] {
        self.rules.as_slice()
    }
}

/// Hot-swappable redaction point. Provide via `ctx.provide(Masker::new(…))`.
pub struct Masker {
    policy: ArcSwap<MaskingPolicy>,
}

impl Masker {
    /// Creates a masker that starts out with `initial` as its rule set.
    pub fn new(initial: MaskingPolicy) -> Self {
        Self {
            policy: ArcSwap::from_pointee(initial),
        }
    }

    /// Swap in a new rule set — effective on the very next request/sink
    /// write. Stale (≤ current) versions are ignored.
    ///
    /// The version check and the swap happen as one atomic read-copy-update,
    /// so two concurrent reloads cannot leave the older policy installed.
    pub fn reload(&self, next: MaskingPolicy) {
        let next = Arc::new(next);
        // `rcu` re-runs the closure if another writer got in between our
        // read and our swap, so the comparison is always made against the
        // policy that is actually being replaced. Returning the current
        // pointer unchanged is the "ignore" case.
        let previous = self.policy.rcu(|current| {
            if next.version > current.version {
                Arc::clone(&next)
            } else {
                Arc::clone(current)
            }
        });
        if next.version > previous.version {
            tracing::info!(version = next.version, "masking policy reloaded (live)");
        } else {
            tracing::warn!(
                current = previous.version,
                offered = next.version,
                "ignoring stale masking policy reload"
            );
        }
    }

    /// Version of the policy currently in effect.
    pub fn version(&self) -> u64 {
        self.policy.load().version
    }

    /// Apply the global rules in place. Returns `true` when anything matched
    /// (sinks use it to annotate "redacted" in their own metadata).
    pub fn apply(&self, value: &mut Value) -> bool {
        // One load for the whole pass: every rule comes from the same policy
        // snapshot even if a reload lands while we are walking.
        let policy = self.policy.load();
        let mut touched = false;
        for rule in &policy.rules {
            touched |= apply_rule(value, &rule.path, rule.strategy);
        }
        touched
    }

    /// Global rules + route-local extras (from `#[MaskFields]`).
    ///
    /// The global rules run first, then each rule in `extra`, in order.
    /// Returns `true` when any of them matched.
    pub fn apply_with(&self, value: &mut Value, extra: &[MaskRule]) -> bool {
        let mut touched = self.apply(value);
        for rule in extra {
            touched |= apply_rule(value, &rule.path, rule.strategy);
        }
        touched
    }
}

// ─── Pure tree walk ───────────────────────────────────────────────────────────

/// Walks `v` along `path` and masks, in place, every value the path reaches.
///
/// Returns `true` when at least one value was masked. A `null` at the end of
/// the path is left alone and does not count as a match.
fn apply_rule(v: &mut Value, path: &[PathSeg], strategy: MaskStrategy) -> bool {
    // Path exhausted: `v` itself is the target.
    let Some((head, tail)) = path.split_first() else {
        if v.is_null() {
            return false;
        }
        *v = mask_leaf(v, strategy);
        return true;
    };

    let mut touched = false;
    match (head, v) {
        // Named member of an object: descend only if the key exists.
        (PathSeg::Key(name), Value::Object(members)) => {
            if let Some(child) = members.get_mut(name) {
                touched = apply_rule(child, tail, strategy);
            }
        }
        // A keyed segment looks *through* arrays: each element is retried
        // with the same, unconsumed path, so `items.email` behaves like
        // `items.*.email`.
        (PathSeg::Key(_), Value::Array(elements)) => {
            for element in elements.iter_mut() {
                touched |= apply_rule(element, path, strategy);
            }
        }
        // Wildcard: fan out over every array element…
        (PathSeg::Any, Value::Array(elements)) => {
            for element in elements.iter_mut() {
                touched |= apply_rule(element, tail, strategy);
            }
        }
        // …or every object value.
        (PathSeg::Any, Value::Object(members)) => {
            for child in members.values_mut() {
                touched |= apply_rule(child, tail, strategy);
            }
        }
        // Scalars cannot be descended into; the rule simply does not match.
        _ => {}
    }
    touched
}

/// Produces the masked replacement for a single matched value.
///
/// Strings are masked on their content; every other JSON value is first
/// rendered with `to_string()` and that text is masked. Every strategy
/// except `Drop` therefore yields a JSON string.
fn mask_leaf(v: &Value, strategy: MaskStrategy) -> Value {
    use std::fmt::Write as _;

    /// The text a strategy operates on, borrowed when `v` is already a string.
    fn leaf_text(v: &Value) -> std::borrow::Cow<'_, str> {
        match v {
            Value::String(s) => std::borrow::Cow::Borrowed(s.as_str()),
            other => std::borrow::Cow::Owned(other.to_string()),
        }
    }

    match strategy {
        MaskStrategy::Drop => Value::Null,
        MaskStrategy::Redact => Value::String(redact_string(&leaf_text(v))),
        MaskStrategy::Hash => {
            let digest = Sha256::digest(leaf_text(v).as_bytes());
            // SHA-256 is 32 bytes, i.e. 64 lowercase hex characters.
            let mut hex = String::with_capacity(64);
            for byte in digest.iter() {
                // Writing into a `String` cannot fail.
                let _ = write!(hex, "{byte:02x}");
            }
            Value::String(hex)
        }
        MaskStrategy::Last4 => {
            let text = leaf_text(v);
            // Counted in characters, not bytes, so multi-byte text is never
            // split mid-character.
            let hidden = text.chars().count().saturating_sub(4);
            // NOTE(review): a value of four or fewer characters comes back
            // unchanged — confirm that is acceptable for the fields using it.
            let mut masked = "*".repeat(hidden);
            masked.extend(text.chars().skip(hidden));
            Value::String(masked)
        }
    }
}

/// `jane@example.com` → `j***@e***.com`; other strings → first char + `***`.
///
/// The email shape is only used when the text after the last `.` looks like
/// a top-level domain: non-empty, at most `MAX_TLD_LEN` bytes, ASCII letters
/// only. That suffix is echoed verbatim, so without the check any free text
/// containing an `@` and a later `.` (e.g. `"call me @ 5pm. SSN 123-45-6789"`)
/// would leak everything after its final `.`. Anything that fails the check
/// falls back to the generic first-char redaction, which reveals less.
fn redact_string(s: &str) -> String {
    /// Upper bound on how many characters of the input may be echoed as a TLD.
    const MAX_TLD_LEN: usize = 24;

    if let Some((local, domain)) = s.split_once('@') {
        if let Some((host, tld)) = domain.rsplit_once('.') {
            let tld_is_plausible = !tld.is_empty()
                && tld.len() <= MAX_TLD_LEN
                && tld.bytes().all(|b| b.is_ascii_alphabetic());
            if tld_is_plausible {
                return format!(
                    "{}***@{}***.{}",
                    // An empty local part or host still yields a well-formed shape.
                    local.chars().next().unwrap_or('*'),
                    host.chars().next().unwrap_or('*'),
                    tld,
                );
            }
        }
    }
    match s.chars().next() {
        Some(c) => format!("{c}***"),
        None => "***".to_owned(),
    }
}

// ─── Macro support: response-sink wrapper ─────────────────────────────────────

/// Cap on response bodies eligible for masking (mirrors the idempotency cap).
/// Used as the byte limit when `mask_response` buffers a body: 256 KiB.
const MAX_MASKED_BODY: usize = 256 * 1024;

/// Called by the `#[MaskFields]` expansion — sits *inside* `#[Idempotent]`,
/// so replay caches only ever store masked bodies. Non-JSON or oversized
/// bodies pass through untouched (masking never breaks a response).
#[doc(hidden)]
pub async fn mask_response(
    ctx: &crate::web::context::RequestContext,
    fields: &'static [&'static str],
    resp: axum::response::Response,
) -> axum::response::Response {
    let Some(masker) = ctx.try_inject::<Masker>() else {
        return resp;
    };

    let is_json = resp
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
        .map(|ct| ct.starts_with("application/json"))
        .unwrap_or(false);
    if !is_json {
        return resp;
    }

    let (parts, body) = resp.into_parts();
    let bytes = match axum::body::to_bytes(body, MAX_MASKED_BODY).await {
        Ok(b) => b,
        Err(_) => return axum::response::Response::from_parts(parts, axum::body::Body::empty()),
    };

    let Ok(mut value) = serde_json::from_slice::<Value>(&bytes) else {
        return axum::response::Response::from_parts(parts, axum::body::Body::from(bytes));
    };

    let extra: Vec<MaskRule> = fields.iter().filter_map(|f| MaskRule::parse(f)).collect();
    if masker.apply_with(&mut value, &extra) {
        metrics::counter!("masked_responses_total").increment(1);
    }

    let masked = serde_json::to_vec(&value).unwrap_or_else(|_| bytes.to_vec());
    let mut parts = parts;
    parts.headers.remove("content-length"); // body length changed
    axum::response::Response::from_parts(parts, axum::body::Body::from(masked))
}