Skip to main content

agent_sdk_foundation/privacy/
redaction.rs

1//! Configurable structural redaction policy for tool inputs, outputs,
2//! and observability payloads.
3//!
4//! Tool calls, audit records, and span attributes routinely carry
5//! sensitive data (passwords, API keys, tokens, connection strings,
6//! and — importantly for financial workloads — card PANs, CPFs,
7//! CNPJs, emails, phone numbers) that should not be stored in
8//! durable audit logs or exported across an observability boundary
9//! without explicit redaction. This module provides:
10//!
11//! - [`RedactionPolicy`] — configurable redaction rules with three
12//!   levels: [`None`](RedactionLevel::None),
13//!   [`Baseline`](RedactionLevel::Baseline), and
14//!   [`Full`](RedactionLevel::Full).
15//! - [`redact_value`] — applies redaction rules to a JSON value,
16//!   replacing sensitive keys with a `[REDACTED]` marker and
17//!   masking entity PII in string leaves with `[REDACTED:<category>]`.
18//! - [`redact_string`] / [`redact_error`] — apply redaction rules
19//!   to plain strings.
20//! - [`redact_for_observability`] — combined helper that runs the
21//!   structural [`RedactionPolicy`] *and* a caller-supplied
22//!   [`PiiDetector`] in a single pass, suitable for the SDK
23//!   observability boundary.
24//!
25//! # Baseline policy
26//!
27//! The [`RedactionPolicy::baseline`] constructor returns a policy
28//! that composes two redaction layers:
29//!
30//! 1. **Structural** — JSON object keys matching sensitive names
31//!    (`password`, `secret`, `token`, `api_key`, `authorization`,
32//!    `credential`, `cpf`, `cnpj`, etc.) wholesale-redact their
33//!    value. String values that *start with* a sensitive prefix
34//!    (`Bearer `, `sk-`, `ghp_`, `AKIA…`) are likewise wholesale
35//!    redacted.
36//! 2. **Entity-level** — a [`PiiDetector`] scans every remaining
37//!    string leaf for emails, E.164 phones, credit card PANs (Luhn
38//!    validated), Brazilian CPFs and CNPJs (mod-11 validated), Pix
39//!    UUID keys, IPv4 addresses, JWTs, and embedded credential
40//!    tokens. Detected spans are replaced with
41//!    `[REDACTED:<category>]` while the surrounding context stays
42//!    intact. This catches PII that leaks into freeform text (e.g.
43//!    a PAN mentioned in a tool response) without wrecking
44//!    debuggability.
45//!
46//! The detector defaults to [`BaselineDetector`]. Callers can plug
47//! in a custom detector by assigning [`RedactionPolicy::detector`]
48//! directly, or by passing a different detector to
49//! [`redact_for_observability`].
50//!
51//! # Default impl
52//!
53//! [`RedactionPolicy`] implements `Default` by returning
54//! [`RedactionPolicy::baseline()`] — never an empty policy. Code
55//! that wants a genuinely empty policy must opt in via
56//! [`RedactionPolicy::none`] and acknowledge the production-safety
57//! implications.
58//!
59//! # Serialisation
60//!
61//! [`RedactionPolicy`] is `Serialize` + `Deserialize`. The detector
62//! is skipped on serialize and re-populated with the process-wide
63//! baseline on deserialize — policies persisted to disk retain
64//! their levels and pattern lists, and the runtime detector is
65//! rebound on load.
66//!
67//! # Usage
68//!
69//! ```
70//! use agent_sdk_foundation::privacy::{RedactionPolicy, redact_value};
71//!
72//! let policy = RedactionPolicy::baseline();
73//! let input = serde_json::json!({
74//!     "command": "echo hello",
75//!     "api_key": "sk-abc123",
76//!     "note": "CPF 111.444.777-35 on file"
77//! });
78//! let redacted = redact_value(&input, &policy);
79//! // redacted["api_key"] == "[REDACTED]"       (sensitive key)
80//! // redacted["command"] == "echo hello"       (no PII)
81//! // redacted["note"] contains "[REDACTED:cpf]" (entity mask)
82//! # let _ = redacted;
83//! ```
84
85use serde::{Deserialize, Serialize};
86use std::sync::{Arc, LazyLock};
87
88use super::{BaselineDetector, NoopDetector, PiiDetector, mask_spans};
89
/// Redaction marker used for wholesale redaction: a sensitive key
/// match, a full-string secret prefix, or a
/// [`RedactionLevel::Full`] policy. Entity-level masks use
/// `[REDACTED:<category>]` instead — see [`crate::privacy`].
pub const REDACTED_MARKER: &str = "[REDACTED]";
94
95/// Shared baseline detector. Compiled lazily on first use; cloning
96/// the `Arc` is a single atomic inc.
97static BASELINE_DETECTOR: LazyLock<Arc<dyn PiiDetector>> = LazyLock::new(|| {
98    BaselineDetector::new().map_or_else(
99        |_| Arc::new(NoopDetector) as Arc<dyn PiiDetector>,
100        |d| Arc::new(d) as Arc<dyn PiiDetector>,
101    )
102});
103
104/// Shared noop detector.
105static NOOP_DETECTOR: LazyLock<Arc<dyn PiiDetector>> =
106    LazyLock::new(|| Arc::new(NoopDetector) as Arc<dyn PiiDetector>);
107
108/// Default detector used when a policy is deserialised without an
109/// embedded detector (which is always, since the field is
110/// `#[serde(skip)]`).
111fn default_detector() -> Arc<dyn PiiDetector> {
112    BASELINE_DETECTOR.clone()
113}
114
115// ─────────────────────────────────────────────────────────────────────
116// Redaction level
117// ─────────────────────────────────────────────────────────────────────
118
/// How aggressively to redact a given field category.
///
/// Serialised in `snake_case` (`"none"`, `"baseline"`, `"full"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RedactionLevel {
    /// No redaction — store full values as-is.
    None,
    /// Structural plus entity-level redaction: values under sensitive
    /// keys and strings starting with a sensitive prefix are replaced
    /// wholesale with [`REDACTED_MARKER`]; the remaining string
    /// leaves are scanned by the detector and only the detected
    /// spans are masked.
    Baseline,
    /// Full redaction — the whole value is replaced with
    /// [`REDACTED_MARKER`].
    Full,
}
130
131// ─────────────────────────────────────────────────────────────────────
132// Redaction policy
133// ─────────────────────────────────────────────────────────────────────
134
/// Configurable redaction rules for tool audit records and
/// observability payloads.
///
/// Each field category (input, output, error) has its own
/// [`RedactionLevel`]. At [`Baseline`](RedactionLevel::Baseline) the
/// policy composes two layers:
///
/// 1. Structural — [`sensitive_key_patterns`](Self::sensitive_key_patterns)
///    triggers wholesale replacement of JSON object values by key
///    name, and [`sensitive_value_prefixes`](Self::sensitive_value_prefixes)
///    does the same for strings that *start with* a known prefix.
/// 2. Entity-level — [`detector`](Self::detector) scans every
///    remaining string leaf for emails, PANs, CPFs, CNPJs, etc. and
///    masks the spans it finds in place.
///
/// The detector is a runtime object not persisted across
/// serialisation; on deserialise it is rebound to the process-wide
/// [`BaselineDetector`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RedactionPolicy {
    /// Redaction level for tool input values. This is the level read
    /// by [`redact_value`], [`redact_for_observability`] and
    /// [`RedactionPolicy::redact_in_place`].
    pub input_level: RedactionLevel,
    /// Redaction level for tool output values; read by
    /// [`redact_string`].
    pub output_level: RedactionLevel,
    /// Redaction level for error detail strings; read by
    /// [`redact_error`].
    pub error_level: RedactionLevel,
    /// Key substrings that trigger redaction at baseline level.
    /// Stored lowercase; matched case-insensitively. When editing
    /// this list directly, keep entries lowercase — or use
    /// [`RedactionPolicy::extend`], which lowercases for you.
    pub sensitive_key_patterns: Vec<String>,
    /// String patterns in values that trigger redaction at baseline
    /// level (e.g. `"Bearer "`, `"sk-"`). Case-sensitive prefix match.
    pub sensitive_value_prefixes: Vec<String>,
    /// Entity-level PII detector applied at baseline. Defaults to
    /// [`BaselineDetector`]; assign directly to plug in a custom
    /// implementation. Never serialised: `#[serde(skip)]` drops it on
    /// write and `default_detector` supplies it on read.
    #[serde(skip, default = "default_detector")]
    pub detector: Arc<dyn PiiDetector>,
}
173
174impl RedactionPolicy {
175    /// Baseline redaction policy suitable for production audit logs
176    /// and observability exports.
177    ///
178    /// Redacts JSON object keys that look like credentials and
179    /// string values that look like tokens wholesale, and masks
180    /// entity-level PII (emails, PANs, CPFs, CNPJs, Pix UUIDs,
181    /// E.164 phones, IPs, JWTs) detected anywhere in remaining
182    /// string leaves. Preserves non-sensitive structural data for
183    /// debugging.
184    ///
185    /// All three levels (`input_level`, `output_level`,
186    /// `error_level`) default to [`Baseline`](RedactionLevel::Baseline).
187    /// Error strings routinely embed user data in stack traces
188    /// (`NotFound: user cpf=…`), so masking them is the safer
189    /// default — callers that need raw errors can explicitly set
190    /// `error_level: RedactionLevel::None` on a baseline policy.
191    #[must_use]
192    pub fn baseline() -> Self {
193        Self {
194            input_level: RedactionLevel::Baseline,
195            output_level: RedactionLevel::Baseline,
196            error_level: RedactionLevel::Baseline,
197            sensitive_key_patterns: vec![
198                "password".into(),
199                "passwd".into(),
200                "secret".into(),
201                "token".into(),
202                "api_key".into(),
203                "apikey".into(),
204                "authorization".into(),
205                "credential".into(),
206                "private_key".into(),
207                "private".into(),
208                "access_key".into(),
209                "session".into(),
210                "cookie".into(),
211                "bearer".into(),
212                "ssn".into(),
213                "credit_card".into(),
214                "cpf".into(),
215                "cnh".into(),
216                "cnpj".into(),
217                "crm".into(),
218                "passport".into(),
219                "driver_license".into(),
220                "social_security".into(),
221                "social_security_number".into(),
222            ],
223            sensitive_value_prefixes: vec![
224                "Bearer ".into(),
225                "sk-".into(),
226                "pk-".into(),
227                "xox".into(),
228                "ghp_".into(),
229                "gho_".into(),
230                "github_pat_".into(),
231                "AKIA".into(),
232            ],
233            detector: default_detector(),
234        }
235    }
236
237    /// Baseline policy plus additional sensitive key patterns.
238    ///
239    /// Custom keys *augment* the baseline list — they do not replace
240    /// it. Patterns are normalised to lowercase to keep the
241    /// case-insensitive matching contract intact.
242    ///
243    /// ```
244    /// use agent_sdk_foundation::privacy::RedactionPolicy;
245    /// let policy = RedactionPolicy::with_keys(["chave_pix".to_owned()]);
246    /// assert!(policy.sensitive_key_patterns.iter().any(|p| p == "password"));
247    /// assert!(policy.sensitive_key_patterns.iter().any(|p| p == "chave_pix"));
248    /// ```
249    #[must_use]
250    pub fn with_keys(keys: impl IntoIterator<Item = String>) -> Self {
251        let mut policy = Self::baseline();
252        policy.extend(keys);
253        policy
254    }
255
256    /// Append additional sensitive key patterns to this policy.
257    ///
258    /// Patterns are normalised to lowercase. Duplicates (relative
259    /// to the existing list) are silently dropped.
260    pub fn extend(&mut self, keys: impl IntoIterator<Item = String>) {
261        for key in keys {
262            let lower = key.to_lowercase();
263            if !self.sensitive_key_patterns.contains(&lower) {
264                self.sensitive_key_patterns.push(lower);
265            }
266        }
267    }
268
269    /// No-redaction policy — stores all values as-is.
270    ///
271    /// Suitable only for development and testing. Never use in
272    /// production audit logs.
273    #[must_use]
274    pub fn none() -> Self {
275        Self {
276            input_level: RedactionLevel::None,
277            output_level: RedactionLevel::None,
278            error_level: RedactionLevel::None,
279            sensitive_key_patterns: Vec::new(),
280            sensitive_value_prefixes: Vec::new(),
281            detector: NOOP_DETECTOR.clone(),
282        }
283    }
284
285    /// Full-redaction policy — replaces all input/output/error content.
286    ///
287    /// Suitable for high-security environments where no tool data
288    /// should be stored in audit logs.
289    #[must_use]
290    pub fn full() -> Self {
291        Self {
292            input_level: RedactionLevel::Full,
293            output_level: RedactionLevel::Full,
294            error_level: RedactionLevel::Full,
295            sensitive_key_patterns: Vec::new(),
296            sensitive_value_prefixes: Vec::new(),
297            detector: NOOP_DETECTOR.clone(),
298        }
299    }
300
301    /// Inherent shorthand for [`redact_value`].
302    ///
303    /// Returns a fresh `serde_json::Value` with the policy applied.
304    /// Use [`redact_in_place`](Self::redact_in_place) instead when
305    /// the caller already owns the value and wants to avoid the
306    /// clone.
307    #[must_use]
308    pub fn redact(&self, value: &serde_json::Value) -> serde_json::Value {
309        redact_value(value, self)
310    }
311
312    /// Apply the policy's `input_level` rules to `value` in place.
313    ///
314    /// Mutates `value` directly: object/array contents are walked
315    /// and string leaves are replaced with masked strings without
316    /// cloning the entire tree.
317    pub fn redact_in_place(&self, value: &mut serde_json::Value) {
318        match self.input_level {
319            RedactionLevel::None => {}
320            RedactionLevel::Full => {
321                *value = serde_json::json!(REDACTED_MARKER);
322            }
323            RedactionLevel::Baseline => self.redact_baseline_in_place(value),
324        }
325    }
326
327    fn redact_baseline_in_place(&self, value: &mut serde_json::Value) {
328        match value {
329            serde_json::Value::Object(map) => {
330                for (key, val) in map.iter_mut() {
331                    if self.is_sensitive_key(key) {
332                        *val = serde_json::json!(REDACTED_MARKER);
333                    } else {
334                        self.redact_baseline_in_place(val);
335                    }
336                }
337            }
338            serde_json::Value::Array(arr) => {
339                for v in arr.iter_mut() {
340                    self.redact_baseline_in_place(v);
341                }
342            }
343            serde_json::Value::String(s) => {
344                if self.is_sensitive_value(s) {
345                    *value = serde_json::json!(REDACTED_MARKER);
346                    return;
347                }
348                let spans = self.detector.detect(s);
349                if !spans.is_empty() {
350                    *s = mask_spans(s, &spans);
351                }
352            }
353            _ => {}
354        }
355    }
356
357    /// Check whether a JSON key matches any sensitive key pattern
358    /// (case-insensitive substring match).
359    #[must_use]
360    fn is_sensitive_key(&self, key: &str) -> bool {
361        let lower = key.to_lowercase();
362        self.sensitive_key_patterns
363            .iter()
364            .any(|pattern| lower.contains(pattern.as_str()))
365    }
366
367    /// Check whether a string value matches any sensitive value prefix.
368    #[must_use]
369    fn is_sensitive_value(&self, value: &str) -> bool {
370        self.sensitive_value_prefixes
371            .iter()
372            .any(|prefix| value.starts_with(prefix.as_str()))
373    }
374}
375
376impl Default for RedactionPolicy {
377    /// Returns [`Self::baseline()`] — never an empty policy.
378    ///
379    /// This is loud on purpose: code that derives `Default` on a
380    /// struct containing `RedactionPolicy` gets the baseline
381    /// (sensitive-key list + entity detector) automatically rather
382    /// than an empty pass-through that would silently leak PII.
383    /// Code that wants a genuinely empty policy must opt in via
384    /// [`RedactionPolicy::none`].
385    fn default() -> Self {
386        Self::baseline()
387    }
388}
389
390// ─────────────────────────────────────────────────────────────────────
391// Free redaction functions
392// ─────────────────────────────────────────────────────────────────────
393
394/// Apply redaction rules to a JSON value based on the given policy's
395/// input level.
396///
397/// - [`None`](RedactionLevel::None): returns the value unchanged.
398/// - [`Baseline`](RedactionLevel::Baseline): recursively walks JSON
399///   objects and redacts values whose keys match sensitive patterns,
400///   or string values that match sensitive value prefixes; remaining
401///   string leaves are scanned by [`RedactionPolicy::detector`].
402/// - [`Full`](RedactionLevel::Full): returns `json!("[REDACTED]")`.
403#[must_use]
404pub fn redact_value(value: &serde_json::Value, policy: &RedactionPolicy) -> serde_json::Value {
405    apply_redaction(value, policy.input_level, policy)
406}
407
408/// Apply redaction rules to a string value based on the given policy's
409/// output level.
410///
411/// - [`None`](RedactionLevel::None): returns the string unchanged.
412/// - [`Baseline`](RedactionLevel::Baseline): wholesale-masks if the
413///   string matches any sensitive value prefix; otherwise applies
414///   entity detection and masks individual PII spans
415///   (`[REDACTED:<category>]`) while preserving surrounding context.
416/// - [`Full`](RedactionLevel::Full): returns `"[REDACTED]"`.
417#[must_use]
418pub fn redact_string(value: &str, policy: &RedactionPolicy) -> String {
419    match policy.output_level {
420        RedactionLevel::None => value.to_owned(),
421        RedactionLevel::Baseline => baseline_redact_str(value, &*policy.detector, policy),
422        RedactionLevel::Full => REDACTED_MARKER.to_owned(),
423    }
424}
425
426/// Apply redaction rules to an error string based on the given policy's
427/// error level. Same semantics as [`redact_string`], but gated by
428/// [`RedactionPolicy::error_level`].
429#[must_use]
430pub fn redact_error(value: &str, policy: &RedactionPolicy) -> String {
431    match policy.error_level {
432        RedactionLevel::None => value.to_owned(),
433        RedactionLevel::Baseline => baseline_redact_str(value, &*policy.detector, policy),
434        RedactionLevel::Full => REDACTED_MARKER.to_owned(),
435    }
436}
437
438/// Combined helper for the SDK observability boundary.
439///
440/// Performs a single tree walk that:
441///
442/// 1. Runs the structural [`RedactionPolicy`] (key-name match
443///    wholesale-redacts, sensitive-prefix strings wholesale-redact).
444/// 2. Runs the supplied [`PiiDetector`] on remaining string leaves
445///    (`[REDACTED:<category>]` markers preserve surrounding context).
446///
447/// This signature lets callers reuse a process-wide
448/// `Arc<dyn PiiDetector>` across many call sites without forcing
449/// every policy instance to carry the same detector. The policy's
450/// own [`detector`](RedactionPolicy::detector) field is **not**
451/// consulted by this function — pass it explicitly if that is
452/// the desired behaviour.
453///
454/// Composition contract: running this helper twice produces the
455/// same output as running it once. Already-masked
456/// `[REDACTED]` / `[REDACTED:<category>]` markers are left intact —
457/// the entity detector's regexes do not match them.
458///
459/// Honours `policy.input_level`:
460/// - [`None`](RedactionLevel::None): clones `value` unchanged.
461/// - [`Full`](RedactionLevel::Full): returns `json!("[REDACTED]")`.
462/// - [`Baseline`](RedactionLevel::Baseline): structural + entity
463///   detection as described above.
464#[must_use]
465pub fn redact_for_observability(
466    value: &serde_json::Value,
467    policy: &RedactionPolicy,
468    detector: &dyn PiiDetector,
469) -> serde_json::Value {
470    match policy.input_level {
471        RedactionLevel::None => value.clone(),
472        RedactionLevel::Full => serde_json::json!(REDACTED_MARKER),
473        RedactionLevel::Baseline => redact_baseline_with_detector(value, policy, detector),
474    }
475}
476
477// ─────────────────────────────────────────────────────────────────────
478// Internal helpers
479// ─────────────────────────────────────────────────────────────────────
480
481/// Shared baseline redaction for a plain string: prefix-match first
482/// (wholesale), then entity detection (span-level).
483fn baseline_redact_str(
484    value: &str,
485    detector: &dyn PiiDetector,
486    policy: &RedactionPolicy,
487) -> String {
488    if policy.is_sensitive_value(value) {
489        return REDACTED_MARKER.to_owned();
490    }
491    let spans = detector.detect(value);
492    if spans.is_empty() {
493        value.to_owned()
494    } else {
495        mask_spans(value, &spans)
496    }
497}
498
499/// Internal recursive redaction for JSON values.
500fn apply_redaction(
501    value: &serde_json::Value,
502    level: RedactionLevel,
503    policy: &RedactionPolicy,
504) -> serde_json::Value {
505    match level {
506        RedactionLevel::None => value.clone(),
507        RedactionLevel::Full => serde_json::json!(REDACTED_MARKER),
508        RedactionLevel::Baseline => redact_baseline(value, policy),
509    }
510}
511
512/// Baseline redaction using the policy's bundled detector.
513fn redact_baseline(value: &serde_json::Value, policy: &RedactionPolicy) -> serde_json::Value {
514    redact_baseline_with_detector(value, policy, &*policy.detector)
515}
516
517/// Baseline redaction with an externally supplied detector. Recursively
518/// walks JSON and redacts sensitive keys (wholesale), sensitive value
519/// prefixes (wholesale), and any entity-level PII detected within
520/// remaining string leaves (span-level).
521fn redact_baseline_with_detector(
522    value: &serde_json::Value,
523    policy: &RedactionPolicy,
524    detector: &dyn PiiDetector,
525) -> serde_json::Value {
526    match value {
527        serde_json::Value::Object(map) => {
528            let mut redacted = serde_json::Map::new();
529            for (key, val) in map {
530                if policy.is_sensitive_key(key) {
531                    redacted.insert(key.clone(), serde_json::json!(REDACTED_MARKER));
532                } else {
533                    redacted.insert(
534                        key.clone(),
535                        redact_baseline_with_detector(val, policy, detector),
536                    );
537                }
538            }
539            serde_json::Value::Object(redacted)
540        }
541        serde_json::Value::Array(arr) => serde_json::Value::Array(
542            arr.iter()
543                .map(|v| redact_baseline_with_detector(v, policy, detector))
544                .collect(),
545        ),
546        serde_json::Value::String(s) => {
547            if policy.is_sensitive_value(s) {
548                return serde_json::json!(REDACTED_MARKER);
549            }
550            let spans = detector.detect(s);
551            if spans.is_empty() {
552                value.clone()
553            } else {
554                serde_json::Value::String(mask_spans(s, &spans))
555            }
556        }
557        _ => value.clone(),
558    }
559}
560
561// ─────────────────────────────────────────────────────────────────────
562// Tests
563// ─────────────────────────────────────────────────────────────────────
564
565#[cfg(test)]
566mod tests {
567    use super::*;
568    use crate::privacy::BaselineDetector;
569
570    // ── RedactionLevel ──────────────────────────────────────────
571
572    #[test]
573    fn redaction_level_round_trips_through_json() -> serde_json::Result<()> {
574        for level in [
575            RedactionLevel::None,
576            RedactionLevel::Baseline,
577            RedactionLevel::Full,
578        ] {
579            let json = serde_json::to_string(&level)?;
580            let back: RedactionLevel = serde_json::from_str(&json)?;
581            assert_eq!(back, level);
582        }
583        Ok(())
584    }
585
586    // ── RedactionPolicy construction ────────────────────────────
587
588    #[test]
589    fn baseline_policy_has_expected_defaults() {
590        let policy = RedactionPolicy::baseline();
591        assert_eq!(policy.input_level, RedactionLevel::Baseline);
592        assert_eq!(policy.output_level, RedactionLevel::Baseline);
593        // Errors default to Baseline too — stack traces often leak
594        // user PII and we'd rather mask by default than ship raw.
595        assert_eq!(policy.error_level, RedactionLevel::Baseline);
596        assert!(!policy.sensitive_key_patterns.is_empty());
597        assert!(!policy.sensitive_value_prefixes.is_empty());
598    }
599
600    #[test]
601    fn default_impl_returns_baseline_not_empty() {
602        // Hard-rule: Default must not produce a silent pass-through
603        // policy. It must equal baseline() in every observable way.
604        let default_policy = RedactionPolicy::default();
605        let baseline = RedactionPolicy::baseline();
606        assert_eq!(default_policy.input_level, baseline.input_level);
607        assert_eq!(
608            default_policy.sensitive_key_patterns,
609            baseline.sensitive_key_patterns
610        );
611        assert_eq!(
612            default_policy.sensitive_value_prefixes,
613            baseline.sensitive_value_prefixes
614        );
615    }
616
617    #[test]
618    fn none_policy_has_no_redaction() {
619        let policy = RedactionPolicy::none();
620        assert_eq!(policy.input_level, RedactionLevel::None);
621        assert_eq!(policy.output_level, RedactionLevel::None);
622        assert_eq!(policy.error_level, RedactionLevel::None);
623    }
624
625    #[test]
626    fn full_policy_redacts_everything() {
627        let policy = RedactionPolicy::full();
628        assert_eq!(policy.input_level, RedactionLevel::Full);
629        assert_eq!(policy.output_level, RedactionLevel::Full);
630        assert_eq!(policy.error_level, RedactionLevel::Full);
631    }
632
633    #[test]
634    fn policy_round_trips_through_json() -> serde_json::Result<()> {
635        let policy = RedactionPolicy::baseline();
636        let json = serde_json::to_string(&policy)?;
637        let back: RedactionPolicy = serde_json::from_str(&json)?;
638        assert_eq!(back.input_level, policy.input_level);
639        assert_eq!(
640            back.sensitive_key_patterns.len(),
641            policy.sensitive_key_patterns.len(),
642        );
643        Ok(())
644    }
645
646    // ── with_keys / extend ──────────────────────────────────────
647
648    #[test]
649    fn with_keys_includes_baseline_and_custom_keys() {
650        let policy = RedactionPolicy::with_keys(["chave_pix".to_owned()]);
651        // Baseline keys are still present.
652        assert!(
653            policy
654                .sensitive_key_patterns
655                .iter()
656                .any(|k| k == "password")
657        );
658        assert!(policy.sensitive_key_patterns.iter().any(|k| k == "api_key"));
659        // Custom key was added.
660        assert!(
661            policy
662                .sensitive_key_patterns
663                .iter()
664                .any(|k| k == "chave_pix")
665        );
666        // Both kinds redact correctly.
667        let input = serde_json::json!({
668            "chave_pix": "abc-123",
669            "password": "secret",
670            "ok": "visible",
671        });
672        let redacted = redact_value(&input, &policy);
673        assert_eq!(redacted["chave_pix"], REDACTED_MARKER);
674        assert_eq!(redacted["password"], REDACTED_MARKER);
675        assert_eq!(redacted["ok"], "visible");
676    }
677
678    #[test]
679    fn with_keys_normalises_case() {
680        // Custom keys get lower-cased so the case-insensitive
681        // contains() matcher still works.
682        let policy = RedactionPolicy::with_keys(["Chave_Pix".to_owned()]);
683        let input = serde_json::json!({ "CHAVE_PIX": "abc-123" });
684        let redacted = redact_value(&input, &policy);
685        assert_eq!(redacted["CHAVE_PIX"], REDACTED_MARKER);
686    }
687
688    #[test]
689    fn extend_appends_keys_to_existing_policy() {
690        let mut policy = RedactionPolicy::baseline();
691        let baseline_len = policy.sensitive_key_patterns.len();
692        policy.extend(["chave_pix".to_owned(), "internal_id".to_owned()]);
693        assert_eq!(policy.sensitive_key_patterns.len(), baseline_len + 2);
694        let input = serde_json::json!({ "internal_id": "xyz" });
695        let redacted = redact_value(&input, &policy);
696        assert_eq!(redacted["internal_id"], REDACTED_MARKER);
697    }
698
699    #[test]
700    fn extend_drops_duplicates() {
701        let mut policy = RedactionPolicy::baseline();
702        let baseline_len = policy.sensitive_key_patterns.len();
703        // "PASSWORD" lower-cased duplicates the existing "password".
704        policy.extend(["PASSWORD".to_owned()]);
705        assert_eq!(policy.sensitive_key_patterns.len(), baseline_len);
706    }
707
708    // ── inherent redact / redact_in_place ───────────────────────
709
710    #[test]
711    fn redact_method_matches_redact_value() {
712        let policy = RedactionPolicy::baseline();
713        let input = serde_json::json!({
714            "api_key": "sk-abc",
715            "name": "test",
716        });
717        assert_eq!(policy.redact(&input), redact_value(&input, &policy));
718    }
719
720    #[test]
721    fn redact_in_place_mutates_in_place() {
722        let policy = RedactionPolicy::baseline();
723        let mut value = serde_json::json!({
724            "api_key": "sk-abc",
725            "nested": {
726                "password": "shh",
727                "name": "ok",
728            },
729            "note": "CPF 111.444.777-35 attached",
730        });
731        policy.redact_in_place(&mut value);
732        assert_eq!(value["api_key"], REDACTED_MARKER);
733        assert_eq!(value["nested"]["password"], REDACTED_MARKER);
734        assert_eq!(value["nested"]["name"], "ok");
735        let note = value["note"].as_str().expect("note remains a string");
736        assert!(note.contains("[REDACTED:cpf]"), "got: {note}");
737    }
738
739    #[test]
740    fn redact_in_place_handles_full_level() {
741        let policy = RedactionPolicy::full();
742        let mut value = serde_json::json!({"a": 1, "b": "two"});
743        policy.redact_in_place(&mut value);
744        assert_eq!(value, serde_json::json!(REDACTED_MARKER));
745    }
746
747    #[test]
748    fn redact_in_place_handles_none_level() {
749        let policy = RedactionPolicy::none();
750        let original = serde_json::json!({"api_key": "sk-abc", "ok": "vis"});
751        let mut value = original.clone();
752        policy.redact_in_place(&mut value);
753        assert_eq!(value, original);
754    }
755
756    // ── redact_value: none level ────────────────────────────────
757
758    #[test]
759    fn none_level_preserves_all_values() {
760        let policy = RedactionPolicy::none();
761        let input = serde_json::json!({
762            "password": "secret123",
763            "api_key": "sk-abc",
764            "normal": "hello",
765        });
766        let result = redact_value(&input, &policy);
767        assert_eq!(result, input);
768    }
769
770    // ── redact_value: full level ────────────────────────────────
771
/// `redact_value` with the `Full` policy collapses a structured value
/// (object with a nested array) into the single redaction marker.
#[test]
fn full_level_redacts_entire_value() {
    use serde_json::json;

    let payload = json!({
        "command": "echo hello",
        "data": [1, 2, 3],
    });
    let strictest = RedactionPolicy::full();

    let output = redact_value(&payload, &strictest);

    assert_eq!(output, json!(REDACTED_MARKER));
}
782
783    // ── redact_value: baseline level ────────────────────────────
784
/// Baseline policy: values stored under `password` / `api_key` are
/// replaced by the marker, while the other fields pass through.
#[test]
fn baseline_redacts_sensitive_keys() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "command": "echo hello",
        "password": "secret123",
        "api_key": "sk-abc",
        "normal_field": "visible",
    });

    let redacted = redact_value(&payload, &baseline);

    // Untouched fields.
    assert_eq!(redacted["command"], "echo hello");
    assert_eq!(redacted["normal_field"], "visible");
    // Masked fields.
    assert_eq!(redacted["password"], REDACTED_MARKER);
    assert_eq!(redacted["api_key"], REDACTED_MARKER);
}
801
/// Baseline key matching ignores ASCII case: capitalised and
/// upper-cased spellings of sensitive keys are still masked.
#[test]
fn baseline_redacts_case_insensitively() {
    let baseline = RedactionPolicy::baseline();
    let payload = serde_json::json!({
        "Password": "secret",
        "API_KEY": "key",
        "Authorization": "Bearer xyz",
    });

    let redacted = redact_value(&payload, &baseline);

    for key in ["Password", "API_KEY", "Authorization"] {
        assert_eq!(
            redacted[key], REDACTED_MARKER,
            "key {key:?} was not redacted"
        );
    }
}
816
/// Baseline policy: string values such as `Bearer …` and `sk-…` are
/// replaced by the marker, while an ordinary sentence is left alone.
#[test]
fn baseline_redacts_sensitive_value_prefixes() {
    let baseline = RedactionPolicy::baseline();
    let payload = serde_json::json!({
        "header": "Bearer eyJ...",
        "key": "sk-abc123",
        "normal": "just a string",
    });

    let redacted = redact_value(&payload, &baseline);

    for flagged in ["header", "key"] {
        assert_eq!(
            redacted[flagged], REDACTED_MARKER,
            "field {flagged:?} was not redacted"
        );
    }
    assert_eq!(redacted["normal"], "just a string");
}
831
/// Baseline redaction descends into child objects: a sensitive key one
/// level down is masked and its siblings are preserved.
#[test]
fn baseline_recurses_into_nested_objects() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "config": {
            "api_key": "sk-nested",
            "endpoint": "https://example.com",
        },
        "name": "test",
    });

    let redacted = redact_value(&payload, &baseline);
    let config = &redacted["config"];

    assert_eq!(config["api_key"], REDACTED_MARKER);
    assert_eq!(config["endpoint"], "https://example.com");
    assert_eq!(redacted["name"], "test");
}
848
/// Baseline redaction descends into array elements: each object in the
/// array has its sensitive keys masked and everything else retained.
#[test]
fn baseline_recurses_into_arrays() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!([
        {"password": "secret", "name": "test"},
        {"token": "abc", "data": 42},
    ]);

    let redacted = redact_value(&payload, &baseline);
    let first = &redacted[0];
    let second = &redacted[1];

    assert_eq!(first["password"], REDACTED_MARKER);
    assert_eq!(first["name"], "test");
    assert_eq!(second["token"], REDACTED_MARKER);
    assert_eq!(second["data"], 42);
}
863
/// Numbers, booleans, floats and nulls under non-sensitive keys come
/// out of baseline redaction equal to the input.
#[test]
fn baseline_preserves_non_string_values() {
    use serde_json::json;

    let payload = json!({
        "count": 42,
        "active": true,
        "ratio": 2.72,
        "empty": null,
    });
    let baseline = RedactionPolicy::baseline();

    let output = redact_value(&payload, &baseline);

    assert_eq!(output, payload);
}
876
/// A bare JSON `null` passed to baseline `redact_value` stays `null`.
#[test]
fn redact_value_is_noop_for_explicit_null() {
    use serde_json::Value;

    let baseline = RedactionPolicy::baseline();
    let null_input = Value::Null;

    let output = redact_value(&null_input, &baseline);

    assert_eq!(output, Value::Null);
}
884
885    // ── redact_string ───────────────────────────────────────────
886
/// With the `None` policy, `redact_string` returns even a bearer-token
/// string unchanged.
#[test]
fn redact_string_none_preserves() {
    let passthrough = RedactionPolicy::none();
    let shown = redact_string("Bearer token123", &passthrough);
    assert_eq!(shown, "Bearer token123");
}
892
/// With the baseline policy, `redact_string` replaces `Bearer …` and
/// `sk-…` strings by the marker and returns ordinary text as-is.
#[test]
fn redact_string_baseline_masks_sensitive() {
    let baseline = RedactionPolicy::baseline();

    for secret in ["Bearer token123", "sk-abc123"] {
        assert_eq!(
            redact_string(secret, &baseline),
            REDACTED_MARKER,
            "input {secret:?} was not redacted"
        );
    }

    let benign = redact_string("just normal output", &baseline);
    assert_eq!(benign, "just normal output");
}
903
/// With the `Full` policy, `redact_string` masks even harmless text.
#[test]
fn redact_string_full_masks_everything() {
    let strictest = RedactionPolicy::full();
    let shown = redact_string("totally safe output", &strictest);
    assert_eq!(shown, REDACTED_MARKER);
}
912
913    // ── redact_error ────────────────────────────────────────────
914
/// An ordinary operational error message carrying no PII passes
/// through baseline `redact_error` unchanged.
#[test]
fn redact_error_baseline_preserves_non_pii() {
    let baseline = RedactionPolicy::baseline();
    let shown = redact_error("connection timeout after 30s", &baseline);
    assert_eq!(shown, "connection timeout after 30s");
}
925
/// The stock baseline policy entity-masks PII inside error strings
/// without the caller changing `error_level`: the CPF is replaced by
/// its category marker and the raw digits disappear.
#[test]
fn redact_error_baseline_masks_pii_by_default() {
    let baseline = RedactionPolicy::baseline();
    let raw_error = "Failed to process order for user CPF 111.444.777-35";

    let masked = redact_error(raw_error, &baseline);

    assert!(masked.contains("[REDACTED:cpf]"), "got: {masked}");
    assert!(!masked.contains("111.444.777-35"));
}
938
/// Callers who need raw errors can set `error_level` to `None` on an
/// otherwise-baseline policy; the error text is then returned verbatim.
#[test]
fn redact_error_explicit_none_passes_through() {
    let mut policy = RedactionPolicy::baseline();
    policy.error_level = RedactionLevel::None;

    let raw = "Failed to process order for user CPF 111.444.777-35";
    let shown = redact_error(raw, &policy);

    assert_eq!(shown, raw);
}
949
/// With the `Full` policy, `redact_error` replaces the whole message
/// by the redaction marker.
#[test]
fn redact_error_full_masks() {
    let strictest = RedactionPolicy::full();
    let shown = redact_error("internal error details", &strictest);
    assert_eq!(shown, REDACTED_MARKER);
}
958
959    // ── Sensitive key detection ─────────────────────────────────
960
/// Table-driven check of `is_sensitive_key` on the baseline policy:
/// one list of names that must be flagged, one that must not.
#[test]
fn sensitive_key_detection() {
    let baseline = RedactionPolicy::baseline();

    let must_match = [
        "password",
        "user_password",
        "api_key",
        "MY_API_KEY",
        "Authorization",
        "session_id",
        "private_key",
        "access_key_id",
    ];
    for key in must_match {
        assert!(
            baseline.is_sensitive_key(key),
            "expected {key:?} to be sensitive"
        );
    }

    // Negative cases guard against overly-short substring patterns
    // accidentally catching everyday field names.
    let must_not_match = [
        "username", "command", "amount", "path", "args", "target", "author", "org_id", "merge",
    ];
    for key in must_not_match {
        assert!(
            !baseline.is_sensitive_key(key),
            "expected {key:?} to be non-sensitive"
        );
    }
}
986
987    // ── Sensitive value detection ───────────────────────────────
988
/// Table-driven check of `is_sensitive_value` on the baseline policy:
/// token-shaped strings must be flagged, ordinary text must not.
#[test]
fn sensitive_value_detection() {
    let baseline = RedactionPolicy::baseline();

    let must_match = [
        "Bearer eyJhbGciOiJIUzI1NiJ9",
        "sk-abc123def456",
        "ghp_xxxxxxxxxxxx",
        "xoxb-token-value",
        "AKIAIOSFODNN7EXAMPLE",
    ];
    for candidate in must_match {
        assert!(
            baseline.is_sensitive_value(candidate),
            "expected {candidate:?} to be sensitive"
        );
    }

    let must_not_match = ["hello world", "echo test", "123.45"];
    for candidate in must_not_match {
        assert!(
            !baseline.is_sensitive_value(candidate),
            "expected {candidate:?} to be non-sensitive"
        );
    }
}
1005
1006    // ── Edge cases ──────────────────────────────────────────────
1007
/// Edge case: an empty JSON object survives baseline redaction as an
/// empty object.
#[test]
fn redact_empty_object() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let empty = json!({});

    let output = redact_value(&empty, &baseline);

    assert_eq!(output, json!({}));
}
1015
/// Edge case: an empty JSON array survives baseline redaction as an
/// empty array.
#[test]
fn redact_empty_array() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let empty = json!([]);

    let output = redact_value(&empty, &baseline);

    assert_eq!(output, json!([]));
}
1023
/// Edge case: a top-level string (no enclosing object) that looks like
/// a secret is replaced by the marker.
#[test]
fn redact_scalar_string() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let bare_secret = json!("sk-secret");

    let output = redact_value(&bare_secret, &baseline);

    assert_eq!(output, json!(REDACTED_MARKER));
}
1031
/// Edge case: a top-level number is returned unchanged by baseline
/// redaction.
#[test]
fn redact_scalar_number() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let bare_number = json!(42);

    let output = redact_value(&bare_number, &baseline);

    assert_eq!(output, json!(42));
}
1039
/// A sensitive key three objects deep is still masked, and the
/// neighbouring non-sensitive field at that depth is kept.
#[test]
fn deeply_nested_redaction() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "level1": {
            "level2": {
                "level3": {
                    "api_key": "sk-deep",
                    "value": "safe",
                }
            }
        }
    });

    let redacted = redact_value(&payload, &baseline);
    // Walk down to the innermost object once, then check both fields.
    let leaf = &redacted["level1"]["level2"]["level3"];

    assert_eq!(leaf["api_key"], REDACTED_MARKER);
    assert_eq!(leaf["value"], "safe");
}
1060
/// Keys containing non-ASCII characters must not crash the
/// case-insensitive key matcher. The baseline patterns are lowercase
/// ASCII, so such keys simply do not match and their values are kept.
#[test]
fn non_ascii_keys_do_not_panic() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "contraseña": "secret",
        "密码": "shh",
        "ok": "visible",
    });

    let output = redact_value(&payload, &baseline);

    // None of the three keys matches an ASCII pattern, so every value
    // is expected to come through untouched.
    assert_eq!(output["contraseña"], "secret");
    assert_eq!(output["密码"], "shh");
    assert_eq!(output["ok"], "visible");
}
1078
1079    // ── Entity-level detection via the plugged-in PiiDetector ──
1080
/// An email address embedded in free text under a non-sensitive key is
/// replaced by its category marker; the raw address must not survive.
#[test]
fn baseline_masks_email_in_non_sensitive_string_value() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "note": "forward to ana.silva+tag@example.com please"
    });

    let redacted = redact_value(&payload, &baseline);
    let note = redacted["note"].as_str().expect("note is string");

    assert!(note.contains("[REDACTED:email]"), "got: {note}");
    assert!(!note.contains("ana.silva+tag@example.com"));
}
1092
/// A formatted CPF inside a free-text description is replaced by its
/// category marker; the raw number must not survive.
#[test]
fn baseline_masks_cpf_in_freeform_text() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "description": "confirmou pelo CPF 111.444.777-35 ontem"
    });

    let redacted = redact_value(&payload, &baseline);
    let desc = redacted["description"].as_str().expect("desc is string");

    assert!(desc.contains("[REDACTED:cpf]"), "got: {desc}");
    assert!(!desc.contains("111.444.777-35"));
}
1104
/// A formatted CNPJ inside a free-text description is replaced by its
/// category marker, and the raw number must not survive.
///
/// The negative assertion mirrors the sibling email and CPF tests:
/// checking only for the marker would still pass if the marker were
/// added while the original CNPJ leaked through.
#[test]
fn baseline_masks_cnpj_in_freeform_text() {
    let policy = RedactionPolicy::baseline();
    let input = serde_json::json!({
        "description": "pagar CNPJ 11.222.333/0001-81 até sexta"
    });
    let result = redact_value(&input, &policy);
    let desc = result["description"].as_str().expect("desc is string");
    assert!(desc.contains("[REDACTED:cnpj]"), "got: {desc}");
    assert!(!desc.contains("11.222.333/0001-81"), "got: {desc}");
}
1115
/// A space-grouped card number in plain tool output is replaced by the
/// credit-card marker; the raw digits must not survive.
#[test]
fn baseline_masks_luhn_valid_pan_in_tool_output() {
    let baseline = RedactionPolicy::baseline();
    let tool_output = "charged card 4111 1111 1111 1111 successfully for 150 BRL";

    let result = redact_string(tool_output, &baseline);

    assert!(result.contains("[REDACTED:credit_card]"), "got: {result}");
    assert!(!result.contains("4111 1111 1111 1111"));
}
1124
/// Sixteen digits that fail the Luhn check must not be treated as a
/// PAN: the output may not contain any category marker at all.
#[test]
fn baseline_does_not_mask_luhn_invalid_digits() {
    let baseline = RedactionPolicy::baseline();
    let tool_output = "order 1234 5678 9012 3456 processed";

    let result = redact_string(tool_output, &baseline);
    let has_marker = result.contains("[REDACTED:");

    assert!(
        !has_marker,
        "false positive on non-PAN digits: {result}"
    );
}
1136
/// The wholesale prefix rule only applies when the entire string
/// begins with a sensitive prefix. A secret sitting in the middle of a
/// message is instead expected to be caught by the entity detector and
/// tagged with the `secret` category.
#[test]
fn baseline_masks_embedded_secret_token() {
    let baseline = RedactionPolicy::baseline();
    let tool_output = "deploy failed: key=sk-abcdefghijklmnopqrstuv rejected";

    let result = redact_string(tool_output, &baseline);

    assert!(result.contains("[REDACTED:secret]"), "got: {result}");
}
1147
/// When a string starts with a sensitive prefix, the whole string is
/// replaced by the plain marker; entity detection does not downgrade
/// that to a partial, category-tagged mask.
#[test]
fn baseline_preserves_wholesale_prefix_behaviour() {
    let baseline = RedactionPolicy::baseline();
    let whole_secret = "sk-abc123def456ghi789jkl";

    let shown = redact_string(whole_secret, &baseline);

    assert_eq!(shown, REDACTED_MARKER);
}
1157
/// Entity masking reaches string leaves nested inside an array of
/// objects: both the CPF and the IP address in the leaf are tagged.
#[test]
fn baseline_masks_pii_in_nested_string_leaves() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "audit_log": [
            {
                "actor": "system",
                "details": "user CPF 111.444.777-35 contacted from 192.168.1.100"
            }
        ]
    });

    let redacted = redact_value(&payload, &baseline);
    let entry = &redacted["audit_log"][0];
    let details = entry["details"].as_str().expect("details string");

    assert!(details.contains("[REDACTED:cpf]"), "got: {details}");
    assert!(details.contains("[REDACTED:ip_address]"), "got: {details}");
}
1176
/// A value stored under a sensitive key is replaced wholesale by the
/// plain marker rather than receiving a partial entity mask, which
/// keeps the contract that existed before entity detection was added.
#[test]
fn sensitive_key_match_wins_over_entity_detection() {
    use serde_json::json;

    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "api_key": "sk-leaky",
        "access_token": "Bearer eyJ..."
    });

    let redacted = redact_value(&payload, &baseline);

    assert_eq!(redacted["api_key"], REDACTED_MARKER);
    assert_eq!(redacted["access_token"], REDACTED_MARKER);
}
1191
/// The `None` policy also disables entity detection: text containing a
/// CPF and an email comes back equal to the input.
#[test]
fn none_policy_performs_no_entity_detection() {
    use serde_json::json;

    let passthrough = RedactionPolicy::none();
    let payload = json!({
        "note": "CPF 111.444.777-35 email a@b.co"
    });

    let output = redact_value(&payload, &passthrough);

    assert_eq!(output, payload, "none policy must not mutate input");
}
1201
/// Round-trips a baseline policy through JSON and checks that the
/// restored policy still masks a CPF. The detector field is
/// `#[serde(skip)]`, so after deserialization detection has to come
/// from the default `BaselineDetector`.
#[test]
fn deserialized_policy_retains_baseline_entity_detection() -> serde_json::Result<()> {
    let serialized = serde_json::to_string(&RedactionPolicy::baseline())?;
    let restored: RedactionPolicy = serde_json::from_str(&serialized)?;

    let result = redact_string("pix para CPF 111.444.777-35 agora", &restored);
    let cpf_masked = result.contains("[REDACTED:cpf]");

    assert!(
        cpf_masked,
        "deserialized policy stopped detecting CPF: {result}"
    );
    Ok(())
}
1217
/// Sets `error_level` to `Baseline` explicitly on a baseline policy
/// and checks that the entity detector is applied to an error string.
/// (`redact_error_baseline_masks_pii_by_default` covers the same
/// behaviour without the explicit assignment.)
#[test]
fn error_level_baseline_masks_entities_in_stack_trace() {
    let mut policy = RedactionPolicy::baseline();
    policy.error_level = RedactionLevel::Baseline;

    let trace = "NotFound: user with CPF 111.444.777-35 missing in table users";
    let result = redact_error(trace, &policy);

    assert!(result.contains("[REDACTED:cpf]"), "got: {result}");
}
1230
1231    // ── redact_for_observability ────────────────────────────────
1232
/// The combined helper applies both layers in one call: the sensitive
/// key is wholesale-masked, the CPF in free text is entity-masked, and
/// an innocuous field is left as-is.
#[test]
fn redact_for_observability_runs_structural_then_pii() -> Result<(), regex::Error> {
    use serde_json::json;

    let detector = BaselineDetector::new()?;
    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "api_key": "sk-leaky",
        "details": "user CPF 111.444.777-35 in table users",
        "ok": "visible",
    });

    let redacted = redact_for_observability(&payload, &baseline, &detector);

    assert_eq!(redacted["api_key"], REDACTED_MARKER);
    let details = redacted["details"].as_str().expect("string");
    assert!(details.contains("[REDACTED:cpf]"), "got: {details}");
    assert_eq!(redacted["ok"], "visible");
    Ok(())
}
1249
/// Feeding already-masked output back through the helper must change
/// nothing: no double masking and no panic.
#[test]
fn redact_for_observability_idempotent_on_already_masked() -> Result<(), regex::Error> {
    use serde_json::json;

    let detector = BaselineDetector::new()?;
    let baseline = RedactionPolicy::baseline();
    let payload = json!({
        "details": "user CPF 111.444.777-35 contacted",
    });

    let first_pass = redact_for_observability(&payload, &baseline, &detector);
    let second_pass = redact_for_observability(&first_pass, &baseline, &detector);

    assert_eq!(first_pass, second_pass);
    Ok(())
}
1264
/// Under the `Full` policy the combined helper returns just the
/// redaction marker.
#[test]
fn redact_for_observability_honours_full_level() -> Result<(), regex::Error> {
    use serde_json::json;

    let detector = BaselineDetector::new()?;
    let strictest = RedactionPolicy::full();
    let payload = json!({"a": "b"});

    let output = redact_for_observability(&payload, &strictest, &detector);

    assert_eq!(output, json!(REDACTED_MARKER));
    Ok(())
}
1274
/// Under the `None` policy the combined helper returns a value equal
/// to its input, even though a detector is supplied and the payload
/// holds both a secret-looking key and a CPF.
#[test]
fn redact_for_observability_honours_none_level() -> Result<(), regex::Error> {
    use serde_json::json;

    let detector = BaselineDetector::new()?;
    let passthrough = RedactionPolicy::none();
    let payload = json!({
        "api_key": "sk-leaky",
        "note": "CPF 111.444.777-35",
    });

    let output = redact_for_observability(&payload, &passthrough, &detector);

    assert_eq!(output, payload);
    Ok(())
}
1287}