Skip to main content

chio_guards/
response_sanitization.rs

1//! Response sanitization guard -- scans tool results for secrets, PII/PHI,
2//! and other sensitive data, then redacts them before the agent sees them.
3//!
4//! This module exposes two layered APIs:
5//!
6//! - A simple, backwards-compatible [`ResponseSanitizationGuard`] that uses a
7//!   small fixed pattern set and Block/Redact binary actions.
8//! - A full-featured [`OutputSanitizer`] that ports the ClawdStrike output
9//!   sanitizer: secret detectors (AWS, GitHub, Slack, GCP service-account
10//!   JSON, passwords, PEM private keys, JWTs, OAuth bearer tokens),
11//!   credit-card numbers with Luhn validation, US SSNs, Shannon-entropy
12//!   high-entropy token detection, configurable allowlist/denylist,
13//!   deterministic overlap resolution (longest-match-wins with strategy
14//!   ranking), and four redaction strategies: `Mask`, `Fingerprint`, `Drop`,
15//!   `Tokenize`, plus `Partial`, `TypeLabel`, and `Keep`.
16//!
17//! The guard fails closed: if pattern compilation fails or an internal error
18//! occurs, the response is blocked.
19
20use std::collections::HashMap;
21use std::sync::{Arc, Mutex, OnceLock};
22
23use regex::Regex;
24use serde::{Deserialize, Serialize};
25use sha2::{Digest, Sha256};
26
27use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
28
29// ===========================================================================
30// Backwards-compatible simple API.
31// ===========================================================================
32
/// How sensitive a matched pattern is considered to be.
///
/// Variants are declared from least to most sensitive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SensitivityLevel {
    /// Lowest tier -- prone to false positives (e.g., phone numbers).
    Low,
    /// Middle tier -- likely PII (e.g., email addresses).
    Medium,
    /// Highest tier -- definite PII/PHI (e.g., SSN, medical record numbers).
    High,
}
43
44/// A named pattern that matches sensitive data.
45#[derive(Debug, Clone)]
46pub struct SensitivePattern {
47    /// Human-readable name for the pattern.
48    pub name: String,
49    /// The compiled regex.
50    regex: Regex,
51    /// Classification level.
52    pub level: SensitivityLevel,
53    /// Replacement string for redaction.
54    pub redaction: String,
55}
56
/// What the simple guard does once sensitive data has been found.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SanitizationAction {
    /// Reject the response as a whole.
    Block,
    /// Replace every match with its redaction text and let the response through.
    Redact,
}
65
66fn default_patterns() -> Vec<SensitivePattern> {
67    let mut patterns = Vec::new();
68
69    if let Ok(regex) = Regex::new(r"\b\d{3}-\d{2}-\d{4}\b") {
70        patterns.push(SensitivePattern {
71            name: "SSN".to_string(),
72            regex,
73            level: SensitivityLevel::High,
74            redaction: "[SSN REDACTED]".to_string(),
75        });
76    }
77
78    if let Ok(regex) = Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b") {
79        patterns.push(SensitivePattern {
80            name: "email".to_string(),
81            regex,
82            level: SensitivityLevel::Medium,
83            redaction: "[EMAIL REDACTED]".to_string(),
84        });
85    }
86
87    if let Ok(regex) = Regex::new(r"\b(?:\(\d{3}\)\s*|\d{3}[-.])\d{3}[-.]?\d{4}\b") {
88        patterns.push(SensitivePattern {
89            name: "phone".to_string(),
90            regex,
91            level: SensitivityLevel::Low,
92            redaction: "[PHONE REDACTED]".to_string(),
93        });
94    }
95
96    if let Ok(regex) = Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b") {
97        patterns.push(SensitivePattern {
98            name: "credit-card".to_string(),
99            regex,
100            level: SensitivityLevel::High,
101            redaction: "[CARD REDACTED]".to_string(),
102        });
103    }
104
105    if let Ok(regex) = Regex::new(r"\b(?:\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})\b") {
106        patterns.push(SensitivePattern {
107            name: "date-of-birth".to_string(),
108            regex,
109            level: SensitivityLevel::Low,
110            redaction: "[DATE REDACTED]".to_string(),
111        });
112    }
113
114    if let Ok(regex) = Regex::new(r"\bMRN[:\s#]*\d{6,12}\b") {
115        patterns.push(SensitivePattern {
116            name: "MRN".to_string(),
117            regex,
118            level: SensitivityLevel::High,
119            redaction: "[MRN REDACTED]".to_string(),
120        });
121    }
122
123    if let Ok(regex) = Regex::new(r"\b[A-Z]\d{2}(?:\.\d{1,4})?\b") {
124        patterns.push(SensitivePattern {
125            name: "ICD-10".to_string(),
126            regex,
127            level: SensitivityLevel::Medium,
128            redaction: "[ICD REDACTED]".to_string(),
129        });
130    }
131
132    patterns
133}
134
135/// Guard that scans responses for PII/PHI patterns and redacts or blocks them.
136pub struct ResponseSanitizationGuard {
137    patterns: Vec<SensitivePattern>,
138    min_level: SensitivityLevel,
139    action: SanitizationAction,
140}
141
142impl ResponseSanitizationGuard {
143    pub fn new(min_level: SensitivityLevel, action: SanitizationAction) -> Self {
144        Self {
145            patterns: default_patterns(),
146            min_level,
147            action,
148        }
149    }
150
151    pub fn with_patterns(
152        patterns: Vec<SensitivePattern>,
153        min_level: SensitivityLevel,
154        action: SanitizationAction,
155    ) -> Self {
156        Self {
157            patterns,
158            min_level,
159            action,
160        }
161    }
162
163    pub fn with_additional_patterns(
164        additional_patterns: Vec<SensitivePattern>,
165        min_level: SensitivityLevel,
166        action: SanitizationAction,
167    ) -> Self {
168        let mut patterns = default_patterns();
169        patterns.extend(additional_patterns);
170        Self {
171            patterns,
172            min_level,
173            action,
174        }
175    }
176
177    pub fn scan(&self, text: &str) -> Vec<(String, String)> {
178        let mut findings = Vec::new();
179        for pattern in &self.patterns {
180            if level_ord(pattern.level) < level_ord(self.min_level) {
181                continue;
182            }
183            for m in pattern.regex.find_iter(text) {
184                findings.push((pattern.name.clone(), m.as_str().to_string()));
185            }
186        }
187        findings
188    }
189
190    pub fn redact(&self, text: &str) -> (String, usize) {
191        let mut result = text.to_string();
192        let mut count = 0usize;
193        for pattern in &self.patterns {
194            if level_ord(pattern.level) < level_ord(self.min_level) {
195                continue;
196            }
197            let match_count = pattern.regex.find_iter(&result).count();
198            if match_count > 0 {
199                result = pattern
200                    .regex
201                    .replace_all(&result, pattern.redaction.as_str())
202                    .to_string();
203                count = count.saturating_add(match_count);
204            }
205        }
206        (result, count)
207    }
208
209    pub fn scan_response(&self, response: &serde_json::Value) -> ScanResult {
210        let text = response.to_string();
211        let findings = self.scan(&text);
212        if findings.is_empty() {
213            return ScanResult::Clean;
214        }
215        match self.action {
216            SanitizationAction::Block => ScanResult::Blocked(findings),
217            SanitizationAction::Redact => {
218                let (redacted, count) = self.redact(&text);
219                ScanResult::Redacted {
220                    redacted_text: redacted,
221                    redaction_count: count,
222                    findings,
223                }
224            }
225        }
226    }
227}
228
/// Outcome of scanning a response with the simple guard.
#[derive(Debug)]
pub enum ScanResult {
    /// No active pattern matched.
    Clean,
    /// Matches were found and the guard is configured to block; carries the
    /// `(pattern name, matched text)` findings.
    Blocked(Vec<(String, String)>),
    /// Matches were found and replaced.
    Redacted {
        /// Text after all redactions were applied.
        redacted_text: String,
        /// Number of matches that were replaced.
        redaction_count: usize,
        /// `(pattern name, matched text)` findings from the pre-redaction scan.
        findings: Vec<(String, String)>,
    },
}
239
240fn level_ord(level: SensitivityLevel) -> u8 {
241    match level {
242        SensitivityLevel::Low => 0,
243        SensitivityLevel::Medium => 1,
244        SensitivityLevel::High => 2,
245    }
246}
247
248impl Guard for ResponseSanitizationGuard {
249    fn name(&self) -> &str {
250        "response-sanitization"
251    }
252
253    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
254        let args_text = ctx.request.arguments.to_string();
255        let findings = self.scan(&args_text);
256        if findings.is_empty() {
257            Ok(Verdict::Allow)
258        } else {
259            Ok(Verdict::Deny)
260        }
261    }
262}
263
264/// Build a `SensitivePattern` from components. Returns None if the regex is invalid.
265pub fn build_pattern(
266    name: &str,
267    regex_str: &str,
268    level: SensitivityLevel,
269    redaction: &str,
270) -> Option<SensitivePattern> {
271    Regex::new(regex_str).ok().map(|regex| SensitivePattern {
272        name: name.to_string(),
273        regex,
274        level,
275        redaction: redaction.to_string(),
276    })
277}
278
279// ===========================================================================
280// Full OutputSanitizer.
281// ===========================================================================
282
283/// Category of a sensitive data finding.
284#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
285#[serde(rename_all = "snake_case")]
286pub enum SensitiveCategory {
287    Secret,
288    Pii,
289    Internal,
290    Custom(String),
291}
292
293/// Redaction strategy applied to a finding.
294#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
295#[serde(rename_all = "snake_case")]
296pub enum RedactionStrategy {
297    /// Replace the match with a constant mask (`****`).
298    Mask,
299    /// Replace the match with a stable fingerprint (sha256 prefix).
300    Fingerprint,
301    /// Drop the match entirely (replace with empty text; at the JSON-field
302    /// level the whole field is replaced with `null`).
303    Drop,
304    /// Replace the match with an opaque token id and record the mapping.
305    Tokenize,
306    /// Keep a small prefix/suffix, redact the middle.
307    Partial,
308    /// Replace with a typed label (`[REDACTED:email]`).
309    TypeLabel,
310    /// Do not redact.
311    Keep,
312}
313
314/// Byte span in the sanitized text.
315#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
316pub struct Span {
317    pub start: usize,
318    pub end: usize,
319}
320
321/// A single sensitive-data finding.
322#[derive(Clone, Debug, Serialize, Deserialize)]
323pub struct SensitiveDataFinding {
324    pub id: String,
325    pub category: SensitiveCategory,
326    pub data_type: String,
327    pub confidence: f32,
328    pub span: Span,
329    pub preview: String,
330    pub detector: String,
331    pub recommended_action: RedactionStrategy,
332}
333
334/// Record of a redaction that was actually applied.
335#[derive(Clone, Debug, Serialize, Deserialize)]
336pub struct Redaction {
337    pub finding_id: String,
338    pub strategy: RedactionStrategy,
339    pub original_span: Span,
340    pub replacement: String,
341}
342
343/// Processing statistics for a single sanitization run.
344#[derive(Clone, Debug, Default, Serialize, Deserialize)]
345pub struct ProcessingStats {
346    pub input_length: usize,
347    pub output_length: usize,
348    pub findings_count: usize,
349    pub redactions_count: usize,
350}
351
352/// Result of sanitizing a single string.
353#[derive(Clone, Debug, Serialize, Deserialize)]
354pub struct SanitizationResult {
355    pub sanitized: String,
356    pub was_redacted: bool,
357    pub findings: Vec<SensitiveDataFinding>,
358    pub redactions: Vec<Redaction>,
359    pub stats: ProcessingStats,
360}
361
362/// Category enable/disable toggles.
363#[derive(Clone, Debug, Serialize, Deserialize)]
364pub struct CategoryConfig {
365    pub secrets: bool,
366    pub pii: bool,
367    pub internal: bool,
368}
369
370impl Default for CategoryConfig {
371    fn default() -> Self {
372        Self {
373            secrets: true,
374            pii: true,
375            internal: true,
376        }
377    }
378}
379
380/// High-entropy token detector configuration.
381#[derive(Clone, Debug, Serialize, Deserialize)]
382pub struct EntropyConfig {
383    pub enabled: bool,
384    pub threshold: f64,
385    pub min_token_len: usize,
386}
387
388impl Default for EntropyConfig {
389    fn default() -> Self {
390        Self {
391            enabled: true,
392            threshold: 4.5,
393            min_token_len: 16,
394        }
395    }
396}
397
398/// Allowlist configuration (false-positive reduction).
399#[derive(Clone, Debug, Default, Serialize, Deserialize)]
400pub struct AllowlistConfig {
401    pub exact: Vec<String>,
402    pub patterns: Vec<String>,
403}
404
405/// Denylist configuration (forced redaction).
406#[derive(Clone, Debug, Default, Serialize, Deserialize)]
407pub struct DenylistConfig {
408    pub exact: Vec<String>,
409    pub patterns: Vec<String>,
410}
411
412/// Output sanitizer configuration.
413#[derive(Clone, Debug, Serialize, Deserialize)]
414pub struct OutputSanitizerConfig {
415    pub categories: CategoryConfig,
416    pub redaction_strategies: HashMap<SensitiveCategory, RedactionStrategy>,
417    pub entropy: EntropyConfig,
418    pub allowlist: AllowlistConfig,
419    pub denylist: DenylistConfig,
420    pub max_input_bytes: usize,
421    pub include_findings: bool,
422}
423
424impl Default for OutputSanitizerConfig {
425    fn default() -> Self {
426        let mut redaction_strategies = HashMap::new();
427        redaction_strategies.insert(SensitiveCategory::Secret, RedactionStrategy::Mask);
428        redaction_strategies.insert(SensitiveCategory::Pii, RedactionStrategy::Partial);
429        redaction_strategies.insert(SensitiveCategory::Internal, RedactionStrategy::TypeLabel);
430        Self {
431            categories: CategoryConfig::default(),
432            redaction_strategies,
433            entropy: EntropyConfig::default(),
434            allowlist: AllowlistConfig::default(),
435            denylist: DenylistConfig::default(),
436            max_input_bytes: 1_000_000,
437            include_findings: true,
438        }
439    }
440}
441
442#[derive(Debug, thiserror::Error)]
443pub enum OutputSanitizerConfigError {
444    #[error("invalid {list_name} regex `{pattern}`: {source}")]
445    InvalidPattern {
446        list_name: &'static str,
447        pattern: String,
448        #[source]
449        source: regex::Error,
450    },
451}
452
453// ---------------------------------------------------------------------------
454// Compiled detector registry (lazy, built once per process).
455// ---------------------------------------------------------------------------
456
457#[derive(Clone)]
458struct CompiledPattern {
459    id: &'static str,
460    category: SensitiveCategory,
461    data_type: &'static str,
462    confidence: f32,
463    recommended: RedactionStrategy,
464    regex: Regex,
465    validator: Option<fn(&str) -> bool>,
466}
467
468fn compile_or_nomatch(pattern: &'static str) -> Regex {
469    match Regex::new(pattern) {
470        Ok(re) => re,
471        Err(err) => {
472            tracing::error!(error = %err, %pattern, "failed to compile hardcoded regex");
473            // Fallback to a never-matching regex. `\A\z` is always valid and
474            // matches only empty strings (which we never pass in).
475            match Regex::new(r"\A\z") {
476                Ok(re) => re,
477                Err(_) => match Regex::new("") {
478                    Ok(re) => re,
479                    Err(_) => {
480                        // Last resort: recompile the original pattern and let
481                        // any runtime caller observe the empty-regex fallback
482                        // without crashing.
483                        #[allow(clippy::unwrap_used)]
484                        {
485                            Regex::new("").unwrap()
486                        }
487                    }
488                },
489            }
490        }
491    }
492}
493
494fn compiled_patterns() -> &'static [CompiledPattern] {
495    static PATS: OnceLock<Vec<CompiledPattern>> = OnceLock::new();
496    PATS.get_or_init(|| {
497        vec![
498            // ---- Secrets ----
499            CompiledPattern {
500                id: "secret_aws_access_key_id",
501                category: SensitiveCategory::Secret,
502                data_type: "aws_access_key_id",
503                confidence: 0.99,
504                recommended: RedactionStrategy::Mask,
505                regex: compile_or_nomatch(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"),
506                validator: None,
507            },
508            CompiledPattern {
509                id: "secret_aws_secret_access_key",
510                category: SensitiveCategory::Secret,
511                data_type: "aws_secret_access_key",
512                confidence: 0.9,
513                recommended: RedactionStrategy::Mask,
514                regex: compile_or_nomatch(
515                    r"(?i)aws_secret_access_key\s*[:=]\s*[A-Za-z0-9/+=]{40}",
516                ),
517                validator: None,
518            },
519            CompiledPattern {
520                id: "secret_github_token",
521                category: SensitiveCategory::Secret,
522                data_type: "github_token",
523                confidence: 0.99,
524                recommended: RedactionStrategy::Mask,
525                regex: compile_or_nomatch(r"\bgh[pousr]_[A-Za-z0-9]{36,255}\b"),
526                validator: None,
527            },
528            CompiledPattern {
529                id: "secret_slack_token",
530                category: SensitiveCategory::Secret,
531                data_type: "slack_token",
532                confidence: 0.99,
533                recommended: RedactionStrategy::Mask,
534                regex: compile_or_nomatch(r"\bxox[abopsr]-[A-Za-z0-9-]{10,}\b"),
535                validator: None,
536            },
537            CompiledPattern {
538                id: "secret_slack_webhook",
539                category: SensitiveCategory::Secret,
540                data_type: "slack_webhook",
541                confidence: 0.95,
542                recommended: RedactionStrategy::Mask,
543                regex: compile_or_nomatch(
544                    r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+",
545                ),
546                validator: None,
547            },
548            CompiledPattern {
549                id: "secret_gcp_service_account",
550                category: SensitiveCategory::Secret,
551                data_type: "gcp_service_account_json",
552                confidence: 0.97,
553                recommended: RedactionStrategy::Drop,
554                regex: compile_or_nomatch(r#""type"\s*:\s*"service_account""#),
555                validator: None,
556            },
557            CompiledPattern {
558                id: "secret_pem_private_key",
559                category: SensitiveCategory::Secret,
560                data_type: "pem_private_key",
561                confidence: 0.99,
562                recommended: RedactionStrategy::Mask,
563                regex: compile_or_nomatch(
564                    r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
565                ),
566                validator: None,
567            },
568            CompiledPattern {
569                id: "secret_jwt",
570                category: SensitiveCategory::Secret,
571                data_type: "jwt",
572                confidence: 0.85,
573                recommended: RedactionStrategy::Mask,
574                regex: compile_or_nomatch(
575                    r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b",
576                ),
577                validator: None,
578            },
579            CompiledPattern {
580                id: "secret_oauth_bearer",
581                category: SensitiveCategory::Secret,
582                data_type: "oauth_bearer",
583                confidence: 0.85,
584                recommended: RedactionStrategy::Mask,
585                regex: compile_or_nomatch(
586                    r"(?i)\b(?:authorization|auth)\s*:\s*bearer\s+[A-Za-z0-9._~+/=-]{16,}",
587                ),
588                validator: None,
589            },
590            CompiledPattern {
591                id: "secret_password_assignment",
592                category: SensitiveCategory::Secret,
593                data_type: "password",
594                confidence: 0.7,
595                recommended: RedactionStrategy::Mask,
596                regex: compile_or_nomatch(
597                    r"(?i)\b(?:password|passwd|pwd|secret)\s*[:=]\s*\S{6,}",
598                ),
599                validator: None,
600            },
601            // ---- PII ----
602            CompiledPattern {
603                id: "pii_ssn",
604                category: SensitiveCategory::Pii,
605                data_type: "ssn",
606                confidence: 0.9,
607                recommended: RedactionStrategy::Mask,
608                regex: compile_or_nomatch(r"\b\d{3}-\d{2}-\d{4}\b"),
609                validator: Some(is_valid_ssn_fragments),
610            },
611            CompiledPattern {
612                id: "pii_ssn_compact",
613                category: SensitiveCategory::Pii,
614                data_type: "ssn",
615                confidence: 0.7,
616                recommended: RedactionStrategy::Mask,
617                regex: compile_or_nomatch(r"(?:^|[^0-9])(\d{9})(?:$|[^0-9])"),
618                validator: Some(is_valid_ssn_compact),
619            },
620            CompiledPattern {
621                id: "pii_credit_card",
622                category: SensitiveCategory::Pii,
623                data_type: "credit_card",
624                confidence: 0.9,
625                recommended: RedactionStrategy::Mask,
626                regex: compile_or_nomatch(r"\b(?:\d[ -]*?){13,19}\b"),
627                validator: Some(is_luhn_valid_card_number),
628            },
629            CompiledPattern {
630                id: "pii_email",
631                category: SensitiveCategory::Pii,
632                data_type: "email",
633                confidence: 0.95,
634                recommended: RedactionStrategy::Partial,
635                regex: compile_or_nomatch(
636                    r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b",
637                ),
638                validator: None,
639            },
640            // ---- Internal ----
641            CompiledPattern {
642                id: "internal_private_ip",
643                category: SensitiveCategory::Internal,
644                data_type: "internal_ip",
645                confidence: 0.8,
646                recommended: RedactionStrategy::TypeLabel,
647                regex: compile_or_nomatch(
648                    r"\b(?:10|192\.168|172\.(?:1[6-9]|2[0-9]|3[0-1]))\.[0-9]{1,3}\.[0-9]{1,3}\b",
649                ),
650                validator: None,
651            },
652        ]
653    })
654}
655
656// ---------------------------------------------------------------------------
657// Utility: Shannon entropy, Luhn, SSN validation, token previews.
658// ---------------------------------------------------------------------------
659
/// Computes the Shannon entropy, in bits per byte, of an ASCII token.
///
/// Returns `None` when the token is empty or contains non-ASCII characters.
fn shannon_entropy_ascii(token: &str) -> Option<f64> {
    if token.is_empty() || !token.is_ascii() {
        return None;
    }

    // Byte-value histogram.
    let mut histogram = [0u32; 256];
    for byte in token.bytes() {
        let slot = &mut histogram[usize::from(byte)];
        *slot = slot.saturating_add(1);
    }

    let total = token.len() as f64;
    let entropy = histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .fold(0.0f64, |acc, &occurrences| {
            let p = f64::from(occurrences) / total;
            acc - p * p.log2()
        });
    Some(entropy)
}
683
/// Reports whether every byte of `token` is an ASCII letter, an ASCII digit,
/// or one of `+`, `/`, `=`, `-`, `_`.
///
/// An empty token trivially satisfies the check.
fn is_candidate_secret_token(token: &str) -> bool {
    const EXTRA: &[u8] = b"+/=-_";
    for byte in token.bytes() {
        let allowed = byte.is_ascii_alphanumeric() || EXTRA.contains(&byte);
        if !allowed {
            return false;
        }
    }
    true
}
689
/// Checks whether the ASCII digits in `text` form a plausible card number.
///
/// Non-digit characters are ignored. The digits must number between 13 and
/// 19, must not all be identical, and must pass the Luhn checksum.
fn is_luhn_valid_card_number(text: &str) -> bool {
    let digits: Vec<u32> = text
        .bytes()
        .filter(u8::is_ascii_digit)
        .map(|b| u32::from(b - b'0'))
        .collect();

    if digits.len() < 13 || digits.len() > 19 {
        return false;
    }
    let first = digits[0];
    if digits.iter().all(|&d| d == first) {
        return false;
    }

    // Walking from the rightmost digit, every second digit is doubled and
    // reduced by 9 when the doubled value exceeds 9.
    let checksum = digits
        .iter()
        .rev()
        .enumerate()
        .fold(0u32, |acc, (position, &digit)| {
            let contribution = if position % 2 == 1 {
                let doubled = digit * 2;
                if doubled > 9 {
                    doubled - 9
                } else {
                    doubled
                }
            } else {
                digit
            };
            acc.saturating_add(contribution)
        });
    checksum.is_multiple_of(10)
}
717
/// Validates a dash-separated SSN of the form `AAA-GG-SSSS`.
///
/// The text must split into exactly three parts on `-`. The area must not be
/// 0, 666, or in 900..=999, and neither the group nor the serial may be 0.
/// A part that does not parse as a number counts as 0.
fn is_valid_ssn_fragments(text: &str) -> bool {
    let mut pieces = text.split('-');
    let (Some(area), Some(group), Some(serial), None) =
        (pieces.next(), pieces.next(), pieces.next(), pieces.next())
    else {
        return false;
    };

    let number = |part: &str| part.parse::<u32>().unwrap_or(0);
    let area_ok = !matches!(number(area), 0 | 666 | 900..=999);
    area_ok && number(group) != 0 && number(serial) != 0
}
734
/// Validates an undashed nine-digit SSN.
///
/// Non-digit characters in `text` are ignored, and exactly nine ASCII digits
/// must remain. The same area/group/serial rules as for the dashed form
/// apply: the area must not be 0, 666, or in 900..=999, and neither the
/// group nor the serial may be 0.
fn is_valid_ssn_compact(text: &str) -> bool {
    // `to_digit(10)` yields a value only for ASCII '0'..='9'.
    let digits: Vec<u32> = text.chars().filter_map(|c| c.to_digit(10)).collect();
    if digits.len() != 9 {
        return false;
    }

    let value = |part: &[u32]| part.iter().fold(0u32, |acc, &d| acc * 10 + d);
    let area = value(&digits[..3]);
    let group = value(&digits[3..5]);
    let serial = value(&digits[5..]);

    let area_ok = !matches!(area, 0 | 666 | 900..=999);
    area_ok && group != 0 && serial != 0
}
751
/// Produces a short preview of `s` that hides most of its content.
///
/// Strings of up to four characters become that many `*`. Longer strings
/// keep their first two and last two characters with `***` in between.
/// Lengths are counted in characters, not bytes.
fn preview_redacted(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let count = chars.len();
    if count <= 4 {
        return "*".repeat(count);
    }
    let head: String = chars[..2].iter().collect();
    let tail: String = chars[count - 2..].iter().collect();
    format!("{head}***{tail}")
}
762
/// Cuts `text` down to at most `max_bytes` bytes without splitting a
/// character.
///
/// Returns the (possibly shortened) slice and whether anything was cut off.
/// When `max_bytes` falls inside a multi-byte character, the cut moves back
/// to the start of that character.
fn truncate_to_char_boundary(text: &str, max_bytes: usize) -> (&str, bool) {
    if max_bytes >= text.len() {
        return (text, false);
    }
    // Offset 0 is always a character boundary, so the search cannot fail.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);
    (&text[..cut], cut < text.len())
}
773
774fn fingerprint(s: &str) -> String {
775    let mut hasher = Sha256::new();
776    hasher.update(s.as_bytes());
777    let digest = hasher.finalize();
778    let mut out = String::with_capacity(16);
779    for b in digest.iter().take(8) {
780        out.push_str(&format!("{b:02x}"));
781    }
782    out
783}
784
785// ---------------------------------------------------------------------------
786// Tokenize store: opaque-id -> original mapping.
787// ---------------------------------------------------------------------------
788
/// Shared token vault used by the `Tokenize` redaction strategy.
///
/// The vault maps opaque token ids to the original values they replaced.
/// Its `Debug` output reports only the number of stored entries: the stored
/// values are the secrets that were redacted, so they must never end up in
/// logs or panic messages through `{:?}` formatting.
#[derive(Default)]
pub struct TokenVault {
    inner: Mutex<TokenVaultInner>,
}

impl std::fmt::Debug for TokenVault {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("TokenVault");
        // `try_lock` keeps formatting from blocking (or deadlocking) when the
        // lock is currently held.
        match self.inner.try_lock() {
            Ok(guard) => out.field("entries", &guard.map.len()),
            Err(std::sync::TryLockError::Poisoned(poisoned)) => {
                out.field("entries", &poisoned.into_inner().map.len())
            }
            Err(std::sync::TryLockError::WouldBlock) => {
                out.field("entries", &format_args!("<locked>"))
            }
        };
        out.finish()
    }
}

/// Mutable state behind the vault's lock.
#[derive(Debug, Default)]
struct TokenVaultInner {
    /// Number of tokens issued so far; part of every generated id.
    counter: u64,
    /// Token id -> original (unredacted) value.
    map: HashMap<String, String>,
}
800
801impl TokenVault {
802    pub fn new() -> Self {
803        Self::default()
804    }
805
806    pub fn insert(&self, value: &str) -> String {
807        let mut inner = match self.inner.lock() {
808            Ok(g) => g,
809            Err(poisoned) => poisoned.into_inner(),
810        };
811        inner.counter = inner.counter.saturating_add(1);
812        let fp = fingerprint(value);
813        let id = format!("tok_{}_{}", inner.counter, fp);
814        inner.map.insert(id.clone(), value.to_string());
815        id
816    }
817
818    pub fn get(&self, token: &str) -> Option<String> {
819        let inner = match self.inner.lock() {
820            Ok(g) => g,
821            Err(poisoned) => poisoned.into_inner(),
822        };
823        inner.map.get(token).cloned()
824    }
825
826    pub fn len(&self) -> usize {
827        let inner = match self.inner.lock() {
828            Ok(g) => g,
829            Err(poisoned) => poisoned.into_inner(),
830        };
831        inner.map.len()
832    }
833
834    pub fn is_empty(&self) -> bool {
835        self.len() == 0
836    }
837}
838
839// ---------------------------------------------------------------------------
840// OutputSanitizer
841// ---------------------------------------------------------------------------
842
843/// Full-featured output sanitizer.
844pub struct OutputSanitizer {
845    config: OutputSanitizerConfig,
846    allowlist_patterns: Vec<Regex>,
847    denylist_patterns: Vec<(String, Regex)>,
848    token_vault: Arc<TokenVault>,
849}
850
851impl std::fmt::Debug for OutputSanitizer {
852    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
853        f.debug_struct("OutputSanitizer")
854            .field("config", &self.config)
855            .field("allowlist_patterns", &self.allowlist_patterns.len())
856            .field("denylist_patterns", &self.denylist_patterns.len())
857            .finish()
858    }
859}
860
861impl Default for OutputSanitizer {
862    fn default() -> Self {
863        Self::new()
864    }
865}
866
867impl Clone for OutputSanitizer {
868    fn clone(&self) -> Self {
869        Self {
870            config: self.config.clone(),
871            allowlist_patterns: self.allowlist_patterns.clone(),
872            denylist_patterns: self.denylist_patterns.clone(),
873            token_vault: self.token_vault.clone(),
874        }
875    }
876}
877
878impl OutputSanitizer {
879    pub fn new() -> Self {
880        match Self::with_config(OutputSanitizerConfig::default()) {
881            Ok(sanitizer) => sanitizer,
882            Err(error) => panic!("default output sanitizer config should be valid: {error}"),
883        }
884    }
885
886    pub fn with_config(config: OutputSanitizerConfig) -> Result<Self, OutputSanitizerConfigError> {
887        let allowlist_patterns = config
888            .allowlist
889            .patterns
890            .iter()
891            .map(|pattern| {
892                Regex::new(pattern).map_err(|source| OutputSanitizerConfigError::InvalidPattern {
893                    list_name: "allowlist",
894                    pattern: pattern.clone(),
895                    source,
896                })
897            })
898            .collect::<Result<Vec<_>, _>>()?;
899        let denylist_patterns = config
900            .denylist
901            .patterns
902            .iter()
903            .map(|pattern| {
904                Regex::new(pattern)
905                    .map(|re| {
906                        let id = format!("denylist_{}", fingerprint(pattern));
907                        (id, re)
908                    })
909                    .map_err(|source| OutputSanitizerConfigError::InvalidPattern {
910                        list_name: "denylist",
911                        pattern: pattern.clone(),
912                        source,
913                    })
914            })
915            .collect::<Result<Vec<_>, _>>()?;
916
917        Ok(Self {
918            config,
919            allowlist_patterns,
920            denylist_patterns,
921            token_vault: Arc::new(TokenVault::new()),
922        })
923    }
924
925    pub fn token_vault(&self) -> Arc<TokenVault> {
926        self.token_vault.clone()
927    }
928
929    pub fn config(&self) -> &OutputSanitizerConfig {
930        &self.config
931    }
932
933    fn is_allowlisted(&self, s: &str) -> bool {
934        if self.config.allowlist.exact.iter().any(|x| x == s) {
935            return true;
936        }
937        self.allowlist_patterns.iter().any(|re| re.is_match(s))
938    }
939
940    /// Sanitize a raw text string.
941    pub fn sanitize_text(&self, input: &str) -> SanitizationResult {
942        let (limited, truncated) = truncate_to_char_boundary(input, self.config.max_input_bytes);
943
944        let mut findings: Vec<SensitiveDataFinding> = Vec::new();
945
946        // Denylist (forced redaction) -- exact strings first, then regexes.
947        for needle in &self.config.denylist.exact {
948            if needle.is_empty() {
949                continue;
950            }
951            let mut start = 0usize;
952            while let Some(pos) = limited[start..].find(needle.as_str()) {
953                let s = start + pos;
954                let e = s + needle.len();
955                findings.push(SensitiveDataFinding {
956                    id: format!("denylist_exact_{}", fingerprint(needle)),
957                    category: SensitiveCategory::Secret,
958                    data_type: "denylist".to_string(),
959                    confidence: 1.0,
960                    span: Span { start: s, end: e },
961                    preview: preview_redacted(needle),
962                    detector: "denylist".to_string(),
963                    recommended_action: RedactionStrategy::Mask,
964                });
965                start = e;
966            }
967        }
968        for (id, re) in &self.denylist_patterns {
969            for m in re.find_iter(limited) {
970                findings.push(SensitiveDataFinding {
971                    id: id.clone(),
972                    category: SensitiveCategory::Secret,
973                    data_type: "denylist".to_string(),
974                    confidence: 0.95,
975                    span: Span {
976                        start: m.start(),
977                        end: m.end(),
978                    },
979                    preview: preview_redacted(m.as_str()),
980                    detector: "denylist".to_string(),
981                    recommended_action: RedactionStrategy::Mask,
982                });
983            }
984        }
985
986        // Built-in detectors.
987        for p in compiled_patterns() {
988            let enabled = match p.category {
989                SensitiveCategory::Secret => self.config.categories.secrets,
990                SensitiveCategory::Pii => self.config.categories.pii,
991                SensitiveCategory::Internal => self.config.categories.internal,
992                SensitiveCategory::Custom(_) => true,
993            };
994            if !enabled {
995                continue;
996            }
997            for m in p.regex.find_iter(limited) {
998                let raw = m.as_str();
999                if let Some(validator) = p.validator {
1000                    if !validator(raw) {
1001                        continue;
1002                    }
1003                }
1004                if self.is_allowlisted(raw) {
1005                    continue;
1006                }
1007                // For SSN compact, shrink the span to the 9-digit run.
1008                let (span_start, span_end) = if p.id == "pii_ssn_compact" {
1009                    let bytes = limited.as_bytes();
1010                    let mut s = m.start();
1011                    while s < m.end() && !bytes[s].is_ascii_digit() {
1012                        s += 1;
1013                    }
1014                    let mut e = m.end();
1015                    while e > s && !bytes[e - 1].is_ascii_digit() {
1016                        e -= 1;
1017                    }
1018                    (s, e)
1019                } else {
1020                    (m.start(), m.end())
1021                };
1022                if span_start >= span_end {
1023                    continue;
1024                }
1025                let slice = &limited[span_start..span_end];
1026                findings.push(SensitiveDataFinding {
1027                    id: p.id.to_string(),
1028                    category: p.category.clone(),
1029                    data_type: p.data_type.to_string(),
1030                    confidence: p.confidence,
1031                    span: Span {
1032                        start: span_start,
1033                        end: span_end,
1034                    },
1035                    preview: preview_redacted(slice),
1036                    detector: "pattern".to_string(),
1037                    recommended_action: p.recommended.clone(),
1038                });
1039            }
1040        }
1041
1042        // High-entropy detector.
1043        if self.config.categories.secrets && self.config.entropy.enabled {
1044            static TOKEN_RE: OnceLock<Regex> = OnceLock::new();
1045            let token_re = TOKEN_RE.get_or_init(|| compile_or_nomatch(r"[A-Za-z0-9+/=_-]{16,}"));
1046            for m in token_re.find_iter(limited) {
1047                let token = m.as_str();
1048                if token.len() < self.config.entropy.min_token_len {
1049                    continue;
1050                }
1051                if self.is_allowlisted(token) {
1052                    continue;
1053                }
1054                if !is_candidate_secret_token(token) {
1055                    continue;
1056                }
1057                let ent = match shannon_entropy_ascii(token) {
1058                    Some(e) => e,
1059                    None => continue,
1060                };
1061                if ent < self.config.entropy.threshold {
1062                    continue;
1063                }
1064                findings.push(SensitiveDataFinding {
1065                    id: "secret_high_entropy_token".to_string(),
1066                    category: SensitiveCategory::Secret,
1067                    data_type: "high_entropy_token".to_string(),
1068                    confidence: 0.6,
1069                    span: Span {
1070                        start: m.start(),
1071                        end: m.end(),
1072                    },
1073                    preview: preview_redacted(token),
1074                    detector: "entropy".to_string(),
1075                    recommended_action: RedactionStrategy::Mask,
1076                });
1077            }
1078        }
1079
1080        findings.sort_by(|a, b| {
1081            a.span
1082                .start
1083                .cmp(&b.span.start)
1084                .then_with(|| b.span.end.cmp(&a.span.end))
1085        });
1086
1087        let merged = resolve_overlaps(&findings, &self.config.redaction_strategies);
1088
1089        let mut sanitized = limited.to_string();
1090        let mut redactions: Vec<Redaction> = Vec::new();
1091        let mut applied_any = false;
1092
1093        // Apply from last to first so byte offsets remain valid.
1094        let mut merged_desc = merged;
1095        merged_desc.sort_by(|a, b| b.0.start.cmp(&a.0.start).then(b.0.end.cmp(&a.0.end)));
1096
1097        for (span, strategy, category, data_type, finding_id) in merged_desc {
1098            if span.end > sanitized.len() || span.start >= span.end {
1099                continue;
1100            }
1101            if !sanitized.is_char_boundary(span.start) || !sanitized.is_char_boundary(span.end) {
1102                continue;
1103            }
1104            let raw = &sanitized[span.start..span.end];
1105            let replacement = self.replacement_for(&strategy, &category, &data_type, raw);
1106            if replacement == raw {
1107                continue;
1108            }
1109            sanitized.replace_range(span.start..span.end, &replacement);
1110            applied_any = true;
1111            redactions.push(Redaction {
1112                finding_id,
1113                strategy,
1114                original_span: span,
1115                replacement,
1116            });
1117        }
1118
1119        if truncated {
1120            sanitized.push_str("\n[TRUNCATED_UNSCANNED_OUTPUT]");
1121            applied_any = true;
1122        }
1123
1124        let stats = ProcessingStats {
1125            input_length: input.len(),
1126            output_length: sanitized.len(),
1127            findings_count: findings.len(),
1128            redactions_count: redactions.len(),
1129        };
1130
1131        let mut result = SanitizationResult {
1132            sanitized,
1133            was_redacted: applied_any,
1134            findings,
1135            redactions,
1136            stats,
1137        };
1138        if !self.config.include_findings {
1139            result.findings.clear();
1140        }
1141        result
1142    }
1143
1144    fn replacement_for(
1145        &self,
1146        strategy: &RedactionStrategy,
1147        category: &SensitiveCategory,
1148        data_type: &str,
1149        raw: &str,
1150    ) -> String {
1151        match strategy {
1152            RedactionStrategy::Keep => raw.to_string(),
1153            RedactionStrategy::Mask => "****".to_string(),
1154            RedactionStrategy::Fingerprint => format!("[FP:{}]", fingerprint(raw)),
1155            RedactionStrategy::Drop => String::new(),
1156            RedactionStrategy::Tokenize => {
1157                let id = self.token_vault.insert(raw);
1158                format!("[TOKEN:{id}]")
1159            }
1160            RedactionStrategy::Partial => preview_redacted(raw),
1161            RedactionStrategy::TypeLabel => match category {
1162                SensitiveCategory::Secret | SensitiveCategory::Pii => {
1163                    format!("[REDACTED:{data_type}]")
1164                }
1165                SensitiveCategory::Internal => "[REDACTED:internal]".to_string(),
1166                SensitiveCategory::Custom(label) => format!("[REDACTED:{label}]"),
1167            },
1168        }
1169    }
1170
1171    /// Sanitize a JSON value. Preserves structure: strings are sanitized in
1172    /// place, arrays and objects are recursed. Fields whose detected strategy
1173    /// is `Drop` and which consist entirely of the match become `null`.
1174    pub fn sanitize_value(&self, value: &serde_json::Value) -> SanitizedValue {
1175        let mut findings: Vec<SensitiveDataFinding> = Vec::new();
1176        let mut redactions: Vec<Redaction> = Vec::new();
1177        let mut was_redacted = false;
1178        let sanitized =
1179            self.sanitize_value_inner(value, &mut findings, &mut redactions, &mut was_redacted);
1180        SanitizedValue {
1181            value: sanitized,
1182            findings,
1183            redactions,
1184            was_redacted,
1185        }
1186    }
1187
1188    fn sanitize_value_inner(
1189        &self,
1190        value: &serde_json::Value,
1191        findings: &mut Vec<SensitiveDataFinding>,
1192        redactions: &mut Vec<Redaction>,
1193        was_redacted: &mut bool,
1194    ) -> serde_json::Value {
1195        use serde_json::Value as V;
1196        match value {
1197            V::Null | V::Bool(_) | V::Number(_) => value.clone(),
1198            V::String(s) => {
1199                let r = self.sanitize_text(s);
1200                if r.was_redacted {
1201                    *was_redacted = true;
1202                    // If the entire string was detected and the chosen
1203                    // strategy was Drop, collapse the field to null so it
1204                    // disappears downstream.
1205                    if r.sanitized.is_empty()
1206                        && r.redactions.len() == 1
1207                        && matches!(r.redactions[0].strategy, RedactionStrategy::Drop)
1208                    {
1209                        findings.extend(r.findings);
1210                        redactions.extend(r.redactions);
1211                        return V::Null;
1212                    }
1213                }
1214                findings.extend(r.findings);
1215                redactions.extend(r.redactions);
1216                V::String(r.sanitized)
1217            }
1218            V::Array(items) => {
1219                let new_items: Vec<serde_json::Value> = items
1220                    .iter()
1221                    .map(|v| self.sanitize_value_inner(v, findings, redactions, was_redacted))
1222                    .collect();
1223                V::Array(new_items)
1224            }
1225            V::Object(map) => {
1226                if let Some((finding, redaction)) = detect_service_account_object(map) {
1227                    *was_redacted = true;
1228                    findings.push(finding);
1229                    redactions.push(redaction);
1230                    return V::Null;
1231                }
1232                let mut new_map = serde_json::Map::with_capacity(map.len());
1233                for (k, v) in map {
1234                    let sv = self.sanitize_value_inner(v, findings, redactions, was_redacted);
1235                    new_map.insert(k.clone(), sv);
1236                }
1237                V::Object(new_map)
1238            }
1239        }
1240    }
1241}
1242
1243/// Output of `OutputSanitizer::sanitize_value`.
1244#[derive(Debug, Clone)]
1245pub struct SanitizedValue {
1246    pub value: serde_json::Value,
1247    pub findings: Vec<SensitiveDataFinding>,
1248    pub redactions: Vec<Redaction>,
1249    pub was_redacted: bool,
1250}
1251
1252// ---------------------------------------------------------------------------
1253// Overlap resolution: longest-match-wins, with strategy-rank tiebreaker.
1254// ---------------------------------------------------------------------------
1255
1256fn strategy_rank(s: &RedactionStrategy) -> u8 {
1257    match s {
1258        RedactionStrategy::Keep => 0,
1259        RedactionStrategy::Partial => 1,
1260        RedactionStrategy::TypeLabel => 2,
1261        RedactionStrategy::Fingerprint => 3,
1262        RedactionStrategy::Tokenize => 4,
1263        RedactionStrategy::Mask => 5,
1264        RedactionStrategy::Drop => 6,
1265    }
1266}
1267
1268type ResolvedSpan = (Span, RedactionStrategy, SensitiveCategory, String, String);
1269
1270fn resolve_overlaps(
1271    findings: &[SensitiveDataFinding],
1272    defaults: &HashMap<SensitiveCategory, RedactionStrategy>,
1273) -> Vec<ResolvedSpan> {
1274    let mut spans: Vec<ResolvedSpan> = Vec::with_capacity(findings.len());
1275    for f in findings {
1276        // Strategy selection:
1277        //   - If the detector recommended Keep, honor it.
1278        //   - If the detector asked for a "strong" action (Drop, Fingerprint,
1279        //     Tokenize), honor that (overriding category default).
1280        //   - Otherwise fall back to the config's per-category default, else
1281        //     the detector's recommendation.
1282        let strategy = match &f.recommended_action {
1283            RedactionStrategy::Keep => RedactionStrategy::Keep,
1284            RedactionStrategy::Drop
1285            | RedactionStrategy::Fingerprint
1286            | RedactionStrategy::Tokenize => f.recommended_action.clone(),
1287            _ => defaults
1288                .get(&f.category)
1289                .cloned()
1290                .unwrap_or_else(|| f.recommended_action.clone()),
1291        };
1292        spans.push((
1293            f.span,
1294            strategy,
1295            f.category.clone(),
1296            f.data_type.clone(),
1297            f.id.clone(),
1298        ));
1299    }
1300
1301    spans.sort_by(|a, b| {
1302        a.0.start
1303            .cmp(&b.0.start)
1304            .then_with(|| b.0.end.cmp(&a.0.end))
1305    });
1306
1307    let mut merged: Vec<ResolvedSpan> = Vec::new();
1308    for current in spans {
1309        if let Some(last) = merged.last_mut() {
1310            if current.0.start < last.0.end {
1311                let new_end = last.0.end.max(current.0.end);
1312                last.0.end = new_end;
1313                if strategy_rank(&current.1) > strategy_rank(&last.1) {
1314                    last.1 = current.1;
1315                    last.2 = current.2;
1316                    last.3 = current.3;
1317                    last.4 = current.4;
1318                }
1319                continue;
1320            }
1321        }
1322        merged.push(current);
1323    }
1324    merged
1325}
1326
1327fn detect_service_account_object(
1328    map: &serde_json::Map<String, serde_json::Value>,
1329) -> Option<(SensitiveDataFinding, Redaction)> {
1330    let value = map.get("type")?.as_str()?;
1331    if !value.eq_ignore_ascii_case("service_account") {
1332        return None;
1333    }
1334
1335    let span = Span { start: 0, end: 0 };
1336    let finding = SensitiveDataFinding {
1337        id: "secret_gcp_service_account".to_string(),
1338        category: SensitiveCategory::Secret,
1339        data_type: "gcp_service_account_json".to_string(),
1340        confidence: 0.97,
1341        span,
1342        preview: preview_redacted(value),
1343        detector: "object".to_string(),
1344        recommended_action: RedactionStrategy::Drop,
1345    };
1346    let redaction = Redaction {
1347        finding_id: finding.id.clone(),
1348        strategy: RedactionStrategy::Drop,
1349        original_span: span,
1350        replacement: String::new(),
1351    };
1352    Some((finding, redaction))
1353}
1354
1355// ===========================================================================
1356// Tests
1357// ===========================================================================
1358
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `ResponseSanitizationGuard::new`.
    fn guard(min_level: SensitivityLevel, action: SanitizationAction) -> ResponseSanitizationGuard {
        ResponseSanitizationGuard::new(min_level, action)
    }

    /// Run a High/Block guard against a signed tool-call request carrying
    /// `arguments` and return the verdict.
    fn evaluate_tool_call(tool_name: &str, arguments: serde_json::Value) -> Verdict {
        let guard = guard(SensitivityLevel::High, SanitizationAction::Block);

        let kp = chio_core::crypto::Keypair::generate();
        let scope = chio_core::capability::ChioScope::default();
        let agent_id = kp.public_key().to_hex();
        let server_id = "srv".to_string();

        let cap = chio_core::capability::CapabilityToken::sign(
            chio_core::capability::CapabilityTokenBody {
                id: "cap-test".to_string(),
                issuer: kp.public_key(),
                subject: kp.public_key(),
                scope: scope.clone(),
                issued_at: 0,
                expires_at: u64::MAX,
                delegation_chain: vec![],
            },
            &kp,
        )
        .expect("sign cap");

        let request = chio_kernel::ToolCallRequest {
            request_id: "req-test".to_string(),
            capability: cap,
            tool_name: tool_name.to_string(),
            server_id: server_id.clone(),
            agent_id: agent_id.clone(),
            arguments,
            dpop_proof: None,
            governed_intent: None,
            approval_token: None,
            model_metadata: None,
            federated_origin_kernel_id: None,
        };

        let ctx = chio_kernel::GuardContext {
            request: &request,
            scope: &scope,
            agent_id: &agent_id,
            server_id: &server_id,
            session_filesystem_roots: None,
            matched_grant_index: None,
        };

        guard.evaluate(&ctx).expect("ok")
    }

    // ---- Legacy API tests ----

    #[test]
    fn guard_name() {
        let sut = guard(SensitivityLevel::Low, SanitizationAction::Block);
        assert_eq!(sut.name(), "response-sanitization");
    }

    #[test]
    fn detects_ssn() {
        let hits = guard(SensitivityLevel::Low, SanitizationAction::Block)
            .scan("My SSN is 123-45-6789");
        assert!(!hits.is_empty());
        assert!(hits.iter().any(|(name, _)| name == "SSN"));
    }

    #[test]
    fn detects_email() {
        let hits = guard(SensitivityLevel::Low, SanitizationAction::Block)
            .scan("Contact john@example.com for info");
        assert!(hits.iter().any(|(name, _)| name == "email"));
    }

    #[test]
    fn detects_mrn() {
        let hits = guard(SensitivityLevel::Low, SanitizationAction::Block)
            .scan("Patient MRN: 123456789");
        assert!(hits.iter().any(|(name, _)| name == "MRN"));
    }

    #[test]
    fn no_findings_on_clean_text() {
        let hits = guard(SensitivityLevel::High, SanitizationAction::Block)
            .scan("This is perfectly clean text with no PII.");
        assert!(hits.is_empty());
    }

    #[test]
    fn respects_minimum_sensitivity() {
        let sut = guard(SensitivityLevel::High, SanitizationAction::Block);

        // Email sits below the High threshold, so it must not be reported.
        let email_hits = sut.scan("Contact john@example.com");
        assert!(!email_hits.iter().any(|(name, _)| name == "email"));

        let ssn_hits = sut.scan("SSN 123-45-6789");
        assert!(ssn_hits.iter().any(|(name, _)| name == "SSN"));
    }

    #[test]
    fn redacts_ssn() {
        let (text, replaced) = guard(SensitivityLevel::Low, SanitizationAction::Redact)
            .redact("SSN is 123-45-6789 please");
        assert!(text.contains("[SSN REDACTED]"));
        assert!(!text.contains("123-45-6789"));
        assert!(replaced > 0);
    }

    #[test]
    fn redacts_email() {
        let (text, _) = guard(SensitivityLevel::Low, SanitizationAction::Redact)
            .redact("Email: jane@example.com");
        assert!(text.contains("[EMAIL REDACTED]"));
        assert!(!text.contains("jane@example.com"));
    }

    #[test]
    fn scan_response_clean() {
        let payload = serde_json::json!({"status": "ok", "data": "nothing sensitive"});
        let outcome =
            guard(SensitivityLevel::High, SanitizationAction::Block).scan_response(&payload);
        assert!(matches!(outcome, ScanResult::Clean));
    }

    #[test]
    fn scan_response_blocked() {
        let payload = serde_json::json!({"patient": "SSN: 123-45-6789"});
        let outcome =
            guard(SensitivityLevel::High, SanitizationAction::Block).scan_response(&payload);
        assert!(matches!(outcome, ScanResult::Blocked(_)));
    }

    #[test]
    fn scan_response_redacted() {
        let payload = serde_json::json!({"patient": "SSN: 123-45-6789"});
        let outcome =
            guard(SensitivityLevel::High, SanitizationAction::Redact).scan_response(&payload);
        let ScanResult::Redacted { redacted_text, .. } = outcome else {
            panic!("expected Redacted result");
        };
        assert!(redacted_text.contains("[SSN REDACTED]"));
    }

    #[test]
    fn guard_evaluate_denies_args_with_pii() {
        let verdict = evaluate_tool_call(
            "write_file",
            serde_json::json!({"content": "SSN is 123-45-6789"}),
        );
        assert_eq!(verdict, Verdict::Deny);
    }

    #[test]
    fn guard_evaluate_allows_clean_args() {
        let verdict = evaluate_tool_call(
            "read_file",
            serde_json::json!({"path": "/app/src/main.rs"}),
        );
        assert_eq!(verdict, Verdict::Allow);
    }

    #[test]
    fn custom_pattern() {
        let custom = build_pattern(
            "custom-id",
            r"\bCUST-\d{8}\b",
            SensitivityLevel::High,
            "[CUST-ID REDACTED]",
        );
        assert!(custom.is_some());

        let sut = ResponseSanitizationGuard::with_patterns(
            vec![custom.unwrap()],
            SensitivityLevel::High,
            SanitizationAction::Block,
        );
        let hits = sut.scan("Customer CUST-12345678 record");
        assert!(!hits.is_empty());
        assert!(hits.iter().any(|(name, _)| name == "custom-id"));
    }

    // ---- OutputSanitizer unit tests ----

    #[test]
    fn luhn_rejects_random_16_digit_number() {
        assert!(!is_luhn_valid_card_number("1234567890123456"));
        // Known-valid test card (Visa).
        assert!(is_luhn_valid_card_number("4111 1111 1111 1111"));
        // One digit flipped: no longer valid.
        assert!(!is_luhn_valid_card_number("4111 1111 1111 1112"));
    }

    #[test]
    fn shannon_entropy_basic() {
        let uniform = shannon_entropy_ascii("aaaaaa").unwrap();
        assert!(uniform < 0.01);
        let varied = shannon_entropy_ascii("abcdefghij0123456789").unwrap();
        assert!(varied > 4.0);
    }

    #[test]
    fn ssn_fragments_validator_rejects_invalid_areas() {
        let rejected = [
            "000-12-3456",
            "666-12-3456",
            "900-12-3456",
            "123-00-4567",
            "123-45-0000",
        ];
        for candidate in rejected {
            assert!(
                !is_valid_ssn_fragments(candidate),
                "{candidate} should be rejected"
            );
        }
        assert!(is_valid_ssn_fragments("123-45-6789"));
    }
}