1use std::collections::HashMap;
21use std::sync::{Arc, Mutex, OnceLock};
22
23use regex::Regex;
24use serde::{Deserialize, Serialize};
25use sha2::{Digest, Sha256};
26
27use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
28
/// How sensitive a matched pattern is considered to be.
///
/// The enum does not derive `PartialOrd`; ordering is provided by
/// `level_ord`, which ranks `Low < Medium < High`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SensitivityLevel {
    /// Lowest rank.
    Low,
    /// Middle rank.
    Medium,
    /// Highest rank.
    High,
}
43
/// A named regular expression together with its sensitivity level and the
/// text that replaces its matches when redacting.
///
/// The `regex` field is private; `build_pattern` constructs a value from a
/// regex source string.
#[derive(Debug, Clone)]
pub struct SensitivePattern {
    /// Label reported alongside each match by `ResponseSanitizationGuard::scan`.
    pub name: String,
    /// Compiled matcher.
    regex: Regex,
    /// Sensitivity rank; patterns ranked below a guard's `min_level` are skipped.
    pub level: SensitivityLevel,
    /// Replacement text used by `ResponseSanitizationGuard::redact`.
    pub redaction: String,
}
56
/// What `ResponseSanitizationGuard::scan_response` does when it finds
/// sensitive data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SanitizationAction {
    /// Return `ScanResult::Blocked` carrying the findings.
    Block,
    /// Return `ScanResult::Redacted` carrying the redacted text.
    Redact,
}
65
66fn default_patterns() -> Vec<SensitivePattern> {
67 let mut patterns = Vec::new();
68
69 if let Ok(regex) = Regex::new(r"\b\d{3}-\d{2}-\d{4}\b") {
70 patterns.push(SensitivePattern {
71 name: "SSN".to_string(),
72 regex,
73 level: SensitivityLevel::High,
74 redaction: "[SSN REDACTED]".to_string(),
75 });
76 }
77
78 if let Ok(regex) = Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b") {
79 patterns.push(SensitivePattern {
80 name: "email".to_string(),
81 regex,
82 level: SensitivityLevel::Medium,
83 redaction: "[EMAIL REDACTED]".to_string(),
84 });
85 }
86
87 if let Ok(regex) = Regex::new(r"\b(?:\(\d{3}\)\s*|\d{3}[-.])\d{3}[-.]?\d{4}\b") {
88 patterns.push(SensitivePattern {
89 name: "phone".to_string(),
90 regex,
91 level: SensitivityLevel::Low,
92 redaction: "[PHONE REDACTED]".to_string(),
93 });
94 }
95
96 if let Ok(regex) = Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b") {
97 patterns.push(SensitivePattern {
98 name: "credit-card".to_string(),
99 regex,
100 level: SensitivityLevel::High,
101 redaction: "[CARD REDACTED]".to_string(),
102 });
103 }
104
105 if let Ok(regex) = Regex::new(r"\b(?:\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})\b") {
106 patterns.push(SensitivePattern {
107 name: "date-of-birth".to_string(),
108 regex,
109 level: SensitivityLevel::Low,
110 redaction: "[DATE REDACTED]".to_string(),
111 });
112 }
113
114 if let Ok(regex) = Regex::new(r"\bMRN[:\s#]*\d{6,12}\b") {
115 patterns.push(SensitivePattern {
116 name: "MRN".to_string(),
117 regex,
118 level: SensitivityLevel::High,
119 redaction: "[MRN REDACTED]".to_string(),
120 });
121 }
122
123 if let Ok(regex) = Regex::new(r"\b[A-Z]\d{2}(?:\.\d{1,4})?\b") {
124 patterns.push(SensitivePattern {
125 name: "ICD-10".to_string(),
126 regex,
127 level: SensitivityLevel::Medium,
128 redaction: "[ICD REDACTED]".to_string(),
129 });
130 }
131
132 patterns
133}
134
/// Guard that scans text for sensitive-data patterns and either blocks or
/// redacts, depending on `action`.
pub struct ResponseSanitizationGuard {
    /// Patterns to evaluate, in order.
    patterns: Vec<SensitivePattern>,
    /// Patterns ranked below this level are skipped by `scan` and `redact`.
    min_level: SensitivityLevel,
    /// Behaviour of `scan_response` when something is found.
    action: SanitizationAction,
}
141
142impl ResponseSanitizationGuard {
143 pub fn new(min_level: SensitivityLevel, action: SanitizationAction) -> Self {
144 Self {
145 patterns: default_patterns(),
146 min_level,
147 action,
148 }
149 }
150
151 pub fn with_patterns(
152 patterns: Vec<SensitivePattern>,
153 min_level: SensitivityLevel,
154 action: SanitizationAction,
155 ) -> Self {
156 Self {
157 patterns,
158 min_level,
159 action,
160 }
161 }
162
163 pub fn with_additional_patterns(
164 additional_patterns: Vec<SensitivePattern>,
165 min_level: SensitivityLevel,
166 action: SanitizationAction,
167 ) -> Self {
168 let mut patterns = default_patterns();
169 patterns.extend(additional_patterns);
170 Self {
171 patterns,
172 min_level,
173 action,
174 }
175 }
176
177 pub fn scan(&self, text: &str) -> Vec<(String, String)> {
178 let mut findings = Vec::new();
179 for pattern in &self.patterns {
180 if level_ord(pattern.level) < level_ord(self.min_level) {
181 continue;
182 }
183 for m in pattern.regex.find_iter(text) {
184 findings.push((pattern.name.clone(), m.as_str().to_string()));
185 }
186 }
187 findings
188 }
189
190 pub fn redact(&self, text: &str) -> (String, usize) {
191 let mut result = text.to_string();
192 let mut count = 0usize;
193 for pattern in &self.patterns {
194 if level_ord(pattern.level) < level_ord(self.min_level) {
195 continue;
196 }
197 let match_count = pattern.regex.find_iter(&result).count();
198 if match_count > 0 {
199 result = pattern
200 .regex
201 .replace_all(&result, pattern.redaction.as_str())
202 .to_string();
203 count = count.saturating_add(match_count);
204 }
205 }
206 (result, count)
207 }
208
209 pub fn scan_response(&self, response: &serde_json::Value) -> ScanResult {
210 let text = response.to_string();
211 let findings = self.scan(&text);
212 if findings.is_empty() {
213 return ScanResult::Clean;
214 }
215 match self.action {
216 SanitizationAction::Block => ScanResult::Blocked(findings),
217 SanitizationAction::Redact => {
218 let (redacted, count) = self.redact(&text);
219 ScanResult::Redacted {
220 redacted_text: redacted,
221 redaction_count: count,
222 findings,
223 }
224 }
225 }
226 }
227}
228
/// Outcome of `ResponseSanitizationGuard::scan_response`.
#[derive(Debug)]
pub enum ScanResult {
    /// No active pattern matched.
    Clean,
    /// Matches were found and the action is `Block`; holds the
    /// `(pattern name, matched text)` pairs.
    Blocked(Vec<(String, String)>),
    /// Matches were found and the action is `Redact`.
    Redacted {
        /// Serialized response text after redaction.
        redacted_text: String,
        /// Number of replacements performed.
        redaction_count: usize,
        /// `(pattern name, matched text)` pairs found before redaction.
        findings: Vec<(String, String)>,
    },
}
239
240fn level_ord(level: SensitivityLevel) -> u8 {
241 match level {
242 SensitivityLevel::Low => 0,
243 SensitivityLevel::Medium => 1,
244 SensitivityLevel::High => 2,
245 }
246}
247
248impl Guard for ResponseSanitizationGuard {
249 fn name(&self) -> &str {
250 "response-sanitization"
251 }
252
253 fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
254 let args_text = ctx.request.arguments.to_string();
255 let findings = self.scan(&args_text);
256 if findings.is_empty() {
257 Ok(Verdict::Allow)
258 } else {
259 Ok(Verdict::Deny)
260 }
261 }
262}
263
264pub fn build_pattern(
266 name: &str,
267 regex_str: &str,
268 level: SensitivityLevel,
269 redaction: &str,
270) -> Option<SensitivePattern> {
271 Regex::new(regex_str).ok().map(|regex| SensitivePattern {
272 name: name.to_string(),
273 regex,
274 level,
275 redaction: redaction.to_string(),
276 })
277}
278
/// Broad class of a detected value. Serialized with snake_case variant names.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SensitiveCategory {
    /// Credentials, keys, tokens and denylisted values.
    Secret,
    /// Personally identifiable information.
    Pii,
    /// Internal infrastructure details (the built-in set covers private IPs).
    Internal,
    /// Caller-defined category; the label appears in `TypeLabel` replacements.
    Custom(String),
}
292
/// How a detected value is rewritten by `OutputSanitizer`.
/// Serialized with snake_case variant names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RedactionStrategy {
    /// Replace with the fixed text `****`.
    Mask,
    /// Replace with `[FP:<hex>]`, where `<hex>` comes from `fingerprint`.
    Fingerprint,
    /// Replace with the empty string.
    Drop,
    /// Store the value in the token vault and replace with `[TOKEN:<id>]`.
    Tokenize,
    /// Replace with the partially masked form produced by `preview_redacted`.
    Partial,
    /// Replace with a `[REDACTED:<label>]` marker.
    TypeLabel,
    /// Leave the value unchanged.
    Keep,
}
313
/// Half-open byte range `start..end` into the scanned text.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset of the first byte of the range.
    pub start: usize,
    /// Byte offset one past the last byte of the range.
    pub end: usize,
}
320
/// A single detection produced by `OutputSanitizer`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SensitiveDataFinding {
    /// Identifier of the rule that produced the finding
    /// (for example `pii_ssn` or `denylist_exact_<fingerprint>`).
    pub id: String,
    /// Broad class of the detected value.
    pub category: SensitiveCategory,
    /// Specific kind of data (for example `email`, `jwt`, `denylist`).
    pub data_type: String,
    /// Confidence assigned by the detector (values in this file lie in 0.6..=1.0).
    pub confidence: f32,
    /// Location of the value in the scanned text.
    pub span: Span,
    /// Partially masked rendering of the value (see `preview_redacted`).
    pub preview: String,
    /// Which detector fired: `denylist`, `pattern`, `entropy` or `object`.
    pub detector: String,
    /// Strategy suggested by the detector; may be overridden per category
    /// when overlaps are resolved.
    pub recommended_action: RedactionStrategy,
}
333
/// Record of one replacement applied to the text.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Redaction {
    /// `id` of the finding whose strategy was applied.
    pub finding_id: String,
    /// Strategy that produced `replacement`.
    pub strategy: RedactionStrategy,
    /// Range replaced, expressed in offsets of the text before redaction.
    pub original_span: Span,
    /// Text that was written in place of the original range.
    pub replacement: String,
}
342
/// Counters describing one `OutputSanitizer::sanitize_text` run.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ProcessingStats {
    /// Byte length of the input as passed in (before any truncation).
    pub input_length: usize,
    /// Byte length of the sanitized output.
    pub output_length: usize,
    /// Number of findings before overlapping spans were merged.
    pub findings_count: usize,
    /// Number of replacements actually applied.
    pub redactions_count: usize,
}
351
/// Result of `OutputSanitizer::sanitize_text`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SanitizationResult {
    /// Text after redaction (and truncation marker, if the input was cut).
    pub sanitized: String,
    /// `true` when at least one replacement was applied or the input was truncated.
    pub was_redacted: bool,
    /// All findings; emptied when `include_findings` is disabled in the config.
    pub findings: Vec<SensitiveDataFinding>,
    /// Replacements that were applied.
    pub redactions: Vec<Redaction>,
    /// Size and count statistics for the run.
    pub stats: ProcessingStats,
}
361
/// Enables or disables the built-in pattern detectors per category.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CategoryConfig {
    /// Run the `Secret` patterns; also required for the entropy detector.
    pub secrets: bool,
    /// Run the `Pii` patterns.
    pub pii: bool,
    /// Run the `Internal` patterns.
    pub internal: bool,
}
369
370impl Default for CategoryConfig {
371 fn default() -> Self {
372 Self {
373 secrets: true,
374 pii: true,
375 internal: true,
376 }
377 }
378}
379
/// Settings for the high-entropy token detector.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EntropyConfig {
    /// Turns the detector on or off.
    pub enabled: bool,
    /// Minimum Shannon entropy (as computed by `shannon_entropy_ascii`) for a
    /// token to be reported.
    pub threshold: f64,
    /// Tokens shorter than this many bytes are ignored. The candidate regex
    /// itself only yields tokens of at least 16 characters.
    pub min_token_len: usize,
}
387
388impl Default for EntropyConfig {
389 fn default() -> Self {
390 Self {
391 enabled: true,
392 threshold: 4.5,
393 min_token_len: 16,
394 }
395 }
396}
397
/// Values that the built-in pattern and entropy detectors must not report.
/// Denylist findings are not filtered by the allowlist.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct AllowlistConfig {
    /// A match equal to one of these strings is skipped.
    pub exact: Vec<String>,
    /// Regex sources; a match is skipped when any of them matches anywhere in
    /// it (`is_match`, so anchor explicitly if a full match is required).
    pub patterns: Vec<String>,
}
404
/// Values that are always reported as `Secret` findings.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DenylistConfig {
    /// Literal substrings to flag; empty strings are ignored.
    pub exact: Vec<String>,
    /// Regex sources whose matches are flagged.
    pub patterns: Vec<String>,
}
411
/// Configuration for `OutputSanitizer`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OutputSanitizerConfig {
    /// Which built-in pattern categories run.
    pub categories: CategoryConfig,
    /// Per-category strategy that overrides a detector's `Mask`, `Partial`
    /// or `TypeLabel` recommendation (see `resolve_overlaps`).
    pub redaction_strategies: HashMap<SensitiveCategory, RedactionStrategy>,
    /// High-entropy token detector settings.
    pub entropy: EntropyConfig,
    /// Values exempt from pattern and entropy detection.
    pub allowlist: AllowlistConfig,
    /// Values that are always flagged.
    pub denylist: DenylistConfig,
    /// Inputs longer than this many bytes are truncated before scanning and a
    /// truncation marker is appended to the output.
    pub max_input_bytes: usize,
    /// When `false`, the findings list is emptied before results are returned.
    pub include_findings: bool,
}
423
424impl Default for OutputSanitizerConfig {
425 fn default() -> Self {
426 let mut redaction_strategies = HashMap::new();
427 redaction_strategies.insert(SensitiveCategory::Secret, RedactionStrategy::Mask);
428 redaction_strategies.insert(SensitiveCategory::Pii, RedactionStrategy::Partial);
429 redaction_strategies.insert(SensitiveCategory::Internal, RedactionStrategy::TypeLabel);
430 Self {
431 categories: CategoryConfig::default(),
432 redaction_strategies,
433 entropy: EntropyConfig::default(),
434 allowlist: AllowlistConfig::default(),
435 denylist: DenylistConfig::default(),
436 max_input_bytes: 1_000_000,
437 include_findings: true,
438 }
439 }
440}
441
/// Errors returned by `OutputSanitizer::with_config`.
#[derive(Debug, thiserror::Error)]
pub enum OutputSanitizerConfigError {
    /// A configured allowlist or denylist regex failed to compile.
    #[error("invalid {list_name} regex `{pattern}`: {source}")]
    InvalidPattern {
        /// Either `"allowlist"` or `"denylist"`.
        list_name: &'static str,
        /// The regex source that was rejected.
        pattern: String,
        /// The underlying compile error.
        #[source]
        source: regex::Error,
    },
}
452
/// One built-in detector: a compiled regex plus the metadata copied into
/// every finding it produces.
#[derive(Clone)]
struct CompiledPattern {
    /// Stable rule identifier, used as the finding `id`.
    id: &'static str,
    /// Category the rule belongs to; also decides which config toggle gates it.
    category: SensitiveCategory,
    /// Value copied into the finding's `data_type`.
    data_type: &'static str,
    /// Value copied into the finding's `confidence`.
    confidence: f32,
    /// Value copied into the finding's `recommended_action`.
    recommended: RedactionStrategy,
    /// Compiled matcher.
    regex: Regex,
    /// Optional extra check on the matched text; a `false` result discards the match.
    validator: Option<fn(&str) -> bool>,
}
467
468fn compile_or_nomatch(pattern: &'static str) -> Regex {
469 match Regex::new(pattern) {
470 Ok(re) => re,
471 Err(err) => {
472 tracing::error!(error = %err, %pattern, "failed to compile hardcoded regex");
473 match Regex::new(r"\A\z") {
476 Ok(re) => re,
477 Err(_) => match Regex::new("") {
478 Ok(re) => re,
479 Err(_) => {
480 #[allow(clippy::unwrap_used)]
484 {
485 Regex::new("").unwrap()
486 }
487 }
488 },
489 }
490 }
491 }
492}
493
494fn compiled_patterns() -> &'static [CompiledPattern] {
495 static PATS: OnceLock<Vec<CompiledPattern>> = OnceLock::new();
496 PATS.get_or_init(|| {
497 vec![
498 CompiledPattern {
500 id: "secret_aws_access_key_id",
501 category: SensitiveCategory::Secret,
502 data_type: "aws_access_key_id",
503 confidence: 0.99,
504 recommended: RedactionStrategy::Mask,
505 regex: compile_or_nomatch(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"),
506 validator: None,
507 },
508 CompiledPattern {
509 id: "secret_aws_secret_access_key",
510 category: SensitiveCategory::Secret,
511 data_type: "aws_secret_access_key",
512 confidence: 0.9,
513 recommended: RedactionStrategy::Mask,
514 regex: compile_or_nomatch(
515 r"(?i)aws_secret_access_key\s*[:=]\s*[A-Za-z0-9/+=]{40}",
516 ),
517 validator: None,
518 },
519 CompiledPattern {
520 id: "secret_github_token",
521 category: SensitiveCategory::Secret,
522 data_type: "github_token",
523 confidence: 0.99,
524 recommended: RedactionStrategy::Mask,
525 regex: compile_or_nomatch(r"\bgh[pousr]_[A-Za-z0-9]{36,255}\b"),
526 validator: None,
527 },
528 CompiledPattern {
529 id: "secret_slack_token",
530 category: SensitiveCategory::Secret,
531 data_type: "slack_token",
532 confidence: 0.99,
533 recommended: RedactionStrategy::Mask,
534 regex: compile_or_nomatch(r"\bxox[abopsr]-[A-Za-z0-9-]{10,}\b"),
535 validator: None,
536 },
537 CompiledPattern {
538 id: "secret_slack_webhook",
539 category: SensitiveCategory::Secret,
540 data_type: "slack_webhook",
541 confidence: 0.95,
542 recommended: RedactionStrategy::Mask,
543 regex: compile_or_nomatch(
544 r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+",
545 ),
546 validator: None,
547 },
548 CompiledPattern {
549 id: "secret_gcp_service_account",
550 category: SensitiveCategory::Secret,
551 data_type: "gcp_service_account_json",
552 confidence: 0.97,
553 recommended: RedactionStrategy::Drop,
554 regex: compile_or_nomatch(r#""type"\s*:\s*"service_account""#),
555 validator: None,
556 },
557 CompiledPattern {
558 id: "secret_pem_private_key",
559 category: SensitiveCategory::Secret,
560 data_type: "pem_private_key",
561 confidence: 0.99,
562 recommended: RedactionStrategy::Mask,
563 regex: compile_or_nomatch(
564 r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
565 ),
566 validator: None,
567 },
568 CompiledPattern {
569 id: "secret_jwt",
570 category: SensitiveCategory::Secret,
571 data_type: "jwt",
572 confidence: 0.85,
573 recommended: RedactionStrategy::Mask,
574 regex: compile_or_nomatch(
575 r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b",
576 ),
577 validator: None,
578 },
579 CompiledPattern {
580 id: "secret_oauth_bearer",
581 category: SensitiveCategory::Secret,
582 data_type: "oauth_bearer",
583 confidence: 0.85,
584 recommended: RedactionStrategy::Mask,
585 regex: compile_or_nomatch(
586 r"(?i)\b(?:authorization|auth)\s*:\s*bearer\s+[A-Za-z0-9._~+/=-]{16,}",
587 ),
588 validator: None,
589 },
590 CompiledPattern {
591 id: "secret_password_assignment",
592 category: SensitiveCategory::Secret,
593 data_type: "password",
594 confidence: 0.7,
595 recommended: RedactionStrategy::Mask,
596 regex: compile_or_nomatch(
597 r"(?i)\b(?:password|passwd|pwd|secret)\s*[:=]\s*\S{6,}",
598 ),
599 validator: None,
600 },
601 CompiledPattern {
603 id: "pii_ssn",
604 category: SensitiveCategory::Pii,
605 data_type: "ssn",
606 confidence: 0.9,
607 recommended: RedactionStrategy::Mask,
608 regex: compile_or_nomatch(r"\b\d{3}-\d{2}-\d{4}\b"),
609 validator: Some(is_valid_ssn_fragments),
610 },
611 CompiledPattern {
612 id: "pii_ssn_compact",
613 category: SensitiveCategory::Pii,
614 data_type: "ssn",
615 confidence: 0.7,
616 recommended: RedactionStrategy::Mask,
617 regex: compile_or_nomatch(r"(?:^|[^0-9])(\d{9})(?:$|[^0-9])"),
618 validator: Some(is_valid_ssn_compact),
619 },
620 CompiledPattern {
621 id: "pii_credit_card",
622 category: SensitiveCategory::Pii,
623 data_type: "credit_card",
624 confidence: 0.9,
625 recommended: RedactionStrategy::Mask,
626 regex: compile_or_nomatch(r"\b(?:\d[ -]*?){13,19}\b"),
627 validator: Some(is_luhn_valid_card_number),
628 },
629 CompiledPattern {
630 id: "pii_email",
631 category: SensitiveCategory::Pii,
632 data_type: "email",
633 confidence: 0.95,
634 recommended: RedactionStrategy::Partial,
635 regex: compile_or_nomatch(
636 r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b",
637 ),
638 validator: None,
639 },
640 CompiledPattern {
642 id: "internal_private_ip",
643 category: SensitiveCategory::Internal,
644 data_type: "internal_ip",
645 confidence: 0.8,
646 recommended: RedactionStrategy::TypeLabel,
647 regex: compile_or_nomatch(
648 r"\b(?:10|192\.168|172\.(?:1[6-9]|2[0-9]|3[0-1]))\.[0-9]{1,3}\.[0-9]{1,3}\b",
649 ),
650 validator: None,
651 },
652 ]
653 })
654}
655
/// Computes the Shannon entropy, in bits per byte, of an ASCII token.
///
/// Returns `None` for an empty token or one containing non-ASCII characters.
fn shannon_entropy_ascii(token: &str) -> Option<f64> {
    if token.is_empty() || !token.is_ascii() {
        return None;
    }

    // Frequency of each byte value.
    let mut histogram = [0u32; 256];
    for byte in token.bytes() {
        let slot = &mut histogram[usize::from(byte)];
        *slot = slot.saturating_add(1);
    }

    let total = token.len() as f64;
    let entropy = histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .fold(0.0f64, |acc, &occurrences| {
            let p = f64::from(occurrences) / total;
            acc - p * p.log2()
        });
    Some(entropy)
}
683
/// Returns `true` when every byte of `token` is an ASCII letter, an ASCII
/// digit, or one of `+ / = - _`. An empty token also returns `true`.
fn is_candidate_secret_token(token: &str) -> bool {
    const EXTRA: &[u8] = b"+/=-_";
    !token
        .bytes()
        .any(|byte| !(byte.is_ascii_alphanumeric() || EXTRA.contains(&byte)))
}
689
/// Validates a candidate card number with the Luhn checksum.
///
/// Non-digit characters are ignored. The number must contain 13 to 19 digits
/// and must not consist of a single repeated digit.
fn is_luhn_valid_card_number(text: &str) -> bool {
    // `to_digit(10)` accepts exactly the ASCII digits.
    let digits: Vec<u32> = text.chars().filter_map(|c| c.to_digit(10)).collect();
    if digits.len() < 13 || digits.len() > 19 {
        return false;
    }

    let first = digits[0];
    if digits.iter().all(|&d| d == first) {
        return false;
    }

    // Walking from the rightmost digit, every second digit is doubled and
    // reduced by 9 when the doubled value exceeds 9.
    let checksum = digits
        .iter()
        .rev()
        .enumerate()
        .fold(0u32, |acc, (idx, &digit)| {
            let contribution = if idx % 2 == 1 {
                let doubled = digit * 2;
                if doubled > 9 {
                    doubled - 9
                } else {
                    doubled
                }
            } else {
                digit
            };
            acc.saturating_add(contribution)
        });
    checksum.is_multiple_of(10)
}
717
/// Checks a dash-separated `area-group-serial` SSN candidate.
///
/// The text must split into exactly three parts on `-`. A part that does not
/// parse as a number counts as 0. Rejected: area 0, 666 or 900..=999;
/// group 0; serial 0.
fn is_valid_ssn_fragments(text: &str) -> bool {
    let mut pieces = text.split('-');
    let (Some(area), Some(group), Some(serial), None) =
        (pieces.next(), pieces.next(), pieces.next(), pieces.next())
    else {
        return false;
    };

    let number = |piece: &str| piece.parse::<u32>().unwrap_or(0);
    let (area, group, serial) = (number(area), number(group), number(serial));

    let area_ok = !(area == 0 || area == 666 || (900..=999).contains(&area));
    area_ok && group != 0 && serial != 0
}
734
/// Checks an undelimited nine-digit SSN candidate.
///
/// Non-digit characters in `text` are ignored, and exactly nine digits must
/// remain. They are read as area (3), group (2) and serial (4) and rejected
/// when the area is 0, 666 or 900..=999, or when group or serial is 0.
fn is_valid_ssn_compact(text: &str) -> bool {
    let digits: Vec<u32> = text.chars().filter_map(|c| c.to_digit(10)).collect();
    if digits.len() != 9 {
        return false;
    }

    let value_of = |part: &[u32]| part.iter().fold(0u32, |acc, digit| acc * 10 + digit);
    let area = value_of(&digits[..3]);
    let group = value_of(&digits[3..5]);
    let serial = value_of(&digits[5..]);

    let area_ok = !(area == 0 || area == 666 || (900..=999).contains(&area));
    area_ok && group != 0 && serial != 0
}
751
/// Produces a partially masked rendering of `s`.
///
/// Strings of up to four characters become that many `*`. Longer strings keep
/// their first two and last two characters around a fixed `***`.
fn preview_redacted(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let count = chars.len();
    if count <= 4 {
        return "*".repeat(count);
    }

    let head: String = chars[..2].iter().collect();
    let tail: String = chars[count - 2..].iter().collect();
    format!("{head}***{tail}")
}
762
/// Cuts `text` to at most `max_bytes` bytes without splitting a character.
///
/// Returns the (possibly shortened) slice and a flag that is `true` when
/// something was cut off. Text that already fits is returned unchanged with
/// `false`.
fn truncate_to_char_boundary(text: &str, max_bytes: usize) -> (&str, bool) {
    if text.len() <= max_bytes {
        return (text, false);
    }

    // Largest boundary not beyond `max_bytes`; offset 0 is always a boundary.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);
    (&text[..cut], cut < text.len())
}
773
774fn fingerprint(s: &str) -> String {
775 let mut hasher = Sha256::new();
776 hasher.update(s.as_bytes());
777 let digest = hasher.finalize();
778 let mut out = String::with_capacity(16);
779 for b in digest.iter().take(8) {
780 out.push_str(&format!("{b:02x}"));
781 }
782 out
783}
784
/// In-memory store mapping generated tokens back to the original values that
/// the `Tokenize` strategy replaced. Access is serialized through a mutex.
#[derive(Debug, Default)]
pub struct TokenVault {
    /// Mutex-protected state.
    inner: Mutex<TokenVaultInner>,
}
794
/// State guarded by the `TokenVault` mutex.
#[derive(Debug, Default)]
struct TokenVaultInner {
    /// Incremented on every insert and embedded in the token id.
    counter: u64,
    /// Token id -> original value.
    map: HashMap<String, String>,
}
800
801impl TokenVault {
802 pub fn new() -> Self {
803 Self::default()
804 }
805
806 pub fn insert(&self, value: &str) -> String {
807 let mut inner = match self.inner.lock() {
808 Ok(g) => g,
809 Err(poisoned) => poisoned.into_inner(),
810 };
811 inner.counter = inner.counter.saturating_add(1);
812 let fp = fingerprint(value);
813 let id = format!("tok_{}_{}", inner.counter, fp);
814 inner.map.insert(id.clone(), value.to_string());
815 id
816 }
817
818 pub fn get(&self, token: &str) -> Option<String> {
819 let inner = match self.inner.lock() {
820 Ok(g) => g,
821 Err(poisoned) => poisoned.into_inner(),
822 };
823 inner.map.get(token).cloned()
824 }
825
826 pub fn len(&self) -> usize {
827 let inner = match self.inner.lock() {
828 Ok(g) => g,
829 Err(poisoned) => poisoned.into_inner(),
830 };
831 inner.map.len()
832 }
833
834 pub fn is_empty(&self) -> bool {
835 self.len() == 0
836 }
837}
838
/// Detects and redacts secrets, PII and internal details in text and JSON
/// values according to an `OutputSanitizerConfig`.
pub struct OutputSanitizer {
    /// Configuration the sanitizer was built with.
    config: OutputSanitizerConfig,
    /// Compiled form of `config.allowlist.patterns`.
    allowlist_patterns: Vec<Regex>,
    /// Compiled form of `config.denylist.patterns`, each paired with the
    /// finding id derived from its source.
    denylist_patterns: Vec<(String, Regex)>,
    /// Vault that receives values replaced by the `Tokenize` strategy.
    token_vault: Arc<TokenVault>,
}
850
851impl std::fmt::Debug for OutputSanitizer {
852 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
853 f.debug_struct("OutputSanitizer")
854 .field("config", &self.config)
855 .field("allowlist_patterns", &self.allowlist_patterns.len())
856 .field("denylist_patterns", &self.denylist_patterns.len())
857 .finish()
858 }
859}
860
861impl Default for OutputSanitizer {
862 fn default() -> Self {
863 Self::new()
864 }
865}
866
867impl Clone for OutputSanitizer {
868 fn clone(&self) -> Self {
869 Self {
870 config: self.config.clone(),
871 allowlist_patterns: self.allowlist_patterns.clone(),
872 denylist_patterns: self.denylist_patterns.clone(),
873 token_vault: self.token_vault.clone(),
874 }
875 }
876}
877
878impl OutputSanitizer {
879 pub fn new() -> Self {
880 match Self::with_config(OutputSanitizerConfig::default()) {
881 Ok(sanitizer) => sanitizer,
882 Err(error) => panic!("default output sanitizer config should be valid: {error}"),
883 }
884 }
885
886 pub fn with_config(config: OutputSanitizerConfig) -> Result<Self, OutputSanitizerConfigError> {
887 let allowlist_patterns = config
888 .allowlist
889 .patterns
890 .iter()
891 .map(|pattern| {
892 Regex::new(pattern).map_err(|source| OutputSanitizerConfigError::InvalidPattern {
893 list_name: "allowlist",
894 pattern: pattern.clone(),
895 source,
896 })
897 })
898 .collect::<Result<Vec<_>, _>>()?;
899 let denylist_patterns = config
900 .denylist
901 .patterns
902 .iter()
903 .map(|pattern| {
904 Regex::new(pattern)
905 .map(|re| {
906 let id = format!("denylist_{}", fingerprint(pattern));
907 (id, re)
908 })
909 .map_err(|source| OutputSanitizerConfigError::InvalidPattern {
910 list_name: "denylist",
911 pattern: pattern.clone(),
912 source,
913 })
914 })
915 .collect::<Result<Vec<_>, _>>()?;
916
917 Ok(Self {
918 config,
919 allowlist_patterns,
920 denylist_patterns,
921 token_vault: Arc::new(TokenVault::new()),
922 })
923 }
924
925 pub fn token_vault(&self) -> Arc<TokenVault> {
926 self.token_vault.clone()
927 }
928
929 pub fn config(&self) -> &OutputSanitizerConfig {
930 &self.config
931 }
932
933 fn is_allowlisted(&self, s: &str) -> bool {
934 if self.config.allowlist.exact.iter().any(|x| x == s) {
935 return true;
936 }
937 self.allowlist_patterns.iter().any(|re| re.is_match(s))
938 }
939
940 pub fn sanitize_text(&self, input: &str) -> SanitizationResult {
942 let (limited, truncated) = truncate_to_char_boundary(input, self.config.max_input_bytes);
943
944 let mut findings: Vec<SensitiveDataFinding> = Vec::new();
945
946 for needle in &self.config.denylist.exact {
948 if needle.is_empty() {
949 continue;
950 }
951 let mut start = 0usize;
952 while let Some(pos) = limited[start..].find(needle.as_str()) {
953 let s = start + pos;
954 let e = s + needle.len();
955 findings.push(SensitiveDataFinding {
956 id: format!("denylist_exact_{}", fingerprint(needle)),
957 category: SensitiveCategory::Secret,
958 data_type: "denylist".to_string(),
959 confidence: 1.0,
960 span: Span { start: s, end: e },
961 preview: preview_redacted(needle),
962 detector: "denylist".to_string(),
963 recommended_action: RedactionStrategy::Mask,
964 });
965 start = e;
966 }
967 }
968 for (id, re) in &self.denylist_patterns {
969 for m in re.find_iter(limited) {
970 findings.push(SensitiveDataFinding {
971 id: id.clone(),
972 category: SensitiveCategory::Secret,
973 data_type: "denylist".to_string(),
974 confidence: 0.95,
975 span: Span {
976 start: m.start(),
977 end: m.end(),
978 },
979 preview: preview_redacted(m.as_str()),
980 detector: "denylist".to_string(),
981 recommended_action: RedactionStrategy::Mask,
982 });
983 }
984 }
985
986 for p in compiled_patterns() {
988 let enabled = match p.category {
989 SensitiveCategory::Secret => self.config.categories.secrets,
990 SensitiveCategory::Pii => self.config.categories.pii,
991 SensitiveCategory::Internal => self.config.categories.internal,
992 SensitiveCategory::Custom(_) => true,
993 };
994 if !enabled {
995 continue;
996 }
997 for m in p.regex.find_iter(limited) {
998 let raw = m.as_str();
999 if let Some(validator) = p.validator {
1000 if !validator(raw) {
1001 continue;
1002 }
1003 }
1004 if self.is_allowlisted(raw) {
1005 continue;
1006 }
1007 let (span_start, span_end) = if p.id == "pii_ssn_compact" {
1009 let bytes = limited.as_bytes();
1010 let mut s = m.start();
1011 while s < m.end() && !bytes[s].is_ascii_digit() {
1012 s += 1;
1013 }
1014 let mut e = m.end();
1015 while e > s && !bytes[e - 1].is_ascii_digit() {
1016 e -= 1;
1017 }
1018 (s, e)
1019 } else {
1020 (m.start(), m.end())
1021 };
1022 if span_start >= span_end {
1023 continue;
1024 }
1025 let slice = &limited[span_start..span_end];
1026 findings.push(SensitiveDataFinding {
1027 id: p.id.to_string(),
1028 category: p.category.clone(),
1029 data_type: p.data_type.to_string(),
1030 confidence: p.confidence,
1031 span: Span {
1032 start: span_start,
1033 end: span_end,
1034 },
1035 preview: preview_redacted(slice),
1036 detector: "pattern".to_string(),
1037 recommended_action: p.recommended.clone(),
1038 });
1039 }
1040 }
1041
1042 if self.config.categories.secrets && self.config.entropy.enabled {
1044 static TOKEN_RE: OnceLock<Regex> = OnceLock::new();
1045 let token_re = TOKEN_RE.get_or_init(|| compile_or_nomatch(r"[A-Za-z0-9+/=_-]{16,}"));
1046 for m in token_re.find_iter(limited) {
1047 let token = m.as_str();
1048 if token.len() < self.config.entropy.min_token_len {
1049 continue;
1050 }
1051 if self.is_allowlisted(token) {
1052 continue;
1053 }
1054 if !is_candidate_secret_token(token) {
1055 continue;
1056 }
1057 let ent = match shannon_entropy_ascii(token) {
1058 Some(e) => e,
1059 None => continue,
1060 };
1061 if ent < self.config.entropy.threshold {
1062 continue;
1063 }
1064 findings.push(SensitiveDataFinding {
1065 id: "secret_high_entropy_token".to_string(),
1066 category: SensitiveCategory::Secret,
1067 data_type: "high_entropy_token".to_string(),
1068 confidence: 0.6,
1069 span: Span {
1070 start: m.start(),
1071 end: m.end(),
1072 },
1073 preview: preview_redacted(token),
1074 detector: "entropy".to_string(),
1075 recommended_action: RedactionStrategy::Mask,
1076 });
1077 }
1078 }
1079
1080 findings.sort_by(|a, b| {
1081 a.span
1082 .start
1083 .cmp(&b.span.start)
1084 .then_with(|| b.span.end.cmp(&a.span.end))
1085 });
1086
1087 let merged = resolve_overlaps(&findings, &self.config.redaction_strategies);
1088
1089 let mut sanitized = limited.to_string();
1090 let mut redactions: Vec<Redaction> = Vec::new();
1091 let mut applied_any = false;
1092
1093 let mut merged_desc = merged;
1095 merged_desc.sort_by(|a, b| b.0.start.cmp(&a.0.start).then(b.0.end.cmp(&a.0.end)));
1096
1097 for (span, strategy, category, data_type, finding_id) in merged_desc {
1098 if span.end > sanitized.len() || span.start >= span.end {
1099 continue;
1100 }
1101 if !sanitized.is_char_boundary(span.start) || !sanitized.is_char_boundary(span.end) {
1102 continue;
1103 }
1104 let raw = &sanitized[span.start..span.end];
1105 let replacement = self.replacement_for(&strategy, &category, &data_type, raw);
1106 if replacement == raw {
1107 continue;
1108 }
1109 sanitized.replace_range(span.start..span.end, &replacement);
1110 applied_any = true;
1111 redactions.push(Redaction {
1112 finding_id,
1113 strategy,
1114 original_span: span,
1115 replacement,
1116 });
1117 }
1118
1119 if truncated {
1120 sanitized.push_str("\n[TRUNCATED_UNSCANNED_OUTPUT]");
1121 applied_any = true;
1122 }
1123
1124 let stats = ProcessingStats {
1125 input_length: input.len(),
1126 output_length: sanitized.len(),
1127 findings_count: findings.len(),
1128 redactions_count: redactions.len(),
1129 };
1130
1131 let mut result = SanitizationResult {
1132 sanitized,
1133 was_redacted: applied_any,
1134 findings,
1135 redactions,
1136 stats,
1137 };
1138 if !self.config.include_findings {
1139 result.findings.clear();
1140 }
1141 result
1142 }
1143
1144 fn replacement_for(
1145 &self,
1146 strategy: &RedactionStrategy,
1147 category: &SensitiveCategory,
1148 data_type: &str,
1149 raw: &str,
1150 ) -> String {
1151 match strategy {
1152 RedactionStrategy::Keep => raw.to_string(),
1153 RedactionStrategy::Mask => "****".to_string(),
1154 RedactionStrategy::Fingerprint => format!("[FP:{}]", fingerprint(raw)),
1155 RedactionStrategy::Drop => String::new(),
1156 RedactionStrategy::Tokenize => {
1157 let id = self.token_vault.insert(raw);
1158 format!("[TOKEN:{id}]")
1159 }
1160 RedactionStrategy::Partial => preview_redacted(raw),
1161 RedactionStrategy::TypeLabel => match category {
1162 SensitiveCategory::Secret | SensitiveCategory::Pii => {
1163 format!("[REDACTED:{data_type}]")
1164 }
1165 SensitiveCategory::Internal => "[REDACTED:internal]".to_string(),
1166 SensitiveCategory::Custom(label) => format!("[REDACTED:{label}]"),
1167 },
1168 }
1169 }
1170
1171 pub fn sanitize_value(&self, value: &serde_json::Value) -> SanitizedValue {
1175 let mut findings: Vec<SensitiveDataFinding> = Vec::new();
1176 let mut redactions: Vec<Redaction> = Vec::new();
1177 let mut was_redacted = false;
1178 let sanitized =
1179 self.sanitize_value_inner(value, &mut findings, &mut redactions, &mut was_redacted);
1180 SanitizedValue {
1181 value: sanitized,
1182 findings,
1183 redactions,
1184 was_redacted,
1185 }
1186 }
1187
1188 fn sanitize_value_inner(
1189 &self,
1190 value: &serde_json::Value,
1191 findings: &mut Vec<SensitiveDataFinding>,
1192 redactions: &mut Vec<Redaction>,
1193 was_redacted: &mut bool,
1194 ) -> serde_json::Value {
1195 use serde_json::Value as V;
1196 match value {
1197 V::Null | V::Bool(_) | V::Number(_) => value.clone(),
1198 V::String(s) => {
1199 let r = self.sanitize_text(s);
1200 if r.was_redacted {
1201 *was_redacted = true;
1202 if r.sanitized.is_empty()
1206 && r.redactions.len() == 1
1207 && matches!(r.redactions[0].strategy, RedactionStrategy::Drop)
1208 {
1209 findings.extend(r.findings);
1210 redactions.extend(r.redactions);
1211 return V::Null;
1212 }
1213 }
1214 findings.extend(r.findings);
1215 redactions.extend(r.redactions);
1216 V::String(r.sanitized)
1217 }
1218 V::Array(items) => {
1219 let new_items: Vec<serde_json::Value> = items
1220 .iter()
1221 .map(|v| self.sanitize_value_inner(v, findings, redactions, was_redacted))
1222 .collect();
1223 V::Array(new_items)
1224 }
1225 V::Object(map) => {
1226 if let Some((finding, redaction)) = detect_service_account_object(map) {
1227 *was_redacted = true;
1228 findings.push(finding);
1229 redactions.push(redaction);
1230 return V::Null;
1231 }
1232 let mut new_map = serde_json::Map::with_capacity(map.len());
1233 for (k, v) in map {
1234 let sv = self.sanitize_value_inner(v, findings, redactions, was_redacted);
1235 new_map.insert(k.clone(), sv);
1236 }
1237 V::Object(new_map)
1238 }
1239 }
1240 }
1241}
1242
/// Result of `OutputSanitizer::sanitize_value`.
#[derive(Debug, Clone)]
pub struct SanitizedValue {
    /// The JSON value after sanitization.
    pub value: serde_json::Value,
    /// Findings collected from every string in the value. Their spans are
    /// relative to the individual string they were found in.
    pub findings: Vec<SensitiveDataFinding>,
    /// Replacements applied across the whole value.
    pub redactions: Vec<Redaction>,
    /// `true` when anything in the value was changed.
    pub was_redacted: bool,
}
1251
1252fn strategy_rank(s: &RedactionStrategy) -> u8 {
1257 match s {
1258 RedactionStrategy::Keep => 0,
1259 RedactionStrategy::Partial => 1,
1260 RedactionStrategy::TypeLabel => 2,
1261 RedactionStrategy::Fingerprint => 3,
1262 RedactionStrategy::Tokenize => 4,
1263 RedactionStrategy::Mask => 5,
1264 RedactionStrategy::Drop => 6,
1265 }
1266}
1267
/// A span ready for redaction:
/// `(span, strategy, category, data_type, finding_id)`.
type ResolvedSpan = (Span, RedactionStrategy, SensitiveCategory, String, String);
1269
1270fn resolve_overlaps(
1271 findings: &[SensitiveDataFinding],
1272 defaults: &HashMap<SensitiveCategory, RedactionStrategy>,
1273) -> Vec<ResolvedSpan> {
1274 let mut spans: Vec<ResolvedSpan> = Vec::with_capacity(findings.len());
1275 for f in findings {
1276 let strategy = match &f.recommended_action {
1283 RedactionStrategy::Keep => RedactionStrategy::Keep,
1284 RedactionStrategy::Drop
1285 | RedactionStrategy::Fingerprint
1286 | RedactionStrategy::Tokenize => f.recommended_action.clone(),
1287 _ => defaults
1288 .get(&f.category)
1289 .cloned()
1290 .unwrap_or_else(|| f.recommended_action.clone()),
1291 };
1292 spans.push((
1293 f.span,
1294 strategy,
1295 f.category.clone(),
1296 f.data_type.clone(),
1297 f.id.clone(),
1298 ));
1299 }
1300
1301 spans.sort_by(|a, b| {
1302 a.0.start
1303 .cmp(&b.0.start)
1304 .then_with(|| b.0.end.cmp(&a.0.end))
1305 });
1306
1307 let mut merged: Vec<ResolvedSpan> = Vec::new();
1308 for current in spans {
1309 if let Some(last) = merged.last_mut() {
1310 if current.0.start < last.0.end {
1311 let new_end = last.0.end.max(current.0.end);
1312 last.0.end = new_end;
1313 if strategy_rank(¤t.1) > strategy_rank(&last.1) {
1314 last.1 = current.1;
1315 last.2 = current.2;
1316 last.3 = current.3;
1317 last.4 = current.4;
1318 }
1319 continue;
1320 }
1321 }
1322 merged.push(current);
1323 }
1324 merged
1325}
1326
1327fn detect_service_account_object(
1328 map: &serde_json::Map<String, serde_json::Value>,
1329) -> Option<(SensitiveDataFinding, Redaction)> {
1330 let value = map.get("type")?.as_str()?;
1331 if !value.eq_ignore_ascii_case("service_account") {
1332 return None;
1333 }
1334
1335 let span = Span { start: 0, end: 0 };
1336 let finding = SensitiveDataFinding {
1337 id: "secret_gcp_service_account".to_string(),
1338 category: SensitiveCategory::Secret,
1339 data_type: "gcp_service_account_json".to_string(),
1340 confidence: 0.97,
1341 span,
1342 preview: preview_redacted(value),
1343 detector: "object".to_string(),
1344 recommended_action: RedactionStrategy::Drop,
1345 };
1346 let redaction = Redaction {
1347 finding_id: finding.id.clone(),
1348 strategy: RedactionStrategy::Drop,
1349 original_span: span,
1350 replacement: String::new(),
1351 };
1352 Some((finding, redaction))
1353}
1354
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `guard.evaluate` against a freshly signed, self-issued capability and a
    /// tool-call request carrying `arguments`, returning the resulting verdict.
    ///
    /// Shared by the `guard_evaluate_*` tests so the request/context fixture is
    /// written once. Panics if signing or evaluation fails.
    fn evaluate_arguments(
        guard: &ResponseSanitizationGuard,
        tool_name: &str,
        arguments: serde_json::Value,
    ) -> Verdict {
        let kp = chio_core::crypto::Keypair::generate();
        let scope = chio_core::capability::ChioScope::default();
        let agent_id = kp.public_key().to_hex();
        let server_id = "srv".to_string();

        let cap_body = chio_core::capability::CapabilityTokenBody {
            id: "cap-test".to_string(),
            issuer: kp.public_key(),
            subject: kp.public_key(),
            scope: scope.clone(),
            issued_at: 0,
            expires_at: u64::MAX,
            delegation_chain: vec![],
        };
        let cap = chio_core::capability::CapabilityToken::sign(cap_body, &kp).expect("sign cap");

        let request = chio_kernel::ToolCallRequest {
            request_id: "req-test".to_string(),
            capability: cap,
            tool_name: tool_name.to_string(),
            server_id: server_id.clone(),
            agent_id: agent_id.clone(),
            arguments,
            dpop_proof: None,
            governed_intent: None,
            approval_token: None,
            model_metadata: None,
            federated_origin_kernel_id: None,
        };

        let ctx = chio_kernel::GuardContext {
            request: &request,
            scope: &scope,
            agent_id: &agent_id,
            server_id: &server_id,
            session_filesystem_roots: None,
            matched_grant_index: None,
        };

        guard.evaluate(&ctx).expect("ok")
    }

    // The guard reports a fixed name.
    #[test]
    fn guard_name() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Block);
        assert_eq!(guard.name(), "response-sanitization");
    }

    // --- `scan`: plain-text detection -------------------------------------------

    #[test]
    fn detects_ssn() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Block);
        let findings = guard.scan("My SSN is 123-45-6789");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|(name, _)| name == "SSN"));
    }

    #[test]
    fn detects_email() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Block);
        let findings = guard.scan("Contact john@example.com for info");
        assert!(findings.iter().any(|(name, _)| name == "email"));
    }

    #[test]
    fn detects_mrn() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Block);
        let findings = guard.scan("Patient MRN: 123456789");
        assert!(findings.iter().any(|(name, _)| name == "MRN"));
    }

    #[test]
    fn no_findings_on_clean_text() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let findings = guard.scan("This is perfectly clean text with no PII.");
        assert!(findings.is_empty());
    }

    // With a `High` minimum, the medium-level email pattern is ignored while the
    // high-level SSN pattern still fires.
    #[test]
    fn respects_minimum_sensitivity() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let email_findings = guard.scan("Contact john@example.com");
        assert!(!email_findings.iter().any(|(name, _)| name == "email"));
        let ssn_findings = guard.scan("SSN 123-45-6789");
        assert!(ssn_findings.iter().any(|(name, _)| name == "SSN"));
    }

    // --- `redact`: in-place replacement -----------------------------------------

    #[test]
    fn redacts_ssn() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Redact);
        let (redacted, count) = guard.redact("SSN is 123-45-6789 please");
        assert!(redacted.contains("[SSN REDACTED]"));
        assert!(!redacted.contains("123-45-6789"));
        assert!(count > 0);
    }

    #[test]
    fn redacts_email() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::Low, SanitizationAction::Redact);
        let (redacted, _) = guard.redact("Email: jane@example.com");
        assert!(redacted.contains("[EMAIL REDACTED]"));
        assert!(!redacted.contains("jane@example.com"));
    }

    // --- `scan_response`: JSON responses -----------------------------------------

    #[test]
    fn scan_response_clean() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let response = serde_json::json!({"status": "ok", "data": "nothing sensitive"});
        let result = guard.scan_response(&response);
        assert!(matches!(result, ScanResult::Clean));
    }

    #[test]
    fn scan_response_blocked() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let response = serde_json::json!({"patient": "SSN: 123-45-6789"});
        let result = guard.scan_response(&response);
        assert!(matches!(result, ScanResult::Blocked(_)));
    }

    #[test]
    fn scan_response_redacted() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Redact);
        let response = serde_json::json!({"patient": "SSN: 123-45-6789"});
        let result = guard.scan_response(&response);
        let ScanResult::Redacted { redacted_text, .. } = result else {
            panic!("expected Redacted result");
        };
        assert!(redacted_text.contains("[SSN REDACTED]"));
    }

    // --- `Guard::evaluate`: request arguments ------------------------------------

    #[test]
    fn guard_evaluate_denies_args_with_pii() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let verdict = evaluate_arguments(
            &guard,
            "write_file",
            serde_json::json!({"content": "SSN is 123-45-6789"}),
        );
        assert_eq!(verdict, Verdict::Deny);
    }

    #[test]
    fn guard_evaluate_allows_clean_args() {
        let guard =
            ResponseSanitizationGuard::new(SensitivityLevel::High, SanitizationAction::Block);
        let verdict = evaluate_arguments(
            &guard,
            "read_file",
            serde_json::json!({"path": "/app/src/main.rs"}),
        );
        assert_eq!(verdict, Verdict::Allow);
    }

    // A guard built only from a caller-supplied pattern reports it by name.
    #[test]
    fn custom_pattern() {
        let pattern = build_pattern(
            "custom-id",
            r"\bCUST-\d{8}\b",
            SensitivityLevel::High,
            "[CUST-ID REDACTED]",
        )
        .expect("build_pattern should accept the custom pattern");

        let guard = ResponseSanitizationGuard::with_patterns(
            vec![pattern],
            SensitivityLevel::High,
            SanitizationAction::Block,
        );
        let findings = guard.scan("Customer CUST-12345678 record");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|(name, _)| name == "custom-id"));
    }

    // --- Validators --------------------------------------------------------------

    #[test]
    fn luhn_rejects_random_16_digit_number() {
        // Sixteen digits, but the Luhn checksum does not come out to 0 mod 10.
        assert!(!is_luhn_valid_card_number("1234567890123456"));
        // Well-known Luhn-valid test number; separators are tolerated.
        assert!(is_luhn_valid_card_number("4111 1111 1111 1111"));
        // Same number with the check digit off by one.
        assert!(!is_luhn_valid_card_number("4111 1111 1111 1112"));
    }

    #[test]
    fn shannon_entropy_basic() {
        // A single repeated symbol carries no information.
        let uniform = shannon_entropy_ascii("aaaaaa").unwrap();
        assert!(uniform < 0.01);
        // Twenty distinct symbols: log2(20) is roughly 4.32 bits per character.
        let varied = shannon_entropy_ascii("abcdefghij0123456789").unwrap();
        assert!(varied > 4.0);
    }

    #[test]
    fn ssn_fragments_validator_rejects_invalid_areas() {
        // Area numbers 000, 666 and 900 must be rejected.
        assert!(!is_valid_ssn_fragments("000-12-3456"));
        assert!(!is_valid_ssn_fragments("666-12-3456"));
        assert!(!is_valid_ssn_fragments("900-12-3456"));
        // All-zero group and all-zero serial must be rejected.
        assert!(!is_valid_ssn_fragments("123-00-4567"));
        assert!(!is_valid_ssn_fragments("123-45-0000"));
        assert!(is_valid_ssn_fragments("123-45-6789"));
    }
}