1use std::borrow::Cow;
14
15use once_cell::sync::Lazy;
16use regex::{Regex, RegexSet};
17
/// Placeholder text substituted for every matched secret.
///
/// It is also hashed into the redaction algorithm fingerprint, so changing it
/// changes that fingerprint.
const REDACTED: &str = "[REDACTED]";
20
/// One secret detector: the regex source text paired with its compiled form.
struct SecretPattern {
    /// Regex source text. Fed to the `RegexSet` prefilter and hashed into the
    /// redaction algorithm fingerprint.
    pattern: &'static str,
    /// Compiled regex used to perform the actual replacement.
    regex: Regex,
}
26
27static SECRET_PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
29 vec![
30 SecretPattern {
32 pattern: r"\bAKIA[0-9A-Z]{16}\b",
33 regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").expect("aws access key regex"),
34 },
35 SecretPattern {
37 pattern: r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
38 regex: Regex::new(
39 r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
40 )
41 .expect("aws secret regex"),
42 },
43 SecretPattern {
45 pattern: r"\bgh[pousr]_[A-Za-z0-9]{36}\b",
46 regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{36}\b").expect("github pat regex"),
47 },
48 SecretPattern {
50 pattern: r"\bsk-[A-Za-z0-9]{20,}\b",
51 regex: Regex::new(r"\bsk-[A-Za-z0-9]{20,}\b").expect("openai key regex"),
52 },
53 SecretPattern {
55 pattern: r"\bsk-ant-[A-Za-z0-9]{20,}\b",
56 regex: Regex::new(r"\bsk-ant-[A-Za-z0-9]{20,}\b").expect("anthropic key regex"),
57 },
58 SecretPattern {
60 pattern: r"(?i)Bearer\s+[A-Za-z0-9_\-.]{20,}",
61 regex: Regex::new(r"(?i)Bearer\s+[A-Za-z0-9_\-.]{20,}").expect("bearer token regex"),
62 },
63 SecretPattern {
65 pattern: r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b",
66 regex: Regex::new(r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b")
67 .expect("jwt regex"),
68 },
69 SecretPattern {
71 pattern: r"-----BEGIN (?:RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY-----",
72 regex: Regex::new(r"-----BEGIN (?:RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY-----")
73 .expect("private key regex"),
74 },
75 SecretPattern {
77 pattern: r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]{8,}",
78 regex: Regex::new(
79 r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]{8,}",
80 )
81 .expect("db url regex"),
82 },
83 SecretPattern {
85 pattern: r#"(?i)(api[_-]?key|api[_-]?secret|auth[_-]?token|access[_-]?token|secret[_-]?key|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-/+=]{8,}['"]?"#,
86 regex: Regex::new(
87 r#"(?i)(api[_-]?key|api[_-]?secret|auth[_-]?token|access[_-]?token|secret[_-]?key|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-/+=]{8,}['"]?"#,
88 )
89 .expect("generic api key regex"),
90 },
91 SecretPattern {
93 pattern: r"\bxox[bpsar]-[A-Za-z0-9\-]{10,}",
94 regex: Regex::new(r"\bxox[bpsar]-[A-Za-z0-9\-]{10,}").expect("slack token regex"),
95 },
96 SecretPattern {
98 pattern: r"\b[spr]k_live_[A-Za-z0-9]{20,}",
99 regex: Regex::new(r"\b[spr]k_live_[A-Za-z0-9]{20,}").expect("stripe key regex"),
100 },
101 ]
102});
103
104static SECRET_REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {
107 RegexSet::new(SECRET_PATTERNS.iter().map(|pattern| pattern.pattern)).expect("secret regex set")
108});
109
110pub fn redact_text(input: &str) -> Cow<'_, str> {
114 let matches = SECRET_REGEX_SET.matches(input);
115 if !matches.matched_any() {
116 return Cow::Borrowed(input);
117 }
118
119 let mut output = Cow::Borrowed(input);
120 for idx in matches.iter() {
121 let replaced = SECRET_PATTERNS[idx]
122 .regex
123 .replace_all(output.as_ref(), REDACTED);
124 if let Cow::Owned(redacted) = replaced {
125 output = Cow::Owned(redacted);
126 }
127 }
128 output
129}
130
131pub fn redact_json(value: &serde_json::Value) -> serde_json::Value {
137 match value {
138 serde_json::Value::String(s) => {
139 let redacted = redact_text(s).into_owned();
140 serde_json::Value::String(redacted)
141 }
142 serde_json::Value::Array(arr) => {
143 serde_json::Value::Array(arr.iter().map(redact_json).collect())
144 }
145 serde_json::Value::Object(obj) => {
146 let mut new_obj = serde_json::Map::new();
147 for (k, v) in obj {
148 let redacted_key = redact_text(k).into_owned();
149 new_obj.insert(redacted_key, redact_json(v));
150 }
151 serde_json::Value::Object(new_obj)
152 }
153 other => other.clone(),
154 }
155}
156
157#[doc(hidden)]
158pub fn fuzz_redact_json_with_memoizing_redactor(
159 value: &serde_json::Value,
160 capacity: usize,
161) -> serde_json::Value {
162 MemoizingRedactor::with_capacity(capacity.clamp(1, 1024)).redact_json(value)
163}
164
165pub fn redaction_enabled() -> bool {
169 match dotenvy::var("CASS_REDACT_SECRETS") {
170 Ok(val) => !matches!(val.as_str(), "0" | "false" | "off" | "no"),
171 Err(_) => true,
172 }
173}
174
175pub fn redaction_algorithm_fingerprint() -> String {
188 static FINGERPRINT: Lazy<String> = Lazy::new(|| {
189 let mut hasher = blake3::Hasher::new();
190 for pattern in SECRET_PATTERNS.iter() {
191 hasher.update(pattern.pattern.as_bytes());
192 hasher.update(&[0]);
193 }
194 hasher.update(REDACTED.as_bytes());
195 format!("redact-v1:{}", hasher.finalize().to_hex())
196 });
197 FINGERPRINT.clone()
198}
199
/// Redactor that memoizes [`redact_text`] results in a content-addressed
/// cache.
///
/// Cache keys combine a hash of the input with the redaction algorithm
/// fingerprint captured at construction time.
// NOTE(review): `dead_code` is allowed here without a stated reason;
// presumably not every item is used outside tests — confirm and narrow.
#[allow(dead_code)]
pub(crate) struct MemoizingRedactor {
    /// Memo cache mapping input keys to their redacted text.
    text_cache: crate::indexer::memoization::ContentAddressedMemoCache<String>,
    /// Fingerprint of the redaction rules, recorded when the redactor was
    /// built and embedded in every cache key.
    algorithm_fingerprint: String,
}
226
227#[allow(dead_code)]
228impl MemoizingRedactor {
229 pub(crate) const DEFAULT_CAPACITY: usize = 4096;
233
234 pub(crate) fn with_capacity(capacity: usize) -> Self {
235 Self {
236 text_cache: crate::indexer::memoization::ContentAddressedMemoCache::with_capacity(
237 capacity,
238 ),
239 algorithm_fingerprint: redaction_algorithm_fingerprint(),
240 }
241 }
242
243 pub(crate) fn new() -> Self {
244 Self::with_capacity(Self::DEFAULT_CAPACITY)
245 }
246
247 pub(crate) fn algorithm_fingerprint(&self) -> &str {
248 &self.algorithm_fingerprint
249 }
250
251 pub(crate) fn stats(&self) -> &crate::indexer::memoization::MemoCacheStats {
252 self.text_cache.stats()
253 }
254
255 pub(crate) fn redact_text(&mut self, input: &str) -> String {
267 let (output, _audit) = self.redact_text_with_audit(input);
268 output
269 }
270
271 pub(crate) fn redact_text_with_audit(
278 &mut self,
279 input: &str,
280 ) -> (
281 String,
282 Vec<crate::indexer::memoization::MemoCacheAuditRecord>,
283 ) {
284 if input.is_empty() {
287 return (String::new(), Vec::new());
288 }
289 let key = self.key_for(input);
290 let (lookup, lookup_audit) = self.text_cache.get_with_audit(&key);
291 Self::trace_audit(&lookup_audit);
292 match lookup {
293 crate::indexer::memoization::MemoLookup::Hit { value } => (value, vec![lookup_audit]),
294 crate::indexer::memoization::MemoLookup::Quarantined { reason } => {
295 tracing::warn!(
301 quarantine_reason = %reason,
302 algorithm = %self.algorithm_fingerprint,
303 "redaction memo entry is quarantined; falling back to direct regex pass"
304 );
305 let redacted = redact_text(input).into_owned();
306 (redacted, vec![lookup_audit])
307 }
308 crate::indexer::memoization::MemoLookup::Miss => {
309 let redacted = redact_text(input).into_owned();
310 let insert_audit = self.text_cache.insert_with_audit(key, redacted.clone());
311 Self::trace_audit(&insert_audit);
312 (redacted, vec![lookup_audit, insert_audit])
313 }
314 }
315 }
316
317 pub(crate) fn invalidate(&mut self, input: &str) -> bool {
323 if input.is_empty() {
324 return false;
325 }
326 let key = self.key_for(input);
327 let audit = self.text_cache.invalidate_with_audit(&key);
328 Self::trace_audit(&audit);
329 audit.changed
330 }
331
332 pub(crate) fn quarantine(&mut self, input: &str, reason: impl Into<String>) {
340 if input.is_empty() {
341 return;
342 }
343 let key = self.key_for(input);
344 let audit = self.text_cache.quarantine_with_audit(key, reason);
345 Self::trace_audit(&audit);
346 }
347
348 fn trace_audit(audit: &crate::indexer::memoization::MemoCacheAuditRecord) {
349 use crate::indexer::memoization::MemoCacheEvent;
355 match audit.event {
356 MemoCacheEvent::Hit => tracing::trace!(
357 target: "cass::redact::memo",
358 algorithm = %audit.key.algorithm,
359 stats = ?audit.stats,
360 "redact memo hit"
361 ),
362 MemoCacheEvent::Miss => tracing::debug!(
363 target: "cass::redact::memo",
364 algorithm = %audit.key.algorithm,
365 stats = ?audit.stats,
366 "redact memo miss"
367 ),
368 MemoCacheEvent::Insert => tracing::debug!(
369 target: "cass::redact::memo",
370 algorithm = %audit.key.algorithm,
371 live_entries = audit.stats.live_entries,
372 "redact memo insert"
373 ),
374 MemoCacheEvent::Evict { ref reason } => tracing::info!(
375 target: "cass::redact::memo",
376 evict_reason = ?reason,
377 live_entries = audit.stats.live_entries,
378 evictions_capacity = audit.stats.evictions_capacity,
379 "redact memo eviction"
380 ),
381 MemoCacheEvent::Invalidate => tracing::warn!(
382 target: "cass::redact::memo",
383 changed = audit.changed,
384 live_entries = audit.stats.live_entries,
385 invalidations = audit.stats.invalidations,
386 "redact memo invalidate"
387 ),
388 MemoCacheEvent::Quarantine { ref reason } => tracing::warn!(
389 target: "cass::redact::memo",
390 quarantine_reason = %reason,
391 quarantined_entries = audit.quarantined_entries,
392 "redact memo quarantine"
393 ),
394 }
395 }
396
397 pub(crate) fn redact_json(&mut self, value: &serde_json::Value) -> serde_json::Value {
403 match value {
404 serde_json::Value::String(s) => serde_json::Value::String(self.redact_text(s)),
405 serde_json::Value::Array(arr) => {
406 serde_json::Value::Array(arr.iter().map(|v| self.redact_json(v)).collect())
407 }
408 serde_json::Value::Object(obj) => {
409 let mut new_obj = serde_json::Map::with_capacity(obj.len());
410 for (k, v) in obj {
411 let redacted_key = self.redact_text(k);
412 new_obj.insert(redacted_key, self.redact_json(v));
413 }
414 serde_json::Value::Object(new_obj)
415 }
416 other => other.clone(),
417 }
418 }
419
420 fn key_for(&self, input: &str) -> crate::indexer::memoization::MemoKey {
421 let mut hasher = blake3::Hasher::new();
425 hasher.update(input.as_bytes());
426 let content_hash = crate::indexer::memoization::MemoContentHash::from_bytes(
427 hasher.finalize().as_bytes().to_vec(),
428 );
429 crate::indexer::memoization::MemoKey::new(
430 content_hash,
431 "redact_text",
432 self.algorithm_fingerprint.clone(),
433 )
434 }
435}
436
437impl Default for MemoizingRedactor {
438 fn default() -> Self {
439 Self::new()
440 }
441}
442
// Unit tests for the plain redaction functions and the memoizing redactor.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use serial_test::serial;

    /// An `sk-` key embedded in prose is replaced; surrounding text is kept.
    #[test]
    fn redacts_openai_key() {
        let input = "my key is sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "my key is [REDACTED]");
        assert!(!output.contains("sk-ABCDE"));
    }

    /// An `sk-ant-` key is replaced as a whole.
    #[test]
    fn redacts_anthropic_key() {
        let input = "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    /// A `ghp_` token with 36 trailing alphanumerics is replaced.
    #[test]
    fn redacts_github_pat() {
        let input = "token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "token [REDACTED]");
    }

    /// The token after `Bearer` does not survive redaction.
    #[test]
    fn redacts_bearer_token() {
        let input = "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";
        let output = redact_text(input);
        assert!(!output.contains("eyJhbGci"));
    }

    /// A bare `AKIA…` access key id is replaced.
    #[test]
    fn redacts_aws_access_key() {
        let input = "AKIAIOSFODNN7EXAMPLE";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    /// The private-key BEGIN header is replaced; only the start of the output
    /// is checked here.
    #[test]
    fn redacts_private_key_header() {
        let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAK...";
        let output = redact_text(input);
        assert!(output.starts_with("[REDACTED]"));
    }

    /// A generic `api_key=value` assignment is replaced, name included.
    #[test]
    fn redacts_generic_api_key_assignment() {
        let input = "api_key=abcdefgh12345678";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    /// Credentials inside a database URL do not survive redaction.
    #[test]
    fn redacts_database_url() {
        let input = "DATABASE_URL=postgres://user:pass@host:5432/db";
        let output = redact_text(input);
        assert!(!output.contains("user:pass"));
    }

    /// An `sk_live_` key is replaced.
    #[test]
    fn redacts_stripe_key() {
        // Presumably assembled at runtime so the full key literal never
        // appears in this source file — TODO confirm.
        let input = format!("{}_{}", "sk_live", "AAAABBBBCCCCDDDDEEEEFFFFGGGG");
        let output = redact_text(&input);
        assert_eq!(output, "[REDACTED]");
    }

    /// An `xoxb-` token is replaced.
    #[test]
    fn redacts_slack_token() {
        let input = "xoxb-123456789-abcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    /// Text without secrets comes back unchanged and still borrowed.
    #[test]
    fn leaves_normal_text_unchanged() {
        let input = "Hello, this is a normal message about code review.";
        let output = redact_text(input);
        assert_eq!(output, input);
        assert!(
            matches!(output, Cow::Borrowed(_)),
            "no-secret path should not allocate"
        );
    }

    /// A key-like prefix that is too short to match is left alone.
    #[test]
    fn leaves_short_tokens_unchanged() {
        let input = "sk-abc";
        let output = redact_text(input);
        assert_eq!(output, input);
    }

    /// String values are redacted; safe strings and numbers pass through.
    #[test]
    fn redacts_json_string_values() {
        let input = json!({
            "tool_result": "Response contains sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
            "safe": "no secrets here",
            "number": 42
        });
        let output = redact_json(&input);
        assert_eq!(output["tool_result"], json!("Response contains [REDACTED]"));
        assert_eq!(output["safe"], json!("no secrets here"));
        assert_eq!(output["number"], json!(42));
    }

    /// Redaction recurses into nested objects and arrays.
    #[test]
    fn redacts_nested_json() {
        let input = json!({
            "outer": {
                "inner": "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"
            },
            "array": ["safe", "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"]
        });
        let output = redact_json(&input);
        assert_eq!(output["outer"]["inner"], json!("[REDACTED]"));
        assert_eq!(output["array"][0], json!("safe"));
        assert_eq!(output["array"][1], json!("[REDACTED]"));
    }

    /// With the variable unset, redaction is enabled.
    #[test]
    #[serial]
    fn redaction_enabled_default() {
        // SAFETY: `#[serial]` keeps the env-mutating tests from overlapping
        // each other. NOTE(review): confirm no non-serial test reads the
        // process environment concurrently.
        unsafe { std::env::remove_var("CASS_REDACT_SECRETS") };
        assert!(redaction_enabled());
    }

    /// The values `0` and `false` disable redaction.
    #[test]
    #[serial]
    fn redaction_can_be_disabled() {
        // SAFETY: see `redaction_enabled_default`; the same assumption applies
        // to every env mutation in this test.
        unsafe { std::env::set_var("CASS_REDACT_SECRETS", "0") };
        assert!(!redaction_enabled());

        unsafe { std::env::set_var("CASS_REDACT_SECRETS", "false") };
        assert!(!redaction_enabled());

        // Clean up so later tests see the default (unset) state.
        unsafe { std::env::remove_var("CASS_REDACT_SECRETS") };
    }

    /// Two different secrets in one string are both replaced, and the result
    /// is an owned string.
    #[test]
    fn multiple_secrets_in_one_string() {
        let input = "key1=sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij and key2=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert!(!output.contains("sk-ABCDE"));
        assert!(!output.contains("ghp_ABCDE"));
        assert_eq!(output.matches("[REDACTED]").count(), 2);
        assert!(
            matches!(output, Cow::Owned(_)),
            "matched secret path should return owned redacted text"
        );
    }

    /// For a range of inputs, the memoized result (first and second call)
    /// equals the uncached `redact_text` result.
    #[test]
    fn memoizing_redactor_matches_uncached_for_arbitrary_input() {
        // Truncates `s` to at most `max_bytes`, backing up to a char boundary
        // so slicing cannot panic on multi-byte characters.
        fn safe_prefix(s: &str, max_bytes: usize) -> &str {
            let mut end = s.len().min(max_bytes);
            while end > 0 && !s.is_char_boundary(end) {
                end -= 1;
            }
            &s[..end]
        }
        // 2_048 repetitions of a 7-byte string (14 KiB), despite the name.
        let twenty_kib_unicode = "🔐abc".repeat(2_048);
        let inputs: &[&str] = &[
            "",
            "no secrets here, just prose",
            "my key is sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
            "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij followed by AKIAABCDEFGHIJKLMNOP",
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
            "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij and another ghp_ZYXWVUTSRQPONMLKJIHGFEDCBA0123456789",
            "🔐 user pasted sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij from 测试",
            &twenty_kib_unicode,
            &"a".repeat(10_000),
        ];
        let mut redactor = MemoizingRedactor::with_capacity(64);
        for input in inputs {
            let uncached = redact_text(input).into_owned();
            // First call populates the cache; second call is served from it.
            let memoized_first = redactor.redact_text(input);
            let memoized_second = redactor.redact_text(input);
            assert_eq!(
                uncached,
                memoized_first,
                "memoized first call must match legacy uncached redact_text for input prefix: {:?}",
                safe_prefix(input, 64)
            );
            assert_eq!(
                uncached,
                memoized_second,
                "memoized second call must match legacy uncached for input prefix: {:?}",
                safe_prefix(input, 64)
            );
        }
    }

    /// Repeating the same payload yields one miss, one insert and two hits.
    #[test]
    fn memoizing_redactor_reuses_cache_for_repeated_content() {
        let mut redactor = MemoizingRedactor::with_capacity(16);
        let payload = "boilerplate assistant prompt: please help with sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        // Empty input bypasses the cache, so it must not affect the counters
        // asserted below.
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        let stats = redactor.stats();
        assert_eq!(stats.misses, 1, "first call must be a cache miss");
        assert_eq!(
            stats.hits, 2,
            "subsequent identical calls must be cache hits"
        );
        assert_eq!(stats.inserts, 1, "exactly one redacted result inserted");
    }

    /// The fingerprint has the `redact-v1:` prefix and a 64-char hex digest,
    /// is stable across calls, and is shared by separately built redactors.
    #[test]
    fn memoizing_redactor_keys_isolate_by_algorithm_fingerprint() {
        let fingerprint = redaction_algorithm_fingerprint();
        assert!(
            fingerprint.starts_with("redact-v1:"),
            "fingerprint must carry an explicit version epoch, got: {fingerprint}"
        );
        let hex_part = fingerprint.strip_prefix("redact-v1:").unwrap();
        assert_eq!(
            hex_part.len(),
            64,
            "fingerprint hash must be a 64-char blake3 hex digest"
        );
        assert_eq!(fingerprint, redaction_algorithm_fingerprint());

        let r1 = MemoizingRedactor::new();
        let r2 = MemoizingRedactor::new();
        assert_eq!(r1.algorithm_fingerprint(), r2.algorithm_fingerprint());
    }

    /// Memoized JSON redaction equals the uncached `redact_json` result on a
    /// nested document mixing strings, nulls, numbers and booleans.
    #[test]
    fn memoizing_redactor_redact_json_matches_uncached_for_nested_shapes() {
        let value = json!({
            "session": {
                "auth": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
                "history": [
                    "no secret",
                    "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
                    {"key": "value", "leak": "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"},
                    null,
                    42,
                    true,
                ],
                "metadata": {
                    "leaked_field": "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
                    "safe_field": "noop",
                },
            },
            "version": 7,
        });
        let uncached = redact_json(&value);
        let memoized = MemoizingRedactor::new().redact_json(&value);
        assert_eq!(
            uncached, memoized,
            "memoizing redact_json must match legacy redact_json byte-for-byte"
        );
    }

    /// Repeated keys and values inside one document are served from the cache.
    #[test]
    fn memoizing_redactor_redact_json_reuses_repeated_keys_and_values() {
        let repeated_secret =
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";
        let repeated_note = "same assistant boilerplate without secrets";
        let value = json!({
            "events": [
                {"token": repeated_secret, "note": repeated_note},
                {"token": repeated_secret, "note": repeated_note},
                {"token": repeated_secret, "note": repeated_note},
            ],
            "footer": repeated_note,
        });

        let uncached = redact_json(&value);
        let mut redactor = MemoizingRedactor::with_capacity(32);
        let memoized = redactor.redact_json(&value);

        assert_eq!(
            uncached, memoized,
            "memoized JSON redaction must preserve legacy output exactly"
        );
        assert!(
            !memoized.to_string().contains("eyJhbGci"),
            "memoized JSON redaction must still remove repeated secrets"
        );

        // Six distinct strings ("events", "footer", "token", "note", the
        // secret, the note) across fifteen string lookups in total:
        // 6 misses + 9 hits.
        let stats = redactor.stats();
        assert_eq!(
            stats.misses, 6,
            "first occurrences of root keys, repeated child keys, and scalar values should miss once"
        );
        assert_eq!(
            stats.inserts, 6,
            "each distinct JSON key/value string should be inserted once"
        );
        assert_eq!(
            stats.hits, 9,
            "repeated child keys and repeated scalar values should hit the memo cache"
        );
    }

    /// Empty input never touches the cache counters.
    // NOTE(review): `#[serial]` looks unnecessary here — this test touches no
    // shared state visible in this file; confirm before removing.
    #[test]
    #[serial]
    fn memoizing_redactor_empty_input_skips_cache() {
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text("");
        let stats = redactor.stats();
        assert_eq!(stats.misses, 0, "empty input must not count as miss");
        assert_eq!(stats.hits, 0, "empty input must not count as hit");
        assert_eq!(stats.inserts, 0, "empty input must not insert into cache");
    }

    /// The first call returns lookup(miss) + insert records; the second
    /// returns a single lookup(hit) record; all carry the expected key fields.
    #[test]
    fn memoizing_redactor_with_audit_emits_lookup_and_insert_records() {
        use crate::indexer::memoization::{MemoCacheEvent, MemoCacheOperation};
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload =
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";

        let (first_output, first_audit) = redactor.redact_text_with_audit(payload);
        assert!(!first_output.contains("eyJhbGci"));
        assert_eq!(
            first_audit.len(),
            2,
            "first call must emit a lookup audit + an insert audit"
        );
        assert!(matches!(
            first_audit[0].operation,
            MemoCacheOperation::Lookup
        ));
        assert!(matches!(first_audit[0].event, MemoCacheEvent::Miss));
        assert!(matches!(
            first_audit[1].operation,
            MemoCacheOperation::Insert
        ));
        assert!(matches!(first_audit[1].event, MemoCacheEvent::Insert));
        assert_eq!(first_audit[1].stats.live_entries, 1);

        let (second_output, second_audit) = redactor.redact_text_with_audit(payload);
        assert_eq!(first_output, second_output);
        assert_eq!(
            second_audit.len(),
            1,
            "second call must emit only the lookup audit (cache hit)"
        );
        assert!(matches!(second_audit[0].event, MemoCacheEvent::Hit));
        assert_eq!(second_audit[0].stats.hits, 1);

        // Every record must identify the algorithm and carry the versioned
        // fingerprint.
        for record in first_audit.iter().chain(second_audit.iter()) {
            assert_eq!(record.key.algorithm, "redact_text");
            assert!(record.key.algorithm_version.starts_with("redact-v1:"));
        }
    }

    /// Invalidating removes the entry once, is a no-op afterwards and for
    /// empty input, and the next lookup is a miss again.
    #[test]
    fn memoizing_redactor_invalidate_drops_cached_entry() {
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload = "no secret here, just a sentence";

        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().inserts, 1);
        assert_eq!(redactor.stats().misses, 1);
        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().hits, 1);

        assert!(
            redactor.invalidate(payload),
            "invalidate must return true when an entry was removed"
        );
        assert_eq!(redactor.stats().invalidations, 1);
        assert!(
            !redactor.invalidate(payload),
            "second invalidate must be a no-op"
        );
        assert_eq!(redactor.stats().invalidations, 1);

        assert!(
            !redactor.invalidate(""),
            "invalidating empty input must be a no-op"
        );

        let _ = redactor.redact_text(payload);
        assert_eq!(
            redactor.stats().misses,
            2,
            "post-invalidate call must register as a miss"
        );
        assert_eq!(redactor.stats().hits, 1, "hits counter must not regress");
    }

    /// A quarantined key is still redacted via the direct path, produces only
    /// a lookup record, and re-quarantining does not double-count.
    #[test]
    fn memoizing_redactor_quarantined_entries_fall_through_to_direct_redaction() {
        use crate::indexer::memoization::{MemoCacheEvent, MemoCacheOperation};
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload =
            "user=admin password=hunter2hunter2 token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";

        // Warm the cache (miss + insert), then confirm a hit before
        // quarantining.
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().hits, 1);

        redactor.quarantine(payload, "telemetry: poisoned redaction signal");
        assert_eq!(redactor.stats().quarantined, 1);

        let (output, audit) = redactor.redact_text_with_audit(payload);
        assert!(
            !output.contains("ghp_ABCDE"),
            "post-quarantine redaction must still scrub secrets via direct regex pass"
        );
        assert!(
            !output.contains("password=hunter2hunter2"),
            "post-quarantine redaction must scrub generic password assignments"
        );
        assert_eq!(
            audit.len(),
            1,
            "quarantine fallthrough emits the lookup audit only (no insert)"
        );
        assert!(matches!(audit[0].operation, MemoCacheOperation::Lookup));
        assert!(matches!(audit[0].event, MemoCacheEvent::Quarantine { .. }));

        redactor.quarantine(payload, "telemetry: poisoned redaction signal");
        assert_eq!(
            redactor.stats().quarantined,
            1,
            "re-quarantining the same key with the same reason must not double-count"
        );

        // Empty input is ignored by `quarantine`.
        redactor.quarantine("", "ignored");
        assert_eq!(redactor.stats().quarantined, 1);
    }
}