coding-agent-search 0.5.2

Unified TUI search over local coding agent histories
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
//! Ingestion-time secret redaction for message content and metadata.
//!
//! Prevents secrets (API keys, tokens, passwords, private keys) leaked in
//! tool-result blocks from being persisted into the cass database.
//!
//! This module runs at ingestion time in `map_to_internal()`, before any data
//! reaches SQLite or the FTS index.  It is intentionally conservative: it uses
//! well-known prefix patterns rather than high-entropy heuristics to avoid
//! false positives on normal code content.
//!
//! See also: `pages::secret_scan` (post-hoc scanning of existing data).

use std::borrow::Cow;

use once_cell::sync::Lazy;
use regex::{Regex, RegexSet};

/// Placeholder inserted where a secret was found.
const REDACTED: &str = "[REDACTED]";

/// A compiled secret-detection pattern.
struct SecretPattern {
    pattern: &'static str,
    regex: Regex,
}

/// All built-in patterns, compiled once on first use.
static SECRET_PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
    vec![
        // AWS Access Key ID (always starts with AKIA)
        SecretPattern {
            pattern: r"\bAKIA[0-9A-Z]{16}\b",
            regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").expect("aws access key regex"),
        },
        // AWS Secret Key in assignment context
        SecretPattern {
            pattern: r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
            regex: Regex::new(
                r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
            )
            .expect("aws secret regex"),
        },
        // GitHub PAT (ghp_, gho_, ghu_, ghs_, ghr_)
        SecretPattern {
            pattern: r"\bgh[pousr]_[A-Za-z0-9]{36}\b",
            regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{36}\b").expect("github pat regex"),
        },
        // OpenAI API key (sk-...)
        SecretPattern {
            pattern: r"\bsk-[A-Za-z0-9]{20,}\b",
            regex: Regex::new(r"\bsk-[A-Za-z0-9]{20,}\b").expect("openai key regex"),
        },
        // Anthropic API key (sk-ant-...)
        SecretPattern {
            pattern: r"\bsk-ant-[A-Za-z0-9]{20,}\b",
            regex: Regex::new(r"\bsk-ant-[A-Za-z0-9]{20,}\b").expect("anthropic key regex"),
        },
        // Bearer tokens in authorization headers
        SecretPattern {
            pattern: r"(?i)Bearer\s+[A-Za-z0-9_\-.]{20,}",
            regex: Regex::new(r"(?i)Bearer\s+[A-Za-z0-9_\-.]{20,}").expect("bearer token regex"),
        },
        // JWT tokens (eyJ...)
        SecretPattern {
            pattern: r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b",
            regex: Regex::new(r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b")
                .expect("jwt regex"),
        },
        // PEM private keys
        SecretPattern {
            pattern: r"-----BEGIN (?:RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY-----",
            regex: Regex::new(r"-----BEGIN (?:RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY-----")
                .expect("private key regex"),
        },
        // Database connection URLs with credentials
        SecretPattern {
            pattern: r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]{8,}",
            regex: Regex::new(
                r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]{8,}",
            )
            .expect("db url regex"),
        },
        // Generic key/token/secret/password assignments
        SecretPattern {
            pattern: r#"(?i)(api[_-]?key|api[_-]?secret|auth[_-]?token|access[_-]?token|secret[_-]?key|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-/+=]{8,}['"]?"#,
            regex: Regex::new(
                r#"(?i)(api[_-]?key|api[_-]?secret|auth[_-]?token|access[_-]?token|secret[_-]?key|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-/+=]{8,}['"]?"#,
            )
            .expect("generic api key regex"),
        },
        // Slack tokens (xoxb-, xoxp-, xoxs-, xoxa-, xoxo-, xoxr-)
        SecretPattern {
            pattern: r"\bxox[bpsar]-[A-Za-z0-9\-]{10,}",
            regex: Regex::new(r"\bxox[bpsar]-[A-Za-z0-9\-]{10,}").expect("slack token regex"),
        },
        // Stripe keys (sk_live_, pk_live_, rk_live_)
        SecretPattern {
            pattern: r"\b[spr]k_live_[A-Za-z0-9]{20,}",
            regex: Regex::new(r"\b[spr]k_live_[A-Za-z0-9]{20,}").expect("stripe key regex"),
        },
    ]
});

/// Fast pre-check for the common no-secret path. Keeps pattern ordering aligned
/// with `SECRET_PATTERNS` so matched set indices can select replacement regexes.
static SECRET_REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {
    RegexSet::new(SECRET_PATTERNS.iter().map(|pattern| pattern.pattern)).expect("secret regex set")
});

/// Redact secrets from a plain-text string.
///
/// Returns the input unchanged if no secrets are detected.
pub fn redact_text(input: &str) -> Cow<'_, str> {
    let matches = SECRET_REGEX_SET.matches(input);
    if !matches.matched_any() {
        return Cow::Borrowed(input);
    }

    let mut output = Cow::Borrowed(input);
    for idx in matches.iter() {
        let replaced = SECRET_PATTERNS[idx]
            .regex
            .replace_all(output.as_ref(), REDACTED);
        if let Cow::Owned(redacted) = replaced {
            output = Cow::Owned(redacted);
        }
    }
    output
}

/// Redact secrets from a JSON value, recursively walking strings.
///
/// - String values are redacted in-place.
/// - Arrays and objects are walked recursively.
/// - Numbers, booleans, and null are left untouched.
pub fn redact_json(value: &serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::String(s) => {
            let redacted = redact_text(s).into_owned();
            serde_json::Value::String(redacted)
        }
        serde_json::Value::Array(arr) => {
            serde_json::Value::Array(arr.iter().map(redact_json).collect())
        }
        serde_json::Value::Object(obj) => {
            let mut new_obj = serde_json::Map::new();
            for (k, v) in obj {
                let redacted_key = redact_text(k).into_owned();
                new_obj.insert(redacted_key, redact_json(v));
            }
            serde_json::Value::Object(new_obj)
        }
        other => other.clone(),
    }
}

#[doc(hidden)]
pub fn fuzz_redact_json_with_memoizing_redactor(
    value: &serde_json::Value,
    capacity: usize,
) -> serde_json::Value {
    MemoizingRedactor::with_capacity(capacity.clamp(1, 1024)).redact_json(value)
}

/// Returns true if redaction is enabled (default: true).
///
/// Set `CASS_REDACT_SECRETS=0` or `CASS_REDACT_SECRETS=false` to disable.
pub fn redaction_enabled() -> bool {
    match dotenvy::var("CASS_REDACT_SECRETS") {
        Ok(val) => !matches!(val.as_str(), "0" | "false" | "off" | "no"),
        Err(_) => true,
    }
}

/// Stable identifier for the compiled SECRET_PATTERNS list.
///
/// Memoization keys for [`MemoizingRedactor`] combine input content
/// with this fingerprint so a pattern bump (new regex added, existing
/// regex tightened) automatically invalidates every prior cache entry
/// — silent stale cross-version reuse is impossible by construction.
///
/// The fingerprint is `redact-v1:<blake3-hex>` where the hash covers
/// every pattern source string concatenated with NUL separators. The
/// `v1` epoch lets future maintainers force a manual bump even when
/// the regex source set hasn't changed (e.g. if the replacement
/// constant changes from `[REDACTED]` to something else).
pub fn redaction_algorithm_fingerprint() -> String {
    static FINGERPRINT: Lazy<String> = Lazy::new(|| {
        let mut hasher = blake3::Hasher::new();
        for pattern in SECRET_PATTERNS.iter() {
            hasher.update(pattern.pattern.as_bytes());
            hasher.update(&[0]);
        }
        hasher.update(REDACTED.as_bytes());
        format!("redact-v1:{}", hasher.finalize().to_hex())
    });
    FINGERPRINT.clone()
}

/// Content-addressed memoizing redactor for the ingestion hot path.
///
/// `coding_agent_session_search-ibuuh.34`: redaction is a pure,
/// regex-heavy transformation that runs against every persisted message
/// content + metadata blob. Salvage replays, repeated assistant
/// boilerplate, and historical re-ingest all feed identical content
/// through the regex engine over and over. This wrapper keys
/// [`ContentAddressedMemoCache`] on the input bytes plus the algorithm
/// fingerprint so repeated content stops paying the regex cost while a
/// pattern bump invalidates every prior entry transparently.
///
/// The wrapper preserves the legacy [`redact_text`]/[`redact_json`]
/// contract byte-for-byte: see
/// `memoizing_redactor_matches_uncached_for_arbitrary_input` for the
/// equivalence gate. When the cache is hit, the recorded value is
/// returned directly; on miss, the legacy regex path runs and the
/// result is inserted under the content+algorithm key.
///
/// `MemoizingRedactor` is `pub(crate)` so the live persist path can
/// adopt it without leaking the memoization vocabulary into public
/// API. Wiring lives in the indexer crate.
#[allow(dead_code)]
pub(crate) struct MemoizingRedactor {
    text_cache: crate::indexer::memoization::ContentAddressedMemoCache<String>,
    algorithm_fingerprint: String,
}

#[allow(dead_code)]
impl MemoizingRedactor {
    /// Default cache capacity for typical refresh batches. Sized to
    /// cover a few thousand distinct message bodies before LRU
    /// eviction kicks in.
    pub(crate) const DEFAULT_CAPACITY: usize = 4096;

    pub(crate) fn with_capacity(capacity: usize) -> Self {
        Self {
            text_cache: crate::indexer::memoization::ContentAddressedMemoCache::with_capacity(
                capacity,
            ),
            algorithm_fingerprint: redaction_algorithm_fingerprint(),
        }
    }

    pub(crate) fn new() -> Self {
        Self::with_capacity(Self::DEFAULT_CAPACITY)
    }

    pub(crate) fn algorithm_fingerprint(&self) -> &str {
        &self.algorithm_fingerprint
    }

    pub(crate) fn stats(&self) -> &crate::indexer::memoization::MemoCacheStats {
        self.text_cache.stats()
    }

    /// Memoized counterpart to [`redact_text`]. Returns an owned String
    /// (not Cow) because caching forces a copy on first compute anyway,
    /// and downstream callers (`map_to_internal`) immediately call
    /// `.into_owned()` regardless. Skipping the Cow indirection keeps
    /// the cached-hit path branchless.
    ///
    /// Each cache decision emits a structured `tracing` event so
    /// operators can audit hit / miss / insert / evict / quarantine
    /// behavior from logs alone (per `coding_agent_session_search-ibuuh.34`
    /// AC: "operator-auditable through structured hit, miss,
    /// invalidation, eviction, quarantine, and budget logs").
    pub(crate) fn redact_text(&mut self, input: &str) -> String {
        let (output, _audit) = self.redact_text_with_audit(input);
        output
    }

    /// Audit-bearing variant: returns the redacted text plus the
    /// structured cache-decision records (lookup audit, plus insert
    /// audit on miss). Callers that want to forward records to a
    /// subscriber (telemetry sink, doctor diagnostics, etc.) use this
    /// directly; the convenience `redact_text` wrapper drops them
    /// after emitting tracing events.
    pub(crate) fn redact_text_with_audit(
        &mut self,
        input: &str,
    ) -> (
        String,
        Vec<crate::indexer::memoization::MemoCacheAuditRecord>,
    ) {
        // Empty fast-path matches the uncached contract and bypasses
        // the cache entirely (see memoizing_redactor_empty_input_skips_cache).
        if input.is_empty() {
            return (String::new(), Vec::new());
        }
        let key = self.key_for(input);
        let (lookup, lookup_audit) = self.text_cache.get_with_audit(&key);
        Self::trace_audit(&lookup_audit);
        match lookup {
            crate::indexer::memoization::MemoLookup::Hit { value } => (value, vec![lookup_audit]),
            crate::indexer::memoization::MemoLookup::Quarantined { reason } => {
                // Quarantined entry: never serve a stale value;
                // recompute via the legacy regex path, but DO NOT
                // re-insert (the entry stays quarantined for operator
                // inspection until explicitly lifted via
                // `lift_quarantine_for`).
                tracing::warn!(
                    quarantine_reason = %reason,
                    algorithm = %self.algorithm_fingerprint,
                    "redaction memo entry is quarantined; falling back to direct regex pass"
                );
                let redacted = redact_text(input).into_owned();
                (redacted, vec![lookup_audit])
            }
            crate::indexer::memoization::MemoLookup::Miss => {
                let redacted = redact_text(input).into_owned();
                let insert_audit = self.text_cache.insert_with_audit(key, redacted.clone());
                Self::trace_audit(&insert_audit);
                (redacted, vec![lookup_audit, insert_audit])
            }
        }
    }

    /// Invalidate a cached redaction for the given input. Returns
    /// `true` only when an entry was actually removed (matches the
    /// underlying `ContentAddressedMemoCache` contract). Mostly
    /// useful for tests and for operator tooling that wants to bust
    /// individual cache entries without restarting the process.
    pub(crate) fn invalidate(&mut self, input: &str) -> bool {
        if input.is_empty() {
            return false;
        }
        let key = self.key_for(input);
        let audit = self.text_cache.invalidate_with_audit(&key);
        Self::trace_audit(&audit);
        audit.changed
    }

    /// Quarantine a cached entry: subsequent lookups will return
    /// [`MemoLookup::Quarantined`] (handled by `redact_text` as a
    /// fallthrough to the direct regex path) instead of the cached
    /// value. The reason is preserved for operator inspection. Used
    /// when telemetry detects a poisoned redaction (e.g. unexpected
    /// regex behavior under a hot pattern bump that the algorithm
    /// fingerprint didn't catch).
    pub(crate) fn quarantine(&mut self, input: &str, reason: impl Into<String>) {
        if input.is_empty() {
            return;
        }
        let key = self.key_for(input);
        let audit = self.text_cache.quarantine_with_audit(key, reason);
        Self::trace_audit(&audit);
    }

    fn trace_audit(audit: &crate::indexer::memoization::MemoCacheAuditRecord) {
        // Severity tiers match operator expectations: hits are noise
        // (trace), misses + inserts are routine (debug), evictions
        // are noteworthy (info), invalidations and quarantines are
        // alarming enough to warn so they show up in default-level
        // logs without dredging.
        use crate::indexer::memoization::MemoCacheEvent;
        match audit.event {
            MemoCacheEvent::Hit => tracing::trace!(
                target: "cass::redact::memo",
                algorithm = %audit.key.algorithm,
                stats = ?audit.stats,
                "redact memo hit"
            ),
            MemoCacheEvent::Miss => tracing::debug!(
                target: "cass::redact::memo",
                algorithm = %audit.key.algorithm,
                stats = ?audit.stats,
                "redact memo miss"
            ),
            MemoCacheEvent::Insert => tracing::debug!(
                target: "cass::redact::memo",
                algorithm = %audit.key.algorithm,
                live_entries = audit.stats.live_entries,
                "redact memo insert"
            ),
            MemoCacheEvent::Evict { ref reason } => tracing::info!(
                target: "cass::redact::memo",
                evict_reason = ?reason,
                live_entries = audit.stats.live_entries,
                evictions_capacity = audit.stats.evictions_capacity,
                "redact memo eviction"
            ),
            MemoCacheEvent::Invalidate => tracing::warn!(
                target: "cass::redact::memo",
                changed = audit.changed,
                live_entries = audit.stats.live_entries,
                invalidations = audit.stats.invalidations,
                "redact memo invalidate"
            ),
            MemoCacheEvent::Quarantine { ref reason } => tracing::warn!(
                target: "cass::redact::memo",
                quarantine_reason = %reason,
                quarantined_entries = audit.quarantined_entries,
                "redact memo quarantine"
            ),
        }
    }

    /// Memoized counterpart to [`redact_json`]. Recurses through the
    /// JSON value, memoizing each string scalar (and each object key)
    /// independently — JSON arrays / objects themselves are not
    /// cached because their structural identity dominates compared to
    /// per-string regex cost.
    pub(crate) fn redact_json(&mut self, value: &serde_json::Value) -> serde_json::Value {
        match value {
            serde_json::Value::String(s) => serde_json::Value::String(self.redact_text(s)),
            serde_json::Value::Array(arr) => {
                serde_json::Value::Array(arr.iter().map(|v| self.redact_json(v)).collect())
            }
            serde_json::Value::Object(obj) => {
                let mut new_obj = serde_json::Map::with_capacity(obj.len());
                for (k, v) in obj {
                    let redacted_key = self.redact_text(k);
                    new_obj.insert(redacted_key, self.redact_json(v));
                }
                serde_json::Value::Object(new_obj)
            }
            other => other.clone(),
        }
    }

    fn key_for(&self, input: &str) -> crate::indexer::memoization::MemoKey {
        // Hash content with blake3 for a fixed-width key (avoids
        // pathological 1-MiB-content cache keys that would otherwise
        // dominate cache memory).
        let mut hasher = blake3::Hasher::new();
        hasher.update(input.as_bytes());
        let content_hash = crate::indexer::memoization::MemoContentHash::from_bytes(
            hasher.finalize().as_bytes().to_vec(),
        );
        crate::indexer::memoization::MemoKey::new(
            content_hash,
            "redact_text",
            self.algorithm_fingerprint.clone(),
        )
    }
}

impl Default for MemoizingRedactor {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use serial_test::serial;

    #[test]
    fn redacts_openai_key() {
        let input = "my key is sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "my key is [REDACTED]");
        assert!(!output.contains("sk-ABCDE"));
    }

    #[test]
    fn redacts_anthropic_key() {
        let input = "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    #[test]
    fn redacts_github_pat() {
        let input = "token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "token [REDACTED]");
    }

    #[test]
    fn redacts_bearer_token() {
        let input = "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";
        let output = redact_text(input);
        assert!(!output.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_aws_access_key() {
        let input = "AKIAIOSFODNN7EXAMPLE";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    #[test]
    fn redacts_private_key_header() {
        let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAK...";
        let output = redact_text(input);
        assert!(output.starts_with("[REDACTED]"));
    }

    #[test]
    fn redacts_generic_api_key_assignment() {
        let input = "api_key=abcdefgh12345678";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    #[test]
    fn redacts_database_url() {
        let input = "DATABASE_URL=postgres://user:pass@host:5432/db";
        let output = redact_text(input);
        assert!(!output.contains("user:pass"));
    }

    #[test]
    fn redacts_stripe_key() {
        // Build the test key dynamically to avoid GitHub push protection flagging it
        let input = format!("{}_{}", "sk_live", "AAAABBBBCCCCDDDDEEEEFFFFGGGG");
        let output = redact_text(&input);
        assert_eq!(output, "[REDACTED]");
    }

    #[test]
    fn redacts_slack_token() {
        let input = "xoxb-123456789-abcdefghij";
        let output = redact_text(input);
        assert_eq!(output, "[REDACTED]");
    }

    #[test]
    fn leaves_normal_text_unchanged() {
        let input = "Hello, this is a normal message about code review.";
        let output = redact_text(input);
        assert_eq!(output, input);
        assert!(
            matches!(output, Cow::Borrowed(_)),
            "no-secret path should not allocate"
        );
    }

    #[test]
    fn leaves_short_tokens_unchanged() {
        // Short strings should not match (below minimum lengths)
        let input = "sk-abc";
        let output = redact_text(input);
        assert_eq!(output, input);
    }

    #[test]
    fn redacts_json_string_values() {
        let input = json!({
            "tool_result": "Response contains sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
            "safe": "no secrets here",
            "number": 42
        });
        let output = redact_json(&input);
        assert_eq!(output["tool_result"], json!("Response contains [REDACTED]"));
        assert_eq!(output["safe"], json!("no secrets here"));
        assert_eq!(output["number"], json!(42));
    }

    #[test]
    fn redacts_nested_json() {
        let input = json!({
            "outer": {
                "inner": "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"
            },
            "array": ["safe", "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"]
        });
        let output = redact_json(&input);
        assert_eq!(output["outer"]["inner"], json!("[REDACTED]"));
        assert_eq!(output["array"][0], json!("safe"));
        assert_eq!(output["array"][1], json!("[REDACTED]"));
    }

    #[test]
    #[serial]
    fn redaction_enabled_default() {
        // When env var is not set, should be enabled
        // Safety: only called in single-threaded test context
        unsafe { std::env::remove_var("CASS_REDACT_SECRETS") };
        assert!(redaction_enabled());
    }

    #[test]
    #[serial]
    fn redaction_can_be_disabled() {
        unsafe { std::env::set_var("CASS_REDACT_SECRETS", "0") };
        assert!(!redaction_enabled());

        unsafe { std::env::set_var("CASS_REDACT_SECRETS", "false") };
        assert!(!redaction_enabled());

        // Restore for other tests
        unsafe { std::env::remove_var("CASS_REDACT_SECRETS") };
    }

    #[test]
    fn multiple_secrets_in_one_string() {
        let input = "key1=sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij and key2=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let output = redact_text(input);
        assert!(!output.contains("sk-ABCDE"));
        assert!(!output.contains("ghp_ABCDE"));
        assert_eq!(output.matches("[REDACTED]").count(), 2);
        assert!(
            matches!(output, Cow::Owned(_)),
            "matched secret path should return owned redacted text"
        );
    }

    /// `coding_agent_session_search-ibuuh.34` (memoization equivalence
    /// gate): the memoizing redactor must produce byte-identical
    /// output to the legacy `redact_text` path on every input.
    /// Equivalence is checked across:
    /// - clean inputs with no secret matches
    /// - single-secret inputs (every supported pattern fires at least once)
    /// - multi-secret inputs (multiple replacement passes)
    /// - empty input (fast-path)
    /// - long boilerplate-style inputs (large blob with no secrets)
    ///
    /// First and second invocations on the same input must agree
    /// (cache-hit invariance) AND match the uncached result.
    #[test]
    fn memoizing_redactor_matches_uncached_for_arbitrary_input() {
        // Diagnostic-message slice helper: MUST land on a UTF-8 char
        // boundary so we can extend this fixture set with multi-byte
        // inputs in the future without panicking on byte-slice
        // boundary errors. (MEMORY.md flagged this exact pattern as
        // a recurring footgun; this helper inoculates the test.)
        fn safe_prefix(s: &str, max_bytes: usize) -> &str {
            let mut end = s.len().min(max_bytes);
            while end > 0 && !s.is_char_boundary(end) {
                end -= 1;
            }
            &s[..end]
        }
        let twenty_kib_unicode = "🔐abc".repeat(2_048);
        let inputs: &[&str] = &[
            "",
            "no secrets here, just prose",
            "my key is sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
            "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij followed by AKIAABCDEFGHIJKLMNOP",
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
            "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij and another ghp_ZYXWVUTSRQPONMLKJIHGFEDCBA0123456789",
            // Multi-byte UTF-8 input: pins that the memoized path's
            // hashing + cache key construction handles non-ASCII
            // content (blake3 over .as_bytes() handles any byte
            // sequence). Pre-fixup, the diagnostic prefix slice
            // below would have panicked on this input.
            "🔐 user pasted sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij from 测试",
            &twenty_kib_unicode,
            &"a".repeat(10_000),
        ];
        let mut redactor = MemoizingRedactor::with_capacity(64);
        for input in inputs {
            let uncached = redact_text(input).into_owned();
            let memoized_first = redactor.redact_text(input);
            let memoized_second = redactor.redact_text(input);
            assert_eq!(
                uncached,
                memoized_first,
                "memoized first call must match legacy uncached redact_text for input prefix: {:?}",
                safe_prefix(input, 64)
            );
            assert_eq!(
                uncached,
                memoized_second,
                "memoized second call must match legacy uncached for input prefix: {:?}",
                safe_prefix(input, 64)
            );
        }
    }

    /// Repeated identical content must hit the cache rather than
    /// re-running the regex set. Pinning hits/misses is the operator
    /// audit signal the bead acceptance asks for.
    #[test]
    fn memoizing_redactor_reuses_cache_for_repeated_content() {
        let mut redactor = MemoizingRedactor::with_capacity(16);
        let payload = "boilerplate assistant prompt: please help with sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        // Three identical calls: 1 miss + 2 hits. Empty-string
        // fast-path is never accounted in the cache, so it does not
        // perturb the counters.
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        let stats = redactor.stats();
        assert_eq!(stats.misses, 1, "first call must be a cache miss");
        assert_eq!(
            stats.hits, 2,
            "subsequent identical calls must be cache hits"
        );
        assert_eq!(stats.inserts, 1, "exactly one redacted result inserted");
    }

    /// A pattern bump (algorithm fingerprint change) must invalidate
    /// every prior memo entry. We simulate this by constructing two
    /// `MemoizingRedactor` instances whose algorithm fingerprints
    /// differ — entries from one cannot serve hits to the other,
    /// guaranteeing safe cross-version semantics. Pinning the
    /// fingerprint structure (`redact-v1:<hex>`) guards against an
    /// accidental hash-format change that would silently break
    /// invalidation.
    #[test]
    fn memoizing_redactor_keys_isolate_by_algorithm_fingerprint() {
        let fingerprint = redaction_algorithm_fingerprint();
        assert!(
            fingerprint.starts_with("redact-v1:"),
            "fingerprint must carry an explicit version epoch, got: {fingerprint}"
        );
        let hex_part = fingerprint.strip_prefix("redact-v1:").unwrap();
        assert_eq!(
            hex_part.len(),
            64,
            "fingerprint hash must be a 64-char blake3 hex digest"
        );
        // Same compiled patterns ⇒ same fingerprint across calls.
        assert_eq!(fingerprint, redaction_algorithm_fingerprint());

        // Two fresh redactors share the algorithm fingerprint, so they
        // would route hits/misses through the same key shape. Pinning
        // both fingerprints equal guards against a thread-local /
        // process-singleton bug that could silently desync cache
        // versions across parallel persist workers.
        let r1 = MemoizingRedactor::new();
        let r2 = MemoizingRedactor::new();
        assert_eq!(r1.algorithm_fingerprint(), r2.algorithm_fingerprint());
    }

    /// `redact_json` round-trip via the memoizing path must agree with
    /// the legacy `redact_json` for non-trivial JSON shapes (nested
    /// arrays, nested objects, mixed scalars). Pins the recursive
    /// projection so a regression in either path's traversal trips a
    /// clear assertion.
    #[test]
    fn memoizing_redactor_redact_json_matches_uncached_for_nested_shapes() {
        let value = json!({
            "session": {
                "auth": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
                "history": [
                    "no secret",
                    "ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
                    {"key": "value", "leak": "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"},
                    null,
                    42,
                    true,
                ],
                "metadata": {
                    "leaked_field": "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij",
                    "safe_field": "noop",
                },
            },
            "version": 7,
        });
        let uncached = redact_json(&value);
        let memoized = MemoizingRedactor::new().redact_json(&value);
        assert_eq!(
            uncached, memoized,
            "memoizing redact_json must match legacy redact_json byte-for-byte"
        );
    }

    /// Repeated metadata / extra_json structures are common in salvage
    /// replays and assistant boilerplate. The memoized JSON walker must
    /// reuse repeated object keys and repeated scalar values instead of
    /// re-running the regex set for every copy.
    #[test]
    fn memoizing_redactor_redact_json_reuses_repeated_keys_and_values() {
        let repeated_secret =
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";
        let repeated_note = "same assistant boilerplate without secrets";
        let value = json!({
            "events": [
                {"token": repeated_secret, "note": repeated_note},
                {"token": repeated_secret, "note": repeated_note},
                {"token": repeated_secret, "note": repeated_note},
            ],
            "footer": repeated_note,
        });

        let uncached = redact_json(&value);
        let mut redactor = MemoizingRedactor::with_capacity(32);
        let memoized = redactor.redact_json(&value);

        assert_eq!(
            uncached, memoized,
            "memoized JSON redaction must preserve legacy output exactly"
        );
        assert!(
            !memoized.to_string().contains("eyJhbGci"),
            "memoized JSON redaction must still remove repeated secrets"
        );

        let stats = redactor.stats();
        assert_eq!(
            stats.misses, 6,
            "first occurrences of root keys, repeated child keys, and scalar values should miss once"
        );
        assert_eq!(
            stats.inserts, 6,
            "each distinct JSON key/value string should be inserted once"
        );
        assert_eq!(
            stats.hits, 9,
            "repeated child keys and repeated scalar values should hit the memo cache"
        );
    }

    /// Emptiness fast-path: zero-length input must NOT increment the
    /// cache miss counter. Otherwise an ingestion run with thousands
    /// of empty system messages would burn cache slots for
    /// content-equivalent empty strings.
    #[test]
    #[serial]
    fn memoizing_redactor_empty_input_skips_cache() {
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text("");
        let _ = redactor.redact_text("");
        let stats = redactor.stats();
        assert_eq!(stats.misses, 0, "empty input must not count as miss");
        assert_eq!(stats.hits, 0, "empty input must not count as hit");
        assert_eq!(stats.inserts, 0, "empty input must not insert into cache");
    }

    /// `coding_agent_session_search-ibuuh.34` (operator-audit gate):
    /// every cache decision must surface a structured
    /// MemoCacheAuditRecord so telemetry sinks / doctor diagnostics
    /// can reason about cache health without grepping internal stats.
    /// First call on a new content emits Lookup(Miss) + Insert.
    /// Second call emits Lookup(Hit). Pinning the audit shape directly
    /// closes the bead's "operator-auditable through structured hit,
    /// miss, invalidation, eviction, quarantine, and budget logs"
    /// requirement for the redaction sink.
    #[test]
    fn memoizing_redactor_with_audit_emits_lookup_and_insert_records() {
        use crate::indexer::memoization::{MemoCacheEvent, MemoCacheOperation};
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload =
            "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature";

        let (first_output, first_audit) = redactor.redact_text_with_audit(payload);
        assert!(!first_output.contains("eyJhbGci"));
        assert_eq!(
            first_audit.len(),
            2,
            "first call must emit a lookup audit + an insert audit"
        );
        assert!(matches!(
            first_audit[0].operation,
            MemoCacheOperation::Lookup
        ));
        assert!(matches!(first_audit[0].event, MemoCacheEvent::Miss));
        assert!(matches!(
            first_audit[1].operation,
            MemoCacheOperation::Insert
        ));
        assert!(matches!(first_audit[1].event, MemoCacheEvent::Insert));
        assert_eq!(first_audit[1].stats.live_entries, 1);

        let (second_output, second_audit) = redactor.redact_text_with_audit(payload);
        assert_eq!(first_output, second_output);
        assert_eq!(
            second_audit.len(),
            1,
            "second call must emit only the lookup audit (cache hit)"
        );
        assert!(matches!(second_audit[0].event, MemoCacheEvent::Hit));
        assert_eq!(second_audit[0].stats.hits, 1);

        // Algorithm key carried on every audit record so a downstream
        // sink can disambiguate cache events when multiple
        // ContentAddressedMemoCaches share the same logger target.
        for record in first_audit.iter().chain(second_audit.iter()) {
            assert_eq!(record.key.algorithm, "redact_text");
            assert!(record.key.algorithm_version.starts_with("redact-v1:"));
        }
    }

    /// Invalidate must remove the cached entry so the next call is a
    /// miss + re-insert. Pin the changed/no-op semantics so a caller
    /// can rely on the boolean return value to know whether anything
    /// was actually evicted.
    #[test]
    fn memoizing_redactor_invalidate_drops_cached_entry() {
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload = "no secret here, just a sentence";

        // Prime the cache.
        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().inserts, 1);
        assert_eq!(redactor.stats().misses, 1);
        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().hits, 1);

        // Invalidate must report the change.
        assert!(
            redactor.invalidate(payload),
            "invalidate must return true when an entry was removed"
        );
        assert_eq!(redactor.stats().invalidations, 1);
        // A second invalidate on the same key is a no-op.
        assert!(
            !redactor.invalidate(payload),
            "second invalidate must be a no-op"
        );
        assert_eq!(redactor.stats().invalidations, 1);

        // Empty input invalidate is a no-op (matches the empty-input
        // fast-path: nothing was ever cached).
        assert!(
            !redactor.invalidate(""),
            "invalidating empty input must be a no-op"
        );

        // Next call must miss again, not hit.
        let _ = redactor.redact_text(payload);
        assert_eq!(
            redactor.stats().misses,
            2,
            "post-invalidate call must register as a miss"
        );
        assert_eq!(redactor.stats().hits, 1, "hits counter must not regress");
    }

    /// Quarantined entries must NEVER serve a cached value. After
    /// quarantine, the redactor falls through to the direct
    /// `redact_text` regex path and the cached value remains
    /// quarantined for operator inspection. This satisfies the bead's
    /// "suspected corruption or stale-entry quarantine" coverage
    /// requirement.
    #[test]
    fn memoizing_redactor_quarantined_entries_fall_through_to_direct_redaction() {
        use crate::indexer::memoization::{MemoCacheEvent, MemoCacheOperation};
        let mut redactor = MemoizingRedactor::with_capacity(8);
        let payload =
            "user=admin password=hunter2hunter2 token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";

        // Prime + verify hit.
        let _ = redactor.redact_text(payload);
        let _ = redactor.redact_text(payload);
        assert_eq!(redactor.stats().hits, 1);

        // Quarantine the entry; subsequent lookup must report the
        // Quarantined outcome via audit AND fall through to direct
        // regex redaction (so the user-visible result is still the
        // correct redacted text).
        redactor.quarantine(payload, "telemetry: poisoned redaction signal");
        assert_eq!(redactor.stats().quarantined, 1);

        let (output, audit) = redactor.redact_text_with_audit(payload);
        assert!(
            !output.contains("ghp_ABCDE"),
            "post-quarantine redaction must still scrub secrets via direct regex pass"
        );
        assert!(
            !output.contains("password=hunter2hunter2"),
            "post-quarantine redaction must scrub generic password assignments"
        );
        assert_eq!(
            audit.len(),
            1,
            "quarantine fallthrough emits the lookup audit only (no insert)"
        );
        assert!(matches!(audit[0].operation, MemoCacheOperation::Lookup));
        assert!(matches!(audit[0].event, MemoCacheEvent::Quarantine { .. }));

        // Re-quarantining the same key with the same reason is a
        // no-op for the quarantine counter (already quarantined).
        redactor.quarantine(payload, "telemetry: poisoned redaction signal");
        assert_eq!(
            redactor.stats().quarantined,
            1,
            "re-quarantining the same key with the same reason must not double-count"
        );

        // Empty input quarantine is a no-op.
        redactor.quarantine("", "ignored");
        assert_eq!(redactor.stats().quarantined, 1);
    }
}