Skip to main content

ftui_text/
width_cache.rs

1#![forbid(unsafe_code)]
2
3//! LRU width cache for efficient text measurement.
4//!
5//! Text width calculation is a hot path (called every render frame).
6//! This cache stores computed widths to avoid redundant Unicode width
7//! calculations for repeated strings.
8//!
9//! # Example
10//! ```
11//! use ftui_text::WidthCache;
12//!
13//! let mut cache = WidthCache::new(1000);
14//!
15//! // First call computes width
16//! let width = cache.get_or_compute("Hello, world!");
17//! assert_eq!(width, 13);
18//!
19//! // Second call hits cache
20//! let width2 = cache.get_or_compute("Hello, world!");
21//! assert_eq!(width2, 13);
22//!
23//! // Check stats
24//! let stats = cache.stats();
25//! assert_eq!(stats.hits, 1);
26//! assert_eq!(stats.misses, 1);
27//! ```
28
29use lru::LruCache;
30use rustc_hash::FxHasher;
31use std::hash::{Hash, Hasher};
32use std::num::NonZeroUsize;
33
34/// Default cache capacity.
35pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
36
37/// Statistics about cache performance.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39pub struct CacheStats {
40    /// Number of cache hits.
41    pub hits: u64,
42    /// Number of cache misses.
43    pub misses: u64,
44    /// Current number of entries.
45    pub size: usize,
46    /// Maximum capacity.
47    pub capacity: usize,
48}
49
50impl CacheStats {
51    /// Calculate hit rate (0.0 to 1.0).
52    #[must_use]
53    pub fn hit_rate(&self) -> f64 {
54        let total = self.hits + self.misses;
55        if total == 0 {
56            0.0
57        } else {
58            self.hits as f64 / total as f64
59        }
60    }
61}
62
63/// LRU cache for text width measurements.
64///
65/// This cache stores the computed display width (in terminal cells) for
66/// text strings, using an LRU eviction policy when capacity is reached.
67///
68/// # Performance
69/// - Uses FxHash for fast hashing
70/// - O(1) lookup and insertion
71/// - Automatic LRU eviction
72/// - Keys are stored as 64-bit hashes (not full strings) to minimize memory
73///
74/// # Hash Collisions
75/// The cache uses a 64-bit hash as the lookup key rather than storing the
76/// full string. This trades theoretical correctness for memory efficiency.
77/// With FxHash, collision probability is ~1 in 2^64, making this safe for
78/// practical use. If you require guaranteed correctness, use `contains()`
79/// to verify presence before trusting cached values.
80///
81/// # Thread Safety
82/// `WidthCache` is not thread-safe. For concurrent use, wrap in a mutex
83/// or use thread-local caches.
84#[derive(Debug)]
85pub struct WidthCache {
86    cache: LruCache<u64, usize>,
87    hits: u64,
88    misses: u64,
89}
90
91impl WidthCache {
92    /// Create a new cache with the specified capacity.
93    ///
94    /// If capacity is zero, defaults to 1.
95    #[must_use]
96    pub fn new(capacity: usize) -> Self {
97        let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
98        Self {
99            cache: LruCache::new(capacity),
100            hits: 0,
101            misses: 0,
102        }
103    }
104
105    /// Create a new cache with the default capacity (4096 entries).
106    #[must_use]
107    pub fn with_default_capacity() -> Self {
108        Self::new(DEFAULT_CACHE_CAPACITY)
109    }
110
111    /// Get cached width or compute and cache it.
112    ///
113    /// If the text is in the cache, returns the cached width.
114    /// Otherwise, computes the width using `display_width` and caches it.
115    #[inline]
116    pub fn get_or_compute(&mut self, text: &str) -> usize {
117        self.get_or_compute_with(text, crate::display_width)
118    }
119
120    /// Get cached width or compute using a custom function.
121    ///
122    /// This allows using custom width calculation functions for testing
123    /// or specialized terminal behavior.
124    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
125    where
126        F: FnOnce(&str) -> usize,
127    {
128        let hash = hash_text(text);
129
130        if let Some(&width) = self.cache.get(&hash) {
131            self.hits += 1;
132            return width;
133        }
134
135        self.misses += 1;
136        let width = compute(text);
137        self.cache.put(hash, width);
138        width
139    }
140
141    /// Check if a text string is in the cache.
142    #[must_use]
143    pub fn contains(&self, text: &str) -> bool {
144        let hash = hash_text(text);
145        self.cache.contains(&hash)
146    }
147
148    /// Get the cached width for a text string without computing.
149    ///
150    /// Returns `None` if the text is not in the cache.
151    /// Note: This does update the LRU order.
152    #[must_use]
153    pub fn get(&mut self, text: &str) -> Option<usize> {
154        let hash = hash_text(text);
155        self.cache.get(&hash).copied()
156    }
157
158    /// Peek at the cached width without updating LRU order.
159    #[must_use]
160    pub fn peek(&self, text: &str) -> Option<usize> {
161        let hash = hash_text(text);
162        self.cache.peek(&hash).copied()
163    }
164
165    /// Pre-populate the cache with a text string.
166    ///
167    /// This is useful for warming up the cache with known strings.
168    pub fn preload(&mut self, text: &str) {
169        let hash = hash_text(text);
170        if !self.cache.contains(&hash) {
171            let width = crate::display_width(text);
172            self.cache.put(hash, width);
173        }
174    }
175
176    /// Pre-populate the cache with multiple strings.
177    pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
178        for text in texts {
179            self.preload(text);
180        }
181    }
182
183    /// Clear the cache.
184    pub fn clear(&mut self) {
185        self.cache.clear();
186    }
187
188    /// Reset statistics.
189    pub fn reset_stats(&mut self) {
190        self.hits = 0;
191        self.misses = 0;
192    }
193
194    /// Get cache statistics.
195    #[must_use]
196    pub fn stats(&self) -> CacheStats {
197        CacheStats {
198            hits: self.hits,
199            misses: self.misses,
200            size: self.cache.len(),
201            capacity: self.cache.cap().get(),
202        }
203    }
204
205    /// Get the current number of cached entries.
206    #[must_use]
207    pub fn len(&self) -> usize {
208        self.cache.len()
209    }
210
211    /// Check if the cache is empty.
212    #[must_use]
213    pub fn is_empty(&self) -> bool {
214        self.cache.is_empty()
215    }
216
217    /// Get the cache capacity.
218    #[must_use]
219    pub fn capacity(&self) -> usize {
220        self.cache.cap().get()
221    }
222
223    /// Resize the cache capacity.
224    ///
225    /// If the new capacity is smaller than the current size,
226    /// entries will be evicted (LRU order).
227    pub fn resize(&mut self, new_capacity: usize) {
228        let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
229        self.cache.resize(new_capacity);
230    }
231}
232
233impl Default for WidthCache {
234    fn default() -> Self {
235        Self::with_default_capacity()
236    }
237}
238
239/// Hash a text string using FxHash for fast hashing.
240#[inline]
241fn hash_text(text: &str) -> u64 {
242    let mut hasher = FxHasher::default();
243    text.hash(&mut hasher);
244    hasher.finish()
245}
246
247// Thread-local width cache for convenience.
248//
249// This provides a global cache that is thread-local, avoiding the need
250// to pass a cache around explicitly.
251#[cfg(feature = "thread_local_cache")]
252thread_local! {
253    static THREAD_CACHE: std::cell::RefCell<WidthCache> =
254        std::cell::RefCell::new(WidthCache::with_default_capacity());
255}
256
257/// Get or compute width using the thread-local cache.
258#[cfg(feature = "thread_local_cache")]
259pub fn cached_width(text: &str) -> usize {
260    THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
261}
262
263/// Clear the thread-local cache.
264#[cfg(feature = "thread_local_cache")]
265pub fn clear_thread_cache() {
266    THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
267}
268
269#[cfg(test)]
270mod tests {
271    use super::*;
272
273    #[test]
274    fn new_cache_is_empty() {
275        let cache = WidthCache::new(100);
276        assert!(cache.is_empty());
277        assert_eq!(cache.len(), 0);
278        assert_eq!(cache.capacity(), 100);
279    }
280
281    #[test]
282    fn default_capacity() {
283        let cache = WidthCache::with_default_capacity();
284        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
285    }
286
287    #[test]
288    fn get_or_compute_caches_value() {
289        let mut cache = WidthCache::new(100);
290
291        let width1 = cache.get_or_compute("hello");
292        assert_eq!(width1, 5);
293        assert_eq!(cache.len(), 1);
294
295        let width2 = cache.get_or_compute("hello");
296        assert_eq!(width2, 5);
297        assert_eq!(cache.len(), 1); // Same entry
298
299        let stats = cache.stats();
300        assert_eq!(stats.hits, 1);
301        assert_eq!(stats.misses, 1);
302    }
303
304    #[test]
305    fn get_or_compute_different_strings() {
306        let mut cache = WidthCache::new(100);
307
308        cache.get_or_compute("hello");
309        cache.get_or_compute("world");
310        cache.get_or_compute("foo");
311
312        assert_eq!(cache.len(), 3);
313        let stats = cache.stats();
314        assert_eq!(stats.misses, 3);
315        assert_eq!(stats.hits, 0);
316    }
317
318    #[test]
319    fn get_or_compute_cjk() {
320        let mut cache = WidthCache::new(100);
321
322        let width = cache.get_or_compute("你好");
323        assert_eq!(width, 4); // 2 chars * 2 cells each
324    }
325
326    #[test]
327    fn contains() {
328        let mut cache = WidthCache::new(100);
329
330        assert!(!cache.contains("hello"));
331        cache.get_or_compute("hello");
332        assert!(cache.contains("hello"));
333    }
334
335    #[test]
336    fn get_returns_none_for_missing() {
337        let mut cache = WidthCache::new(100);
338        assert!(cache.get("missing").is_none());
339    }
340
341    #[test]
342    fn get_returns_cached_value() {
343        let mut cache = WidthCache::new(100);
344        cache.get_or_compute("hello");
345
346        let width = cache.get("hello");
347        assert_eq!(width, Some(5));
348    }
349
350    #[test]
351    fn peek_does_not_update_lru() {
352        let mut cache = WidthCache::new(2);
353
354        cache.get_or_compute("a");
355        cache.get_or_compute("b");
356
357        // Peek at "a" - should not update LRU order
358        let _ = cache.peek("a");
359
360        // Add "c" - should evict "a" (oldest)
361        cache.get_or_compute("c");
362
363        assert!(!cache.contains("a"));
364        assert!(cache.contains("b"));
365        assert!(cache.contains("c"));
366    }
367
368    #[test]
369    fn lru_eviction() {
370        let mut cache = WidthCache::new(2);
371
372        cache.get_or_compute("a");
373        cache.get_or_compute("b");
374        cache.get_or_compute("c"); // Should evict "a"
375
376        assert!(!cache.contains("a"));
377        assert!(cache.contains("b"));
378        assert!(cache.contains("c"));
379    }
380
381    #[test]
382    fn lru_refresh_on_access() {
383        let mut cache = WidthCache::new(2);
384
385        cache.get_or_compute("a");
386        cache.get_or_compute("b");
387        cache.get_or_compute("a"); // Refresh "a" to most recent
388        cache.get_or_compute("c"); // Should evict "b"
389
390        assert!(cache.contains("a"));
391        assert!(!cache.contains("b"));
392        assert!(cache.contains("c"));
393    }
394
395    #[test]
396    fn preload() {
397        let mut cache = WidthCache::new(100);
398
399        cache.preload("hello");
400        assert!(cache.contains("hello"));
401        assert_eq!(cache.peek("hello"), Some(5));
402
403        let stats = cache.stats();
404        assert_eq!(stats.misses, 0); // Preload doesn't count as miss
405        assert_eq!(stats.hits, 0);
406    }
407
408    #[test]
409    fn preload_many() {
410        let mut cache = WidthCache::new(100);
411
412        cache.preload_many(["hello", "world", "foo"]);
413        assert_eq!(cache.len(), 3);
414    }
415
416    #[test]
417    fn clear() {
418        let mut cache = WidthCache::new(100);
419        cache.get_or_compute("hello");
420        cache.get_or_compute("world");
421
422        cache.clear();
423        assert!(cache.is_empty());
424        assert!(!cache.contains("hello"));
425    }
426
427    #[test]
428    fn reset_stats() {
429        let mut cache = WidthCache::new(100);
430        cache.get_or_compute("hello");
431        cache.get_or_compute("hello");
432
433        let stats = cache.stats();
434        assert_eq!(stats.hits, 1);
435        assert_eq!(stats.misses, 1);
436
437        cache.reset_stats();
438        let stats = cache.stats();
439        assert_eq!(stats.hits, 0);
440        assert_eq!(stats.misses, 0);
441    }
442
443    #[test]
444    fn hit_rate() {
445        let stats = CacheStats {
446            hits: 75,
447            misses: 25,
448            size: 100,
449            capacity: 1000,
450        };
451        assert!((stats.hit_rate() - 0.75).abs() < 0.001);
452    }
453
454    #[test]
455    fn hit_rate_no_requests() {
456        let stats = CacheStats::default();
457        assert_eq!(stats.hit_rate(), 0.0);
458    }
459
460    #[test]
461    fn resize_smaller() {
462        let mut cache = WidthCache::new(100);
463        for i in 0..50 {
464            cache.get_or_compute(&format!("text{i}"));
465        }
466        assert_eq!(cache.len(), 50);
467
468        cache.resize(10);
469        assert!(cache.len() <= 10);
470        assert_eq!(cache.capacity(), 10);
471    }
472
473    #[test]
474    fn resize_larger() {
475        let mut cache = WidthCache::new(10);
476        cache.resize(100);
477        assert_eq!(cache.capacity(), 100);
478    }
479
480    #[test]
481    fn custom_compute_function() {
482        let mut cache = WidthCache::new(100);
483
484        // Use a custom width function (always returns 42)
485        let width = cache.get_or_compute_with("hello", |_| 42);
486        assert_eq!(width, 42);
487
488        // Cached value is 42
489        assert_eq!(cache.peek("hello"), Some(42));
490    }
491
492    #[test]
493    fn empty_string() {
494        let mut cache = WidthCache::new(100);
495        let width = cache.get_or_compute("");
496        assert_eq!(width, 0);
497    }
498
499    #[test]
500    fn hash_collision_handling() {
501        // Even with hash collisions, the LRU should handle them
502        // (this is just a stress test with many entries)
503        let mut cache = WidthCache::new(1000);
504
505        for i in 0..500 {
506            cache.get_or_compute(&format!("string{i}"));
507        }
508
509        assert_eq!(cache.len(), 500);
510    }
511
512    #[test]
513    fn unicode_strings() {
514        let mut cache = WidthCache::new(100);
515
516        // Various Unicode strings
517        assert_eq!(cache.get_or_compute("café"), 4);
518        assert_eq!(cache.get_or_compute("日本語"), 6);
519        assert_eq!(cache.get_or_compute("🎉"), 2); // Emoji typically 2 cells
520
521        assert_eq!(cache.len(), 3);
522    }
523
524    #[test]
525    fn combining_characters() {
526        let mut cache = WidthCache::new(100);
527
528        // e + combining acute accent
529        let width = cache.get_or_compute("e\u{0301}");
530        // Should be 1 cell (the combining char doesn't add width)
531        assert_eq!(width, 1);
532    }
533
534    // ==========================================================================
535    // Additional coverage tests
536    // ==========================================================================
537
538    #[test]
539    fn default_cache() {
540        let cache = WidthCache::default();
541        assert!(cache.is_empty());
542        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
543    }
544
545    #[test]
546    fn cache_stats_debug() {
547        let stats = CacheStats {
548            hits: 10,
549            misses: 5,
550            size: 15,
551            capacity: 100,
552        };
553        let debug = format!("{:?}", stats);
554        assert!(debug.contains("CacheStats"));
555        assert!(debug.contains("10")); // hits
556    }
557
558    #[test]
559    fn cache_stats_default() {
560        let stats = CacheStats::default();
561        assert_eq!(stats.hits, 0);
562        assert_eq!(stats.misses, 0);
563        assert_eq!(stats.size, 0);
564        assert_eq!(stats.capacity, 0);
565    }
566
567    #[test]
568    fn cache_stats_equality() {
569        let stats1 = CacheStats {
570            hits: 10,
571            misses: 5,
572            size: 15,
573            capacity: 100,
574        };
575        let stats2 = stats1; // Copy
576        assert_eq!(stats1, stats2);
577    }
578
579    #[test]
580    fn clear_after_preload() {
581        let mut cache = WidthCache::new(100);
582        cache.preload_many(["hello", "world", "test"]);
583        assert_eq!(cache.len(), 3);
584
585        cache.clear();
586        assert!(cache.is_empty());
587        assert!(!cache.contains("hello"));
588    }
589
590    #[test]
591    fn preload_existing_is_noop() {
592        let mut cache = WidthCache::new(100);
593        cache.get_or_compute("hello"); // First access
594        let len_before = cache.len();
595
596        cache.preload("hello"); // Already exists
597        assert_eq!(cache.len(), len_before);
598    }
599
600    #[test]
601    fn minimum_capacity_is_one() {
602        let cache = WidthCache::new(0);
603        assert_eq!(cache.capacity(), 1);
604    }
605
606    #[test]
607    fn width_cache_debug() {
608        let cache = WidthCache::new(10);
609        let debug = format!("{:?}", cache);
610        assert!(debug.contains("WidthCache"));
611    }
612
613    #[test]
614    fn emoji_zwj_sequence() {
615        let mut cache = WidthCache::new(100);
616        // Family emoji (ZWJ sequence)
617        let width = cache.get_or_compute("👨‍👩‍👧");
618        // Width varies by implementation, just ensure it doesn't panic
619        assert!(width >= 1);
620    }
621
622    #[test]
623    fn emoji_with_skin_tone() {
624        let mut cache = WidthCache::new(100);
625        let width = cache.get_or_compute("👍🏻");
626        assert!(width >= 1);
627    }
628
629    #[test]
630    fn flag_emoji() {
631        let mut cache = WidthCache::new(100);
632        // US flag emoji (regional indicators)
633        let width = cache.get_or_compute("🇺🇸");
634        assert!(width >= 1);
635    }
636
637    #[test]
638    fn mixed_width_strings() {
639        let mut cache = WidthCache::new(100);
640        // Mixed ASCII and CJK
641        let width = cache.get_or_compute("Hello你好World");
642        assert_eq!(width, 14); // 10 ASCII + 4 CJK
643    }
644
645    #[test]
646    fn stats_size_reflects_cache_len() {
647        let mut cache = WidthCache::new(100);
648        cache.get_or_compute("a");
649        cache.get_or_compute("b");
650        cache.get_or_compute("c");
651
652        let stats = cache.stats();
653        assert_eq!(stats.size, cache.len());
654        assert_eq!(stats.size, 3);
655    }
656
657    #[test]
658    fn stats_capacity_matches() {
659        let cache = WidthCache::new(42);
660        let stats = cache.stats();
661        assert_eq!(stats.capacity, 42);
662    }
663
664    #[test]
665    fn resize_to_zero_becomes_one() {
666        let mut cache = WidthCache::new(100);
667        cache.resize(0);
668        assert_eq!(cache.capacity(), 1);
669    }
670
671    #[test]
672    fn get_updates_lru_order() {
673        let mut cache = WidthCache::new(2);
674
675        cache.get_or_compute("a");
676        cache.get_or_compute("b");
677
678        // Access "a" via get() - should update LRU order
679        let _ = cache.get("a");
680
681        // Add "c" - should evict "b" (now oldest)
682        cache.get_or_compute("c");
683
684        assert!(cache.contains("a"));
685        assert!(!cache.contains("b"));
686        assert!(cache.contains("c"));
687    }
688
689    #[test]
690    fn contains_does_not_modify_stats() {
691        let mut cache = WidthCache::new(100);
692        cache.get_or_compute("hello");
693
694        let stats_before = cache.stats();
695        let _ = cache.contains("hello");
696        let _ = cache.contains("missing");
697        let stats_after = cache.stats();
698
699        assert_eq!(stats_before.hits, stats_after.hits);
700        assert_eq!(stats_before.misses, stats_after.misses);
701    }
702
703    #[test]
704    fn peek_returns_none_for_missing() {
705        let cache = WidthCache::new(100);
706        assert!(cache.peek("missing").is_none());
707    }
708
709    #[test]
710    fn custom_compute_called_once() {
711        let mut cache = WidthCache::new(100);
712        let mut call_count = 0;
713
714        cache.get_or_compute_with("test", |_| {
715            call_count += 1;
716            10
717        });
718
719        cache.get_or_compute_with("test", |_| {
720            call_count += 1;
721            20 // This shouldn't be called
722        });
723
724        assert_eq!(call_count, 1);
725        assert_eq!(cache.peek("test"), Some(10));
726    }
727
728    #[test]
729    fn whitespace_strings() {
730        let mut cache = WidthCache::new(100);
731        assert_eq!(cache.get_or_compute("   "), 3); // 3 spaces
732        assert_eq!(cache.get_or_compute("\t"), 1); // Tab is 1 cell typically
733        assert_eq!(cache.get_or_compute("\n"), 1); // Newline
734    }
735}
736
737// ---------------------------------------------------------------------------
738// W-TinyLFU Admission Components (bd-4kq0.6.1)
739// ---------------------------------------------------------------------------
740//
741// # Design
742//
743// W-TinyLFU augments LRU eviction with a frequency-based admission filter:
744//
745// 1. **Count-Min Sketch (CMS)**: Approximate frequency counter.
746//    - Parameters: width `w`, depth `d`.
747//    - Error bound: estimated count <= true count + epsilon * N
748//      with probability >= 1 - delta, where:
749//        epsilon = e / w  (e = Euler's number ≈ 2.718)
750//        delta   = (1/2)^d
751//    - Chosen defaults: w=1024 (epsilon ≈ 0.0027), d=4 (delta ≈ 0.0625).
752//    - Counter width: 4 bits (saturating at 15). Periodic halving (aging)
753//      every `reset_interval` increments to prevent staleness.
754//
755// 2. **Doorkeeper**: 1-bit Bloom filter (single hash, `doorkeeper_bits` entries).
756//    - Filters one-hit wonders before they reach the CMS.
757//    - On first access: set doorkeeper bit. On second access in the same
758//      epoch: increment CMS. Cleared on CMS reset.
759//    - Default: 2048 bits (256 bytes).
760//
761// 3. **Admission rule**: When evicting, compare frequencies:
762//    - `freq(candidate) > freq(victim)` → admit candidate, evict victim.
763//    - `freq(candidate) <= freq(victim)` → reject candidate, keep victim.
764//
765// 4. **Fingerprint guard**: The CMS stores 64-bit hashes. Since the main
766//    cache also keys by 64-bit hash, a collision means two distinct strings
767//    share the same key. The fingerprint guard adds a secondary hash
768//    (different seed) stored alongside the value. On lookup, if the
769//    secondary hash mismatches, the entry is treated as a miss and evicted.
770//
771// # Failure Modes
772// - CMS overcounting: bounded by epsilon * N; aging limits staleness.
773// - Doorkeeper false positives: one-hit items may leak to CMS. Bounded
774//   by Bloom FP rate ≈ (1 - e^{-k*n/m})^k with k=1.
775// - Fingerprint collision (secondary hash): probability ~2^{-64}; negligible.
776// - Reset storm: halving all counters is O(w*d). With w=1024, d=4 this is
777//   4096 operations — negligible vs. rendering cost.
778
779/// Count-Min Sketch for approximate frequency estimation.
780///
781/// Uses `depth` independent hash functions (derived from a single hash via
782/// mixing) and `width` counters per row. Each counter is a `u8` saturating
783/// at [`CountMinSketch::MAX_COUNT`] (15 by default, representing 4-bit counters).
784///
785/// # Error Bounds
786///
787/// For a sketch with width `w` and depth `d`, after `N` total increments:
788/// - `estimate(x) <= true_count(x) + epsilon * N` with probability `>= 1 - delta`
789/// - where `epsilon = e / w` and `delta = (1/2)^d`
790#[derive(Debug, Clone)]
791pub struct CountMinSketch {
792    /// Counter matrix: `depth` rows of `width` counters each.
793    counters: Vec<Vec<u8>>,
794    /// Number of hash functions (rows).
795    depth: usize,
796    /// Number of counters per row.
797    width: usize,
798    /// Total number of increments since last reset.
799    total_increments: u64,
800    /// Increment count at which to halve all counters.
801    reset_interval: u64,
802}
803
804/// Maximum counter value (4-bit saturation).
805const CMS_MAX_COUNT: u8 = 15;
806
807/// Default CMS width. epsilon = e/1024 ≈ 0.0027.
808const CMS_DEFAULT_WIDTH: usize = 1024;
809
810/// Default CMS depth. delta = (1/2)^4 = 0.0625.
811const CMS_DEFAULT_DEPTH: usize = 4;
812
813/// Default reset interval (halve counters after this many increments).
814const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
815
816impl CountMinSketch {
817    /// Create a new Count-Min Sketch with the given dimensions.
818    pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
819        let width = width.max(1);
820        let depth = depth.max(1);
821        Self {
822            counters: vec![vec![0u8; width]; depth],
823            depth,
824            width,
825            total_increments: 0,
826            reset_interval: reset_interval.max(1),
827        }
828    }
829
830    /// Create a sketch with default parameters (w=1024, d=4, reset=8192).
831    pub fn with_defaults() -> Self {
832        Self::new(
833            CMS_DEFAULT_WIDTH,
834            CMS_DEFAULT_DEPTH,
835            CMS_DEFAULT_RESET_INTERVAL,
836        )
837    }
838
839    /// Increment the count for a key.
840    pub fn increment(&mut self, hash: u64) {
841        for row in 0..self.depth {
842            let idx = self.index(hash, row);
843            self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
844        }
845        self.total_increments += 1;
846
847        if self.total_increments >= self.reset_interval {
848            self.halve();
849        }
850    }
851
852    /// Estimate the frequency of a key (minimum across all rows).
853    pub fn estimate(&self, hash: u64) -> u8 {
854        let mut min = u8::MAX;
855        for row in 0..self.depth {
856            let idx = self.index(hash, row);
857            min = min.min(self.counters[row][idx]);
858        }
859        min
860    }
861
862    /// Total number of increments since creation or last reset.
863    pub fn total_increments(&self) -> u64 {
864        self.total_increments
865    }
866
867    /// Halve all counters (aging). Resets the increment counter.
868    fn halve(&mut self) {
869        for row in &mut self.counters {
870            for c in row.iter_mut() {
871                *c /= 2;
872            }
873        }
874        self.total_increments = 0;
875    }
876
877    /// Clear all counters to zero.
878    pub fn clear(&mut self) {
879        for row in &mut self.counters {
880            row.fill(0);
881        }
882        self.total_increments = 0;
883    }
884
885    /// Compute the column index for a given hash and row.
886    #[inline]
887    fn index(&self, hash: u64, row: usize) -> usize {
888        // Mix the hash with the row index for independent hash functions.
889        let mixed = hash
890            .wrapping_mul(0x517c_c1b7_2722_0a95)
891            .wrapping_add(row as u64);
892        let mixed = mixed ^ (mixed >> 32);
893        (mixed as usize) % self.width
894    }
895}
896
897/// 1-bit Bloom filter used as a doorkeeper to filter one-hit wonders.
898///
899/// On first access within an epoch, the doorkeeper sets a bit. Only on
900/// the second access does the item get promoted to the Count-Min Sketch.
901/// The doorkeeper is cleared whenever the CMS resets (halves).
902#[derive(Debug, Clone)]
903pub struct Doorkeeper {
904    bits: Vec<u64>,
905    num_bits: usize,
906}
907
908/// Default doorkeeper size in bits.
909const DOORKEEPER_DEFAULT_BITS: usize = 2048;
910
911impl Doorkeeper {
912    /// Create a new doorkeeper with the specified number of bits.
913    pub fn new(num_bits: usize) -> Self {
914        let num_bits = num_bits.max(64);
915        let num_words = num_bits.div_ceil(64);
916        Self {
917            bits: vec![0u64; num_words],
918            num_bits,
919        }
920    }
921
922    /// Create a doorkeeper with the default size (2048 bits).
923    pub fn with_defaults() -> Self {
924        Self::new(DOORKEEPER_DEFAULT_BITS)
925    }
926
927    /// Check if a key has been seen. Returns true if the bit was already set.
928    pub fn check_and_set(&mut self, hash: u64) -> bool {
929        let idx = (hash as usize) % self.num_bits;
930        let word = idx / 64;
931        let bit = idx % 64;
932        let was_set = (self.bits[word] >> bit) & 1 == 1;
933        self.bits[word] |= 1 << bit;
934        was_set
935    }
936
937    /// Check if a key has been seen without setting.
938    pub fn contains(&self, hash: u64) -> bool {
939        let idx = (hash as usize) % self.num_bits;
940        let word = idx / 64;
941        let bit = idx % 64;
942        (self.bits[word] >> bit) & 1 == 1
943    }
944
945    /// Clear all bits.
946    pub fn clear(&mut self) {
947        self.bits.fill(0);
948    }
949}
950
951/// Compute a secondary fingerprint hash for collision guard.
952///
953/// Uses a different multiplicative constant than FxHash to produce
954/// an independent 64-bit fingerprint.
955#[inline]
956pub fn fingerprint_hash(text: &str) -> u64 {
957    // Simple but effective: fold bytes with a different constant than FxHash.
958    let mut h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
959    for &b in text.as_bytes() {
960        h ^= b as u64;
961        h = h.wrapping_mul(0x0100_0000_01b3); // FNV prime
962    }
963    h
964}
965
966/// Evaluate the TinyLFU admission rule.
967///
968/// Returns `true` if the candidate should be admitted (replacing the victim).
969///
970/// # Rule
971/// Admit if `freq(candidate) > freq(victim)`. On tie, reject (keep victim).
972#[inline]
973pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
974    candidate_freq > victim_freq
975}
976
977// ---------------------------------------------------------------------------
978// W-TinyLFU Width Cache (bd-4kq0.6.2)
979// ---------------------------------------------------------------------------
980
981/// Entry in the TinyLFU cache, storing value and fingerprint for collision guard.
982#[derive(Debug, Clone)]
983struct TinyLfuEntry {
984    width: usize,
985    fingerprint: u64,
986}
987
988/// Width cache using W-TinyLFU admission policy.
989///
990/// Architecture:
991/// - **Window cache** (small LRU, ~1% of capacity): captures recent items.
992/// - **Main cache** (larger LRU, ~99% of capacity): for frequently accessed items.
993/// - **Count-Min Sketch + Doorkeeper**: frequency estimation for admission decisions.
994/// - **Fingerprint guard**: secondary hash per entry to detect hash collisions.
995///
996/// On every access:
997/// 1. Check main cache → hit? Return value (verify fingerprint).
998/// 2. Check window cache → hit? Return value (verify fingerprint).
999/// 3. Miss: compute width, insert into window cache.
1000///
1001/// On window cache eviction:
1002/// 1. The evicted item becomes a candidate.
1003/// 2. The LRU victim of the main cache is identified.
1004/// 3. If `freq(candidate) > freq(victim)`, candidate enters main cache
1005///    (victim is evicted). Otherwise, candidate is discarded.
1006///
1007/// Frequency tracking uses Doorkeeper → CMS pipeline:
1008/// - First access: doorkeeper records.
1009/// - Second+ access: CMS is incremented.
1010#[derive(Debug)]
1011pub struct TinyLfuWidthCache {
1012    /// Small window cache (recency).
1013    window: LruCache<u64, TinyLfuEntry>,
1014    /// Large main cache (frequency-filtered).
1015    main: LruCache<u64, TinyLfuEntry>,
1016    /// Approximate frequency counter.
1017    sketch: CountMinSketch,
1018    /// One-hit-wonder filter.
1019    doorkeeper: Doorkeeper,
1020    /// Total capacity (window + main).
1021    total_capacity: usize,
1022    /// Hit/miss stats.
1023    hits: u64,
1024    misses: u64,
1025}
1026
1027impl TinyLfuWidthCache {
1028    /// Create a new TinyLFU cache with the given total capacity.
1029    ///
1030    /// The window gets ~1% of capacity (minimum 1), main gets the rest.
1031    pub fn new(total_capacity: usize) -> Self {
1032        let total_capacity = total_capacity.max(2);
1033        let window_cap = (total_capacity / 100).max(1);
1034        let main_cap = total_capacity - window_cap;
1035
1036        Self {
1037            window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1038            main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1039            sketch: CountMinSketch::with_defaults(),
1040            doorkeeper: Doorkeeper::with_defaults(),
1041            total_capacity,
1042            hits: 0,
1043            misses: 0,
1044        }
1045    }
1046
1047    /// Get cached width or compute and cache it.
1048    pub fn get_or_compute(&mut self, text: &str) -> usize {
1049        self.get_or_compute_with(text, crate::display_width)
1050    }
1051
1052    /// Get cached width or compute using a custom function.
1053    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1054    where
1055        F: FnOnce(&str) -> usize,
1056    {
1057        let hash = hash_text(text);
1058        let fp = fingerprint_hash(text);
1059
1060        // Record frequency via doorkeeper → CMS pipeline.
1061        let seen = self.doorkeeper.check_and_set(hash);
1062        if seen {
1063            self.sketch.increment(hash);
1064        }
1065
1066        // Check main cache first (larger, higher value).
1067        if let Some(entry) = self.main.get(&hash) {
1068            if entry.fingerprint == fp {
1069                self.hits += 1;
1070                return entry.width;
1071            }
1072            // Fingerprint mismatch: collision. Evict stale entry.
1073            self.main.pop(&hash);
1074        }
1075
1076        // Check window cache.
1077        if let Some(entry) = self.window.get(&hash) {
1078            if entry.fingerprint == fp {
1079                self.hits += 1;
1080                return entry.width;
1081            }
1082            // Collision in window cache.
1083            self.window.pop(&hash);
1084        }
1085
1086        // Cache miss: compute width.
1087        self.misses += 1;
1088        let width = compute(text);
1089        let new_entry = TinyLfuEntry {
1090            width,
1091            fingerprint: fp,
1092        };
1093
1094        // Insert into window cache. If window is full, the evicted item
1095        // goes through admission filter for main cache.
1096        if self.window.len() >= self.window.cap().get() {
1097            // Get the LRU item from window before it's evicted.
1098            if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1099                self.try_admit_to_main(evicted_hash, evicted_entry);
1100            }
1101        }
1102        self.window.put(hash, new_entry);
1103
1104        width
1105    }
1106
1107    /// Try to admit a candidate (evicted from window) into the main cache.
1108    fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1109        let candidate_freq = self.sketch.estimate(candidate_hash);
1110
1111        if self.main.len() < self.main.cap().get() {
1112            // Main has room — admit unconditionally.
1113            self.main.put(candidate_hash, candidate_entry);
1114            return;
1115        }
1116
1117        // Main is full. Compare candidate frequency with the LRU victim.
1118        if let Some((&victim_hash, _)) = self.main.peek_lru() {
1119            let victim_freq = self.sketch.estimate(victim_hash);
1120            if tinylfu_admit(candidate_freq, victim_freq) {
1121                self.main.pop_lru();
1122                self.main.put(candidate_hash, candidate_entry);
1123            }
1124            // Otherwise, candidate is discarded.
1125        }
1126    }
1127
1128    /// Check if a key is in the cache (window or main).
1129    pub fn contains(&self, text: &str) -> bool {
1130        let hash = hash_text(text);
1131        let fp = fingerprint_hash(text);
1132        if let Some(e) = self.main.peek(&hash)
1133            && e.fingerprint == fp
1134        {
1135            return true;
1136        }
1137        if let Some(e) = self.window.peek(&hash)
1138            && e.fingerprint == fp
1139        {
1140            return true;
1141        }
1142        false
1143    }
1144
1145    /// Get cache statistics.
1146    pub fn stats(&self) -> CacheStats {
1147        CacheStats {
1148            hits: self.hits,
1149            misses: self.misses,
1150            size: self.window.len() + self.main.len(),
1151            capacity: self.total_capacity,
1152        }
1153    }
1154
1155    /// Clear all caches and reset sketch/doorkeeper.
1156    pub fn clear(&mut self) {
1157        self.window.clear();
1158        self.main.clear();
1159        self.sketch.clear();
1160        self.doorkeeper.clear();
1161    }
1162
1163    /// Reset statistics.
1164    pub fn reset_stats(&mut self) {
1165        self.hits = 0;
1166        self.misses = 0;
1167    }
1168
1169    /// Current number of cached entries.
1170    pub fn len(&self) -> usize {
1171        self.window.len() + self.main.len()
1172    }
1173
1174    /// Check if cache is empty.
1175    pub fn is_empty(&self) -> bool {
1176        self.window.is_empty() && self.main.is_empty()
1177    }
1178
1179    /// Total capacity (window + main).
1180    pub fn capacity(&self) -> usize {
1181        self.total_capacity
1182    }
1183
1184    /// Number of entries in the main cache.
1185    pub fn main_len(&self) -> usize {
1186        self.main.len()
1187    }
1188
1189    /// Number of entries in the window cache.
1190    pub fn window_len(&self) -> usize {
1191        self.window.len()
1192    }
1193}
1194
1195#[cfg(test)]
1196mod proptests {
1197    use super::*;
1198    use proptest::prelude::*;
1199
1200    proptest! {
1201        #[test]
1202        fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1203            let mut cache = WidthCache::new(100);
1204            let cached = cache.get_or_compute(&s);
1205            let direct = crate::display_width(&s);
1206            prop_assert_eq!(cached, direct);
1207        }
1208
1209        #[test]
1210        fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1211            let mut cache = WidthCache::new(100);
1212
1213            cache.get_or_compute(&s);
1214            let stats_before = cache.stats();
1215
1216            cache.get_or_compute(&s);
1217            let stats_after = cache.stats();
1218
1219            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1220            prop_assert_eq!(stats_after.misses, stats_before.misses);
1221        }
1222
1223        #[test]
1224        fn lru_never_exceeds_capacity(
1225            strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1226            capacity in 5usize..20
1227        ) {
1228            let mut cache = WidthCache::new(capacity);
1229
1230            for s in &strings {
1231                cache.get_or_compute(s);
1232                prop_assert!(cache.len() <= capacity);
1233            }
1234        }
1235
1236        #[test]
1237        fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1238            let mut cache = WidthCache::new(100);
1239
1240            cache.preload(&s);
1241            let stats_before = cache.stats();
1242
1243            cache.get_or_compute(&s);
1244            let stats_after = cache.stats();
1245
1246            // Should be a hit (preloaded)
1247            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1248        }
1249    }
1250}
1251
1252// ---------------------------------------------------------------------------
1253// TinyLFU Spec Tests (bd-4kq0.6.1)
1254// ---------------------------------------------------------------------------
1255
1256#[cfg(test)]
1257mod tinylfu_tests {
1258    use super::*;
1259
1260    // --- Count-Min Sketch ---
1261
1262    #[test]
1263    fn unit_cms_single_key_count() {
1264        let mut cms = CountMinSketch::with_defaults();
1265        let h = hash_text("hello");
1266
1267        for _ in 0..5 {
1268            cms.increment(h);
1269        }
1270        assert_eq!(cms.estimate(h), 5);
1271    }
1272
1273    #[test]
1274    fn unit_cms_unseen_key_is_zero() {
1275        let cms = CountMinSketch::with_defaults();
1276        assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1277    }
1278
1279    #[test]
1280    fn unit_cms_saturates_at_max() {
1281        let mut cms = CountMinSketch::with_defaults();
1282        let h = hash_text("hot");
1283
1284        for _ in 0..100 {
1285            cms.increment(h);
1286        }
1287        assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1288    }
1289
1290    #[test]
1291    fn unit_cms_bounds() {
1292        // Error bound: estimate(x) <= true_count(x) + epsilon * N.
1293        // With w=1024, epsilon = e/1024 ~ 0.00266.
1294        let mut cms = CountMinSketch::new(1024, 4, u64::MAX); // no reset
1295        let n: u64 = 1000;
1296
1297        // Insert 1000 unique keys
1298        for i in 0..n {
1299            cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1300        }
1301
1302        // Check a target key inserted exactly 5 times
1303        let target = 0xDEAD_BEEF_u64;
1304        for _ in 0..5 {
1305            cms.increment(target);
1306        }
1307
1308        let est = cms.estimate(target);
1309        let epsilon = std::f64::consts::E / 1024.0;
1310        let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1311
1312        assert!(
1313            (est as f64) <= upper_bound,
1314            "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1315            est,
1316            upper_bound,
1317            epsilon,
1318            n + 5,
1319        );
1320        assert!(est >= 5, "estimate {} should be >= true count 5", est);
1321    }
1322
1323    #[test]
1324    fn unit_cms_bounds_mass_test() {
1325        let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1326        let n = 2000u64;
1327
1328        let mut true_counts = vec![0u8; n as usize];
1329        for i in 0..n {
1330            let count = (i % 10 + 1) as u8;
1331            true_counts[i as usize] = count;
1332            for _ in 0..count {
1333                cms.increment(i);
1334            }
1335        }
1336
1337        let total = cms.total_increments();
1338        let epsilon = std::f64::consts::E / 1024.0;
1339        let mut violations = 0u32;
1340
1341        for i in 0..n {
1342            let est = cms.estimate(i);
1343            let true_c = true_counts[i as usize];
1344            let upper = true_c as f64 + epsilon * total as f64;
1345            if est as f64 > upper + 0.5 {
1346                violations += 1;
1347            }
1348            assert!(
1349                est >= true_c,
1350                "key {}: estimate {} < true count {}",
1351                i,
1352                est,
1353                true_c
1354            );
1355        }
1356
1357        // delta = (1/2)^4 = 0.0625; allow generous threshold
1358        let violation_rate = violations as f64 / n as f64;
1359        assert!(
1360            violation_rate <= 0.10,
1361            "violation rate {:.3} exceeds delta threshold",
1362            violation_rate,
1363        );
1364    }
1365
1366    #[test]
1367    fn unit_cms_halving_ages_counts() {
1368        let mut cms = CountMinSketch::new(64, 2, 100);
1369
1370        let h = hash_text("test");
1371        for _ in 0..10 {
1372            cms.increment(h);
1373        }
1374        assert_eq!(cms.estimate(h), 10);
1375
1376        // Trigger reset by reaching reset_interval
1377        for _ in 10..100 {
1378            cms.increment(hash_text("noise"));
1379        }
1380
1381        let est = cms.estimate(h);
1382        assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1383    }
1384
1385    #[test]
1386    fn unit_cms_monotone() {
1387        let mut cms = CountMinSketch::with_defaults();
1388        let h = hash_text("key");
1389
1390        let mut prev_est = 0u8;
1391        for _ in 0..CMS_MAX_COUNT {
1392            cms.increment(h);
1393            let est = cms.estimate(h);
1394            assert!(est >= prev_est, "estimate should be monotone");
1395            prev_est = est;
1396        }
1397    }
1398
1399    // --- Doorkeeper ---
1400
1401    #[test]
1402    fn unit_doorkeeper_first_access_returns_false() {
1403        let mut dk = Doorkeeper::with_defaults();
1404        assert!(!dk.check_and_set(hash_text("new")));
1405    }
1406
1407    #[test]
1408    fn unit_doorkeeper_second_access_returns_true() {
1409        let mut dk = Doorkeeper::with_defaults();
1410        let h = hash_text("key");
1411        dk.check_and_set(h);
1412        assert!(dk.check_and_set(h));
1413    }
1414
1415    #[test]
1416    fn unit_doorkeeper_contains() {
1417        let mut dk = Doorkeeper::with_defaults();
1418        let h = hash_text("key");
1419        assert!(!dk.contains(h));
1420        dk.check_and_set(h);
1421        assert!(dk.contains(h));
1422    }
1423
1424    #[test]
1425    fn unit_doorkeeper_clear_resets() {
1426        let mut dk = Doorkeeper::with_defaults();
1427        let h = hash_text("key");
1428        dk.check_and_set(h);
1429        dk.clear();
1430        assert!(!dk.contains(h));
1431        assert!(!dk.check_and_set(h));
1432    }
1433
1434    #[test]
1435    fn unit_doorkeeper_false_positive_rate() {
1436        let mut dk = Doorkeeper::new(2048);
1437        let n = 100u64;
1438
1439        for i in 0..n {
1440            dk.check_and_set(i * 0x9E37_79B9 + 1);
1441        }
1442
1443        let mut false_positives = 0u32;
1444        for i in 0..1000 {
1445            let h = (i + 100_000) * 0x6A09_E667 + 7;
1446            if dk.contains(h) {
1447                false_positives += 1;
1448            }
1449        }
1450
1451        // k=1, m=2048, n=100: FP rate ~ 1 - e^{-100/2048} ~ 0.048
1452        let fp_rate = false_positives as f64 / 1000.0;
1453        assert!(
1454            fp_rate < 0.15,
1455            "FP rate {:.3} too high (expected < 0.15)",
1456            fp_rate,
1457        );
1458    }
1459
1460    // --- Admission Rule ---
1461
1462    #[test]
1463    fn unit_admission_rule() {
1464        assert!(tinylfu_admit(5, 3)); // candidate > victim -> admit
1465        assert!(!tinylfu_admit(3, 5)); // candidate < victim -> reject
1466        assert!(!tinylfu_admit(3, 3)); // tie -> reject (keep victim)
1467    }
1468
1469    #[test]
1470    fn unit_admission_rule_extremes() {
1471        assert!(tinylfu_admit(1, 0));
1472        assert!(!tinylfu_admit(0, 0));
1473        assert!(!tinylfu_admit(0, 1));
1474        assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1475        assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1476    }
1477
1478    // --- Fingerprint Guard ---
1479
1480    #[test]
1481    fn unit_fingerprint_guard() {
1482        let fp1 = fingerprint_hash("hello");
1483        let fp2 = fingerprint_hash("world");
1484        let fp3 = fingerprint_hash("hello");
1485
1486        assert_ne!(
1487            fp1, fp2,
1488            "Different strings should have different fingerprints"
1489        );
1490        assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1491    }
1492
1493    #[test]
1494    fn unit_fingerprint_guard_collision_rate() {
1495        let mut fps = std::collections::HashSet::new();
1496        let n = 10_000;
1497
1498        for i in 0..n {
1499            let s = format!("string_{}", i);
1500            fps.insert(fingerprint_hash(&s));
1501        }
1502
1503        let collisions = n - fps.len();
1504        assert!(
1505            collisions == 0,
1506            "Expected 0 collisions in 10k items, got {}",
1507            collisions,
1508        );
1509    }
1510
1511    #[test]
1512    fn unit_fingerprint_independent_of_primary_hash() {
1513        let text = "test_string";
1514        let primary = hash_text(text);
1515        let secondary = fingerprint_hash(text);
1516
1517        assert_ne!(
1518            primary, secondary,
1519            "Fingerprint and primary hash should differ"
1520        );
1521    }
1522
1523    // --- Integration: Doorkeeper + CMS pipeline ---
1524
1525    #[test]
1526    fn unit_doorkeeper_cms_pipeline() {
1527        let mut dk = Doorkeeper::with_defaults();
1528        let mut cms = CountMinSketch::with_defaults();
1529        let h = hash_text("item");
1530
1531        // First access: doorkeeper records
1532        assert!(!dk.check_and_set(h));
1533        assert_eq!(cms.estimate(h), 0);
1534
1535        // Second access: doorkeeper confirms, CMS incremented
1536        assert!(dk.check_and_set(h));
1537        cms.increment(h);
1538        assert_eq!(cms.estimate(h), 1);
1539
1540        // Third access
1541        assert!(dk.check_and_set(h));
1542        cms.increment(h);
1543        assert_eq!(cms.estimate(h), 2);
1544    }
1545
1546    #[test]
1547    fn unit_doorkeeper_filters_one_hit_wonders() {
1548        let mut dk = Doorkeeper::with_defaults();
1549        let mut cms = CountMinSketch::with_defaults();
1550
1551        // 100 one-hit items
1552        for i in 0u64..100 {
1553            let h = i * 0x9E37_79B9 + 1;
1554            let seen = dk.check_and_set(h);
1555            if seen {
1556                cms.increment(h);
1557            }
1558        }
1559
1560        assert_eq!(cms.total_increments(), 0);
1561
1562        // Access one again -> passes doorkeeper
1563        let h = 1; // i=0 from the loop above
1564        assert!(dk.check_and_set(h));
1565        cms.increment(h);
1566        assert_eq!(cms.total_increments(), 1);
1567    }
1568}
1569
1570// ---------------------------------------------------------------------------
1571// TinyLFU Implementation Tests (bd-4kq0.6.2)
1572// ---------------------------------------------------------------------------
1573
1574#[cfg(test)]
1575mod tinylfu_impl_tests {
1576    use super::*;
1577
1578    #[test]
1579    fn basic_get_or_compute() {
1580        let mut cache = TinyLfuWidthCache::new(100);
1581        let w = cache.get_or_compute("hello");
1582        assert_eq!(w, 5);
1583        assert_eq!(cache.len(), 1);
1584
1585        let w2 = cache.get_or_compute("hello");
1586        assert_eq!(w2, 5);
1587        let stats = cache.stats();
1588        assert_eq!(stats.misses, 1);
1589        assert_eq!(stats.hits, 1);
1590    }
1591
1592    #[test]
1593    fn window_to_main_promotion() {
1594        // With capacity=100, window=1, main=99.
1595        // Fill window, then force eviction into main via new inserts.
1596        let mut cache = TinyLfuWidthCache::new(100);
1597
1598        // Access "frequent" many times to build CMS frequency.
1599        for _ in 0..10 {
1600            cache.get_or_compute("frequent");
1601        }
1602
1603        // Insert enough items to fill window and force eviction.
1604        for i in 0..5 {
1605            cache.get_or_compute(&format!("item_{}", i));
1606        }
1607
1608        // "frequent" should have been promoted to main cache via admission.
1609        assert!(cache.contains("frequent"));
1610        assert!(cache.main_len() > 0 || cache.window_len() > 0);
1611    }
1612
1613    #[test]
1614    fn unit_window_promotion() {
1615        // Frequent items should end up in main cache.
1616        let mut cache = TinyLfuWidthCache::new(50);
1617
1618        // Access "hot" repeatedly to build frequency.
1619        for _ in 0..20 {
1620            cache.get_or_compute("hot");
1621        }
1622
1623        // Now push enough items through window to force "hot" out of window.
1624        for i in 0..10 {
1625            cache.get_or_compute(&format!("filler_{}", i));
1626        }
1627
1628        // "hot" should still be accessible (promoted to main via admission).
1629        assert!(cache.contains("hot"), "Frequent item should be retained");
1630    }
1631
1632    #[test]
1633    fn fingerprint_guard_detects_collision() {
1634        let mut cache = TinyLfuWidthCache::new(100);
1635
1636        // Compute "hello" with custom function.
1637        let w = cache.get_or_compute_with("hello", |_| 42);
1638        assert_eq!(w, 42);
1639
1640        // Verify it's cached.
1641        assert!(cache.contains("hello"));
1642    }
1643
1644    #[test]
1645    fn admission_rejects_infrequent() {
1646        // Fill main cache with frequently-accessed items.
1647        // Then try to insert a cold item — it should be rejected.
1648        let mut cache = TinyLfuWidthCache::new(10); // window=1, main=9
1649
1650        // Fill main with items accessed multiple times.
1651        for i in 0..9 {
1652            let s = format!("hot_{}", i);
1653            for _ in 0..5 {
1654                cache.get_or_compute(&s);
1655            }
1656        }
1657
1658        // Now insert cold items. They should go through window but not
1659        // necessarily get into main.
1660        for i in 0..20 {
1661            cache.get_or_compute(&format!("cold_{}", i));
1662        }
1663
1664        // Hot items should mostly survive (they have high frequency).
1665        let hot_survivors: usize = (0..9)
1666            .filter(|i| cache.contains(&format!("hot_{}", i)))
1667            .count();
1668        assert!(
1669            hot_survivors >= 5,
1670            "Expected most hot items to survive, got {}/9",
1671            hot_survivors
1672        );
1673    }
1674
1675    #[test]
1676    fn clear_empties_everything() {
1677        let mut cache = TinyLfuWidthCache::new(100);
1678        cache.get_or_compute("a");
1679        cache.get_or_compute("b");
1680        cache.clear();
1681        assert!(cache.is_empty());
1682        assert_eq!(cache.len(), 0);
1683    }
1684
1685    #[test]
1686    fn stats_reflect_usage() {
1687        let mut cache = TinyLfuWidthCache::new(100);
1688        cache.get_or_compute("a");
1689        cache.get_or_compute("a");
1690        cache.get_or_compute("b");
1691
1692        let stats = cache.stats();
1693        assert_eq!(stats.misses, 2);
1694        assert_eq!(stats.hits, 1);
1695        assert_eq!(stats.size, 2);
1696    }
1697
1698    #[test]
1699    fn capacity_is_respected() {
1700        let mut cache = TinyLfuWidthCache::new(20);
1701
1702        for i in 0..100 {
1703            cache.get_or_compute(&format!("item_{}", i));
1704        }
1705
1706        assert!(
1707            cache.len() <= 20,
1708            "Cache size {} exceeds capacity 20",
1709            cache.len()
1710        );
1711    }
1712
1713    #[test]
1714    fn reset_stats_works() {
1715        let mut cache = TinyLfuWidthCache::new(100);
1716        cache.get_or_compute("x");
1717        cache.get_or_compute("x");
1718        cache.reset_stats();
1719        let stats = cache.stats();
1720        assert_eq!(stats.hits, 0);
1721        assert_eq!(stats.misses, 0);
1722    }
1723
1724    #[test]
1725    fn perf_cache_hit_rate() {
1726        // Simulate a Zipfian-like workload: some items accessed frequently,
1727        // many accessed rarely. TinyLFU should achieve decent hit rate.
1728        let mut cache = TinyLfuWidthCache::new(50);
1729
1730        // 10 hot items accessed 20 times each.
1731        for _ in 0..20 {
1732            for i in 0..10 {
1733                cache.get_or_compute(&format!("hot_{}", i));
1734            }
1735        }
1736
1737        // 100 cold items accessed once each.
1738        for i in 0..100 {
1739            cache.get_or_compute(&format!("cold_{}", i));
1740        }
1741
1742        // Re-access hot items — these should mostly be hits.
1743        cache.reset_stats();
1744        for i in 0..10 {
1745            cache.get_or_compute(&format!("hot_{}", i));
1746        }
1747
1748        let stats = cache.stats();
1749        // Hot items should have high hit rate after being frequently accessed.
1750        assert!(
1751            stats.hits >= 5,
1752            "Expected at least 5/10 hot items to hit, got {}",
1753            stats.hits
1754        );
1755    }
1756
1757    #[test]
1758    fn unicode_strings_work() {
1759        let mut cache = TinyLfuWidthCache::new(100);
1760        assert_eq!(cache.get_or_compute("日本語"), 6);
1761        assert_eq!(cache.get_or_compute("café"), 4);
1762        assert_eq!(cache.get_or_compute("日本語"), 6); // hit
1763        assert_eq!(cache.stats().hits, 1);
1764    }
1765
1766    #[test]
1767    fn empty_string() {
1768        let mut cache = TinyLfuWidthCache::new(100);
1769        assert_eq!(cache.get_or_compute(""), 0);
1770    }
1771
1772    #[test]
1773    fn minimum_capacity() {
1774        let cache = TinyLfuWidthCache::new(0);
1775        assert!(cache.capacity() >= 2);
1776    }
1777}
1778
1779// ---------------------------------------------------------------------------
1780// bd-4kq0.6.3: WidthCache Tests + Perf Gates
1781// ---------------------------------------------------------------------------
1782
1783/// Deterministic LCG for test reproducibility (no external rand dependency).
1784#[cfg(test)]
1785struct Lcg(u64);
1786
1787#[cfg(test)]
1788impl Lcg {
1789    fn new(seed: u64) -> Self {
1790        Self(seed)
1791    }
1792    fn next_u64(&mut self) -> u64 {
1793        self.0 = self
1794            .0
1795            .wrapping_mul(6_364_136_223_846_793_005)
1796            .wrapping_add(1);
1797        self.0
1798    }
1799    fn next_usize(&mut self, max: usize) -> usize {
1800        (self.next_u64() % (max as u64)) as usize
1801    }
1802}
1803
1804// ---------------------------------------------------------------------------
1805// 1. Property: cache equivalence (cached width == computed width)
1806// ---------------------------------------------------------------------------
1807
1808#[cfg(test)]
1809mod property_cache_equivalence {
1810    use super::*;
1811    use proptest::prelude::*;
1812
1813    proptest! {
1814        #[test]
1815        fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
1816            let mut cache = TinyLfuWidthCache::new(200);
1817            let cached = cache.get_or_compute(&s);
1818            let direct = crate::display_width(&s);
1819            prop_assert_eq!(cached, direct,
1820                "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
1821        }
1822
1823        #[test]
1824        fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
1825            let mut cache = TinyLfuWidthCache::new(200);
1826            let first = cache.get_or_compute(&s);
1827            let second = cache.get_or_compute(&s);
1828            prop_assert_eq!(first, second,
1829                "First access returned {} but second returned {} for {:?}", first, second, s);
1830        }
1831
1832        #[test]
1833        fn tinylfu_never_exceeds_capacity(
1834            strings in prop::collection::vec("[a-z]{1,5}", 10..200),
1835            capacity in 10usize..50
1836        ) {
1837            let mut cache = TinyLfuWidthCache::new(capacity);
1838            for s in &strings {
1839                cache.get_or_compute(s);
1840                prop_assert!(cache.len() <= capacity,
1841                    "Cache size {} exceeded capacity {}", cache.len(), capacity);
1842            }
1843        }
1844
1845        #[test]
1846        fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
1847            let mut cache = TinyLfuWidthCache::new(100);
1848            let custom_fn = |text: &str| text.len(); // byte length as custom metric
1849            let cached = cache.get_or_compute_with(&s, custom_fn);
1850            prop_assert_eq!(cached, s.len(),
1851                "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
1852        }
1853    }
1854
1855    #[test]
1856    fn deterministic_seed_equivalence() {
1857        // Same workload with same seed → same results every time.
1858        let mut rng = super::Lcg::new(0xDEAD_BEEF);
1859
1860        let mut cache1 = TinyLfuWidthCache::new(50);
1861        let mut results1 = Vec::new();
1862        for _ in 0..500 {
1863            let idx = rng.next_usize(100);
1864            let s = format!("key_{}", idx);
1865            results1.push(cache1.get_or_compute(&s));
1866        }
1867
1868        let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
1869        let mut cache2 = TinyLfuWidthCache::new(50);
1870        let mut results2 = Vec::new();
1871        for _ in 0..500 {
1872            let idx = rng2.next_usize(100);
1873            let s = format!("key_{}", idx);
1874            results2.push(cache2.get_or_compute(&s));
1875        }
1876
1877        assert_eq!(
1878            results1, results2,
1879            "Deterministic seed must produce identical results"
1880        );
1881    }
1882
1883    #[test]
1884    fn both_caches_agree_on_widths() {
1885        // WidthCache (plain LRU) and TinyLfuWidthCache must return the same
1886        // widths for any input.
1887        let mut lru = WidthCache::new(200);
1888        let mut tlfu = TinyLfuWidthCache::new(200);
1889
1890        let inputs = [
1891            "",
1892            "hello",
1893            "日本語テスト",
1894            "café résumé",
1895            "a\tb",
1896            "🎉🎊🎈",
1897            "mixed日本eng",
1898            "    spaces    ",
1899            "AAAAAAAAAAAAAAAAAAAAAAAAA",
1900            "x",
1901        ];
1902
1903        for &s in &inputs {
1904            let w1 = lru.get_or_compute(s);
1905            let w2 = tlfu.get_or_compute(s);
1906            assert_eq!(
1907                w1, w2,
1908                "Width mismatch for {:?}: LRU={}, TinyLFU={}",
1909                s, w1, w2
1910            );
1911        }
1912    }
1913}
1914
1915// ---------------------------------------------------------------------------
1916// 2. E2E cache replay: deterministic workload, log hit rate + latency (JSONL)
1917// ---------------------------------------------------------------------------
1918
1919#[cfg(test)]
1920mod e2e_cache_replay {
1921    use super::*;
1922    use std::time::Instant;
1923
1924    /// A single replay record, serialisable to JSONL.
1925    #[derive(Debug)]
1926    struct ReplayRecord {
1927        step: usize,
1928        key: String,
1929        width: usize,
1930        hit: bool,
1931        latency_ns: u128,
1932    }
1933
1934    impl ReplayRecord {
1935        fn to_jsonl(&self) -> String {
1936            format!(
1937                r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
1938                self.step, self.key, self.width, self.hit, self.latency_ns,
1939            )
1940        }
1941    }
1942
1943    fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
1944        // Approximate Zipfian: lower indices are much more frequent.
1945        (0..n)
1946            .map(|_| {
1947                let raw = rng.next_usize(universe * universe);
1948                let idx = (raw as f64).sqrt() as usize % universe;
1949                format!("item_{}", idx)
1950            })
1951            .collect()
1952    }
1953
1954    #[test]
1955    fn replay_zipfian_tinylfu() {
1956        let mut rng = super::Lcg::new(0x1234_5678);
1957        let workload = zipfian_workload(&mut rng, 2000, 200);
1958
1959        let mut cache = TinyLfuWidthCache::new(50);
1960        let mut records = Vec::with_capacity(workload.len());
1961
1962        for (i, key) in workload.iter().enumerate() {
1963            let stats_before = cache.stats();
1964            let t0 = Instant::now();
1965            let width = cache.get_or_compute(key);
1966            let elapsed = t0.elapsed().as_nanos();
1967            let stats_after = cache.stats();
1968            let hit = stats_after.hits > stats_before.hits;
1969
1970            records.push(ReplayRecord {
1971                step: i,
1972                key: key.clone(),
1973                width,
1974                hit,
1975                latency_ns: elapsed,
1976            });
1977        }
1978
1979        // Validate JSONL serialisation is parseable.
1980        for r in &records[..5] {
1981            let line = r.to_jsonl();
1982            assert!(
1983                line.starts_with('{') && line.ends_with('}'),
1984                "Bad JSONL: {}",
1985                line
1986            );
1987        }
1988
1989        // Compute aggregate stats.
1990        let total = records.len();
1991        let hits = records.iter().filter(|r| r.hit).count();
1992        let hit_rate = hits as f64 / total as f64;
1993
1994        // Zipfian workload with 50-entry cache over 200-item universe
1995        // should get a meaningful hit rate (> 10%).
1996        assert!(
1997            hit_rate > 0.10,
1998            "Zipfian hit rate too low: {:.2}% ({}/{})",
1999            hit_rate * 100.0,
2000            hits,
2001            total
2002        );
2003    }
2004
2005    #[test]
2006    fn replay_zipfian_lru_comparison() {
2007        let mut rng = super::Lcg::new(0x1234_5678);
2008        let workload = zipfian_workload(&mut rng, 2000, 200);
2009
2010        let mut tlfu = TinyLfuWidthCache::new(50);
2011        let mut lru = WidthCache::new(50);
2012
2013        for key in &workload {
2014            tlfu.get_or_compute(key);
2015            lru.get_or_compute(key);
2016        }
2017
2018        let tlfu_stats = tlfu.stats();
2019        let lru_stats = lru.stats();
2020
2021        // Both must have correct total operations.
2022        assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2023        assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2024
2025        // TinyLFU should be at least competitive with plain LRU on Zipfian.
2026        // (In practice TinyLFU often wins, but we just check both work.)
2027        assert!(
2028            tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2029            "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2030            tlfu_stats.hit_rate() * 100.0,
2031            lru_stats.hit_rate() * 100.0,
2032        );
2033    }
2034
2035    #[test]
2036    fn replay_deterministic_reproduction() {
2037        // Two identical replays must produce identical hit/miss sequences.
2038        let run = |seed: u64| -> Vec<bool> {
2039            let mut rng = super::Lcg::new(seed);
2040            let workload = zipfian_workload(&mut rng, 500, 100);
2041            let mut cache = TinyLfuWidthCache::new(30);
2042            let mut hits = Vec::with_capacity(500);
2043            for key in &workload {
2044                let before = cache.stats().hits;
2045                cache.get_or_compute(key);
2046                hits.push(cache.stats().hits > before);
2047            }
2048            hits
2049        };
2050
2051        let run1 = run(0xABCD_EF01);
2052        let run2 = run(0xABCD_EF01);
2053        assert_eq!(run1, run2, "Deterministic replay diverged");
2054    }
2055
2056    #[test]
2057    fn replay_uniform_workload() {
2058        // Uniform access pattern: every key equally likely. Hit rate should be
2059        // roughly cache_size / universe_size for large N.
2060        let mut rng = super::Lcg::new(0x9999);
2061        let universe = 100;
2062        let cache_size = 25;
2063        let n = 5000;
2064
2065        let mut cache = TinyLfuWidthCache::new(cache_size);
2066
2067        // Warm up: access each key once.
2068        for i in 0..universe {
2069            cache.get_or_compute(&format!("u_{}", i));
2070        }
2071
2072        cache.reset_stats();
2073        for _ in 0..n {
2074            let idx = rng.next_usize(universe);
2075            cache.get_or_compute(&format!("u_{}", idx));
2076        }
2077
2078        let stats = cache.stats();
2079        let hit_rate = stats.hit_rate();
2080        // Theoretical: ~25/100 = 25%. Allow range 10%–60%.
2081        assert!(
2082            hit_rate > 0.10 && hit_rate < 0.60,
2083            "Uniform hit rate {:.2}% outside expected range",
2084            hit_rate * 100.0,
2085        );
2086    }
2087}
2088
2089// ---------------------------------------------------------------------------
2090// 3. Perf gates: cache operations < 1us p95
2091// ---------------------------------------------------------------------------
2092
2093#[cfg(test)]
2094mod perf_cache_overhead {
2095    use super::*;
2096    use std::time::Instant;
2097
2098    /// Collect latencies for N operations, return sorted Vec<u128> in nanoseconds.
2099    fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2100        let mut latencies = Vec::with_capacity(n);
2101        for i in 0..n {
2102            let t0 = Instant::now();
2103            op(i);
2104            latencies.push(t0.elapsed().as_nanos());
2105        }
2106        latencies.sort_unstable();
2107        latencies
2108    }
2109
2110    fn p95(sorted: &[u128]) -> u128 {
2111        let len = sorted.len();
2112        let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2113        sorted[idx]
2114    }
2115
2116    fn p99(sorted: &[u128]) -> u128 {
2117        let len = sorted.len();
2118        let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2119        sorted[idx]
2120    }
2121
2122    fn median(sorted: &[u128]) -> u128 {
2123        sorted[sorted.len() / 2]
2124    }
2125
2126    #[allow(unexpected_cfgs)]
2127    fn perf_budget_ns(base_ns: u128) -> u128 {
2128        if cfg!(coverage) || cfg!(coverage_nightly) {
2129            base_ns.saturating_mul(10)
2130        } else {
2131            base_ns
2132        }
2133    }
2134
2135    #[test]
2136    fn perf_lru_hit_latency() {
2137        let mut cache = WidthCache::new(1000);
2138        // Warm up.
2139        for i in 0..100 {
2140            cache.get_or_compute(&format!("warm_{}", i));
2141        }
2142
2143        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2144        let latencies = measure_latencies(10_000, |i| {
2145            let _ = cache.get_or_compute(&keys[i % 100]);
2146        });
2147
2148        let p95_ns = p95(&latencies);
2149        // Budget: < 1us (1000ns) p95 for cache hits.
2150        // Use generous 5us to account for CI variability (10x under coverage).
2151        let budget_ns = perf_budget_ns(5_000);
2152        assert!(
2153            p95_ns < budget_ns,
2154            "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2155            p95_ns,
2156            budget_ns,
2157            median(&latencies),
2158            p99(&latencies),
2159        );
2160    }
2161
2162    #[test]
2163    fn perf_tinylfu_hit_latency() {
2164        let mut cache = TinyLfuWidthCache::new(1000);
2165        // Warm up: access each key multiple times to ensure promotion to main.
2166        for _ in 0..5 {
2167            for i in 0..100 {
2168                cache.get_or_compute(&format!("warm_{}", i));
2169            }
2170        }
2171
2172        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2173        let latencies = measure_latencies(10_000, |i| {
2174            let _ = cache.get_or_compute(&keys[i % 100]);
2175        });
2176
2177        let p95_ns = p95(&latencies);
2178        // Budget: < 1us p95 for hits (5us CI-safe threshold).
2179        let budget_ns = perf_budget_ns(5_000);
2180        assert!(
2181            p95_ns < budget_ns,
2182            "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2183            p95_ns,
2184            budget_ns,
2185            median(&latencies),
2186            p99(&latencies),
2187        );
2188    }
2189
2190    #[test]
2191    fn perf_tinylfu_miss_latency() {
2192        let mut cache = TinyLfuWidthCache::new(100);
2193        let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2194
2195        let latencies = measure_latencies(10_000, |i| {
2196            let _ = cache.get_or_compute(&keys[i]);
2197        });
2198
2199        let p95_ns = p95(&latencies);
2200        // Misses include unicode width computation. Budget: < 5us p95.
2201        // Use 20us CI-safe threshold (10x under coverage; computation dominates).
2202        let budget_ns = perf_budget_ns(20_000);
2203        assert!(
2204            p95_ns < budget_ns,
2205            "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2206            p95_ns,
2207            budget_ns,
2208            median(&latencies),
2209            p99(&latencies),
2210        );
2211    }
2212
2213    #[test]
2214    fn perf_cms_increment_latency() {
2215        let mut cms = CountMinSketch::with_defaults();
2216        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2217
2218        let latencies = measure_latencies(10_000, |i| {
2219            cms.increment(hashes[i]);
2220        });
2221
2222        let p95_ns = p95(&latencies);
2223        // CMS increment should be very fast: < 500ns p95.
2224        let budget_ns = perf_budget_ns(2_000);
2225        assert!(
2226            p95_ns < budget_ns,
2227            "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2228            p95_ns,
2229            budget_ns,
2230            median(&latencies),
2231        );
2232    }
2233
2234    #[test]
2235    fn perf_doorkeeper_latency() {
2236        let mut dk = Doorkeeper::with_defaults();
2237        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2238
2239        let latencies = measure_latencies(10_000, |i| {
2240            let _ = dk.check_and_set(hashes[i]);
2241        });
2242
2243        let p95_ns = p95(&latencies);
2244        // Doorkeeper bit ops: < 200ns p95.
2245        let budget_ns = perf_budget_ns(1_000);
2246        assert!(
2247            p95_ns < budget_ns,
2248            "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2249            p95_ns,
2250            budget_ns,
2251            median(&latencies),
2252        );
2253    }
2254
2255    #[test]
2256    fn perf_fingerprint_hash_latency() {
2257        let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2258
2259        let latencies = measure_latencies(10_000, |i| {
2260            let _ = fingerprint_hash(&keys[i]);
2261        });
2262
2263        let p95_ns = p95(&latencies);
2264        // FNV hash: < 200ns p95.
2265        let budget_ns = perf_budget_ns(1_000);
2266        assert!(
2267            p95_ns < budget_ns,
2268            "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2269            p95_ns,
2270            budget_ns,
2271            median(&latencies),
2272        );
2273    }
2274}