Skip to main content

ftui_text/
width_cache.rs

1#![forbid(unsafe_code)]
2
3//! LRU width cache for efficient text measurement.
4//!
5//! Text width calculation is a hot path (called every render frame).
6//! This cache stores computed widths to avoid redundant Unicode width
7//! calculations for repeated strings.
8//!
9//! # Example
10//! ```
11//! use ftui_text::WidthCache;
12//!
13//! let mut cache = WidthCache::new(1000);
14//!
15//! // First call computes width
16//! let width = cache.get_or_compute("Hello, world!");
17//! assert_eq!(width, 13);
18//!
19//! // Second call hits cache
20//! let width2 = cache.get_or_compute("Hello, world!");
21//! assert_eq!(width2, 13);
22//!
23//! // Check stats
24//! let stats = cache.stats();
25//! assert_eq!(stats.hits, 1);
26//! assert_eq!(stats.misses, 1);
27//! ```
28
29use lru::LruCache;
30use rustc_hash::FxHasher;
31use std::hash::{Hash, Hasher};
32use std::num::NonZeroUsize;
33
34/// Default cache capacity.
35pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
36
37/// Statistics about cache performance.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39pub struct CacheStats {
40    /// Number of cache hits.
41    pub hits: u64,
42    /// Number of cache misses.
43    pub misses: u64,
44    /// Current number of entries.
45    pub size: usize,
46    /// Maximum capacity.
47    pub capacity: usize,
48}
49
50impl CacheStats {
51    /// Calculate hit rate (0.0 to 1.0).
52    #[must_use]
53    pub fn hit_rate(&self) -> f64 {
54        let total = self.hits + self.misses;
55        if total == 0 {
56            0.0
57        } else {
58            self.hits as f64 / total as f64
59        }
60    }
61}
62
63/// LRU cache for text width measurements.
64///
65/// This cache stores the computed display width (in terminal cells) for
66/// text strings, using an LRU eviction policy when capacity is reached.
67///
68/// # Performance
69/// - Uses FxHash for fast hashing
70/// - O(1) lookup and insertion
71/// - Automatic LRU eviction
72/// - Keys are stored as 64-bit hashes (not full strings) to minimize memory
73///
74/// # Hash Collisions
75/// The cache uses a 64-bit hash as the lookup key rather than storing the
76/// full string. This trades theoretical correctness for memory efficiency.
77/// With FxHash, collision probability is ~1 in 2^64, making this safe for
78/// practical use. If you require guaranteed correctness, use `contains()`
79/// to verify presence before trusting cached values.
80///
81/// # Thread Safety
82/// `WidthCache` is not thread-safe. For concurrent use, wrap in a mutex
83/// or use thread-local caches.
84#[derive(Debug)]
85pub struct WidthCache {
86    cache: LruCache<u64, usize>,
87    hits: u64,
88    misses: u64,
89}
90
91impl WidthCache {
92    /// Create a new cache with the specified capacity.
93    ///
94    /// If capacity is zero, defaults to 1.
95    #[must_use]
96    pub fn new(capacity: usize) -> Self {
97        let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
98        Self {
99            cache: LruCache::new(capacity),
100            hits: 0,
101            misses: 0,
102        }
103    }
104
105    /// Create a new cache with the default capacity (4096 entries).
106    #[must_use]
107    pub fn with_default_capacity() -> Self {
108        Self::new(DEFAULT_CACHE_CAPACITY)
109    }
110
111    /// Get cached width or compute and cache it.
112    ///
113    /// If the text is in the cache, returns the cached width.
114    /// Otherwise, computes the width using `display_width` and caches it.
115    #[inline]
116    pub fn get_or_compute(&mut self, text: &str) -> usize {
117        self.get_or_compute_with(text, crate::display_width)
118    }
119
120    /// Get cached width or compute using a custom function.
121    ///
122    /// This allows using custom width calculation functions for testing
123    /// or specialized terminal behavior.
124    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
125    where
126        F: FnOnce(&str) -> usize,
127    {
128        let hash = hash_text(text);
129
130        if let Some(&width) = self.cache.get(&hash) {
131            self.hits += 1;
132            return width;
133        }
134
135        self.misses += 1;
136        let width = compute(text);
137        self.cache.put(hash, width);
138        width
139    }
140
141    /// Check if a text string is in the cache.
142    #[must_use]
143    pub fn contains(&self, text: &str) -> bool {
144        let hash = hash_text(text);
145        self.cache.contains(&hash)
146    }
147
148    /// Get the cached width for a text string without computing.
149    ///
150    /// Returns `None` if the text is not in the cache.
151    /// Note: This does update the LRU order.
152    #[must_use]
153    pub fn get(&mut self, text: &str) -> Option<usize> {
154        let hash = hash_text(text);
155        self.cache.get(&hash).copied()
156    }
157
158    /// Peek at the cached width without updating LRU order.
159    #[must_use]
160    pub fn peek(&self, text: &str) -> Option<usize> {
161        let hash = hash_text(text);
162        self.cache.peek(&hash).copied()
163    }
164
165    /// Pre-populate the cache with a text string.
166    ///
167    /// This is useful for warming up the cache with known strings.
168    pub fn preload(&mut self, text: &str) {
169        let hash = hash_text(text);
170        if !self.cache.contains(&hash) {
171            let width = crate::display_width(text);
172            self.cache.put(hash, width);
173        }
174    }
175
176    /// Pre-populate the cache with multiple strings.
177    pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
178        for text in texts {
179            self.preload(text);
180        }
181    }
182
183    /// Clear the cache.
184    pub fn clear(&mut self) {
185        self.cache.clear();
186    }
187
188    /// Reset statistics.
189    pub fn reset_stats(&mut self) {
190        self.hits = 0;
191        self.misses = 0;
192    }
193
194    /// Get cache statistics.
195    #[inline]
196    #[must_use]
197    pub fn stats(&self) -> CacheStats {
198        CacheStats {
199            hits: self.hits,
200            misses: self.misses,
201            size: self.cache.len(),
202            capacity: self.cache.cap().get(),
203        }
204    }
205
206    /// Get the current number of cached entries.
207    #[inline]
208    #[must_use]
209    pub fn len(&self) -> usize {
210        self.cache.len()
211    }
212
213    /// Check if the cache is empty.
214    #[inline]
215    #[must_use]
216    pub fn is_empty(&self) -> bool {
217        self.cache.is_empty()
218    }
219
220    /// Get the cache capacity.
221    #[inline]
222    #[must_use]
223    pub fn capacity(&self) -> usize {
224        self.cache.cap().get()
225    }
226
227    /// Resize the cache capacity.
228    ///
229    /// If the new capacity is smaller than the current size,
230    /// entries will be evicted (LRU order).
231    pub fn resize(&mut self, new_capacity: usize) {
232        let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
233        self.cache.resize(new_capacity);
234    }
235}
236
237impl Default for WidthCache {
238    fn default() -> Self {
239        Self::with_default_capacity()
240    }
241}
242
243/// Hash a text string using FxHash for fast hashing.
244#[inline]
245fn hash_text(text: &str) -> u64 {
246    let mut hasher = FxHasher::default();
247    text.hash(&mut hasher);
248    hasher.finish()
249}
250
251// Thread-local width cache for convenience.
252//
253// This provides a global cache that is thread-local, avoiding the need
254// to pass a cache around explicitly.
255#[cfg(feature = "thread_local_cache")]
256thread_local! {
257    static THREAD_CACHE: std::cell::RefCell<WidthCache> =
258        std::cell::RefCell::new(WidthCache::with_default_capacity());
259}
260
261/// Get or compute width using the thread-local cache.
262#[cfg(feature = "thread_local_cache")]
263pub fn cached_width(text: &str) -> usize {
264    THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
265}
266
267/// Clear the thread-local cache.
268#[cfg(feature = "thread_local_cache")]
269pub fn clear_thread_cache() {
270    THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276
277    #[test]
278    fn new_cache_is_empty() {
279        let cache = WidthCache::new(100);
280        assert!(cache.is_empty());
281        assert_eq!(cache.len(), 0);
282        assert_eq!(cache.capacity(), 100);
283    }
284
285    #[test]
286    fn default_capacity() {
287        let cache = WidthCache::with_default_capacity();
288        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
289    }
290
291    #[test]
292    fn get_or_compute_caches_value() {
293        let mut cache = WidthCache::new(100);
294
295        let width1 = cache.get_or_compute("hello");
296        assert_eq!(width1, 5);
297        assert_eq!(cache.len(), 1);
298
299        let width2 = cache.get_or_compute("hello");
300        assert_eq!(width2, 5);
301        assert_eq!(cache.len(), 1); // Same entry
302
303        let stats = cache.stats();
304        assert_eq!(stats.hits, 1);
305        assert_eq!(stats.misses, 1);
306    }
307
308    #[test]
309    fn get_or_compute_different_strings() {
310        let mut cache = WidthCache::new(100);
311
312        cache.get_or_compute("hello");
313        cache.get_or_compute("world");
314        cache.get_or_compute("foo");
315
316        assert_eq!(cache.len(), 3);
317        let stats = cache.stats();
318        assert_eq!(stats.misses, 3);
319        assert_eq!(stats.hits, 0);
320    }
321
322    #[test]
323    fn get_or_compute_cjk() {
324        let mut cache = WidthCache::new(100);
325
326        let width = cache.get_or_compute("你好");
327        assert_eq!(width, 4); // 2 chars * 2 cells each
328    }
329
330    #[test]
331    fn contains() {
332        let mut cache = WidthCache::new(100);
333
334        assert!(!cache.contains("hello"));
335        cache.get_or_compute("hello");
336        assert!(cache.contains("hello"));
337    }
338
339    #[test]
340    fn get_returns_none_for_missing() {
341        let mut cache = WidthCache::new(100);
342        assert!(cache.get("missing").is_none());
343    }
344
345    #[test]
346    fn get_returns_cached_value() {
347        let mut cache = WidthCache::new(100);
348        cache.get_or_compute("hello");
349
350        let width = cache.get("hello");
351        assert_eq!(width, Some(5));
352    }
353
354    #[test]
355    fn peek_does_not_update_lru() {
356        let mut cache = WidthCache::new(2);
357
358        cache.get_or_compute("a");
359        cache.get_or_compute("b");
360
361        // Peek at "a" - should not update LRU order
362        let _ = cache.peek("a");
363
364        // Add "c" - should evict "a" (oldest)
365        cache.get_or_compute("c");
366
367        assert!(!cache.contains("a"));
368        assert!(cache.contains("b"));
369        assert!(cache.contains("c"));
370    }
371
372    #[test]
373    fn lru_eviction() {
374        let mut cache = WidthCache::new(2);
375
376        cache.get_or_compute("a");
377        cache.get_or_compute("b");
378        cache.get_or_compute("c"); // Should evict "a"
379
380        assert!(!cache.contains("a"));
381        assert!(cache.contains("b"));
382        assert!(cache.contains("c"));
383    }
384
385    #[test]
386    fn lru_refresh_on_access() {
387        let mut cache = WidthCache::new(2);
388
389        cache.get_or_compute("a");
390        cache.get_or_compute("b");
391        cache.get_or_compute("a"); // Refresh "a" to most recent
392        cache.get_or_compute("c"); // Should evict "b"
393
394        assert!(cache.contains("a"));
395        assert!(!cache.contains("b"));
396        assert!(cache.contains("c"));
397    }
398
399    #[test]
400    fn preload() {
401        let mut cache = WidthCache::new(100);
402
403        cache.preload("hello");
404        assert!(cache.contains("hello"));
405        assert_eq!(cache.peek("hello"), Some(5));
406
407        let stats = cache.stats();
408        assert_eq!(stats.misses, 0); // Preload doesn't count as miss
409        assert_eq!(stats.hits, 0);
410    }
411
412    #[test]
413    fn preload_many() {
414        let mut cache = WidthCache::new(100);
415
416        cache.preload_many(["hello", "world", "foo"]);
417        assert_eq!(cache.len(), 3);
418    }
419
420    #[test]
421    fn clear() {
422        let mut cache = WidthCache::new(100);
423        cache.get_or_compute("hello");
424        cache.get_or_compute("world");
425
426        cache.clear();
427        assert!(cache.is_empty());
428        assert!(!cache.contains("hello"));
429    }
430
431    #[test]
432    fn reset_stats() {
433        let mut cache = WidthCache::new(100);
434        cache.get_or_compute("hello");
435        cache.get_or_compute("hello");
436
437        let stats = cache.stats();
438        assert_eq!(stats.hits, 1);
439        assert_eq!(stats.misses, 1);
440
441        cache.reset_stats();
442        let stats = cache.stats();
443        assert_eq!(stats.hits, 0);
444        assert_eq!(stats.misses, 0);
445    }
446
447    #[test]
448    fn hit_rate() {
449        let stats = CacheStats {
450            hits: 75,
451            misses: 25,
452            size: 100,
453            capacity: 1000,
454        };
455        assert!((stats.hit_rate() - 0.75).abs() < 0.001);
456    }
457
458    #[test]
459    fn hit_rate_no_requests() {
460        let stats = CacheStats::default();
461        assert_eq!(stats.hit_rate(), 0.0);
462    }
463
464    #[test]
465    fn resize_smaller() {
466        let mut cache = WidthCache::new(100);
467        for i in 0..50 {
468            cache.get_or_compute(&format!("text{i}"));
469        }
470        assert_eq!(cache.len(), 50);
471
472        cache.resize(10);
473        assert!(cache.len() <= 10);
474        assert_eq!(cache.capacity(), 10);
475    }
476
477    #[test]
478    fn resize_larger() {
479        let mut cache = WidthCache::new(10);
480        cache.resize(100);
481        assert_eq!(cache.capacity(), 100);
482    }
483
484    #[test]
485    fn custom_compute_function() {
486        let mut cache = WidthCache::new(100);
487
488        // Use a custom width function (always returns 42)
489        let width = cache.get_or_compute_with("hello", |_| 42);
490        assert_eq!(width, 42);
491
492        // Cached value is 42
493        assert_eq!(cache.peek("hello"), Some(42));
494    }
495
496    #[test]
497    fn empty_string() {
498        let mut cache = WidthCache::new(100);
499        let width = cache.get_or_compute("");
500        assert_eq!(width, 0);
501    }
502
503    #[test]
504    fn hash_collision_handling() {
505        // Even with hash collisions, the LRU should handle them
506        // (this is just a stress test with many entries)
507        let mut cache = WidthCache::new(1000);
508
509        for i in 0..500 {
510            cache.get_or_compute(&format!("string{i}"));
511        }
512
513        assert_eq!(cache.len(), 500);
514    }
515
516    #[test]
517    fn unicode_strings() {
518        let mut cache = WidthCache::new(100);
519
520        // Various Unicode strings
521        assert_eq!(cache.get_or_compute("café"), 4);
522        assert_eq!(cache.get_or_compute("日本語"), 6);
523        assert_eq!(cache.get_or_compute("🎉"), 2); // Emoji typically 2 cells
524
525        assert_eq!(cache.len(), 3);
526    }
527
528    #[test]
529    fn combining_characters() {
530        let mut cache = WidthCache::new(100);
531
532        // e + combining acute accent
533        let width = cache.get_or_compute("e\u{0301}");
534        // Should be 1 cell (the combining char doesn't add width)
535        assert_eq!(width, 1);
536    }
537
538    // ==========================================================================
539    // Additional coverage tests
540    // ==========================================================================
541
542    #[test]
543    fn default_cache() {
544        let cache = WidthCache::default();
545        assert!(cache.is_empty());
546        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
547    }
548
549    #[test]
550    fn cache_stats_debug() {
551        let stats = CacheStats {
552            hits: 10,
553            misses: 5,
554            size: 15,
555            capacity: 100,
556        };
557        let debug = format!("{:?}", stats);
558        assert!(debug.contains("CacheStats"));
559        assert!(debug.contains("10")); // hits
560    }
561
562    #[test]
563    fn cache_stats_default() {
564        let stats = CacheStats::default();
565        assert_eq!(stats.hits, 0);
566        assert_eq!(stats.misses, 0);
567        assert_eq!(stats.size, 0);
568        assert_eq!(stats.capacity, 0);
569    }
570
571    #[test]
572    fn cache_stats_equality() {
573        let stats1 = CacheStats {
574            hits: 10,
575            misses: 5,
576            size: 15,
577            capacity: 100,
578        };
579        let stats2 = stats1; // Copy
580        assert_eq!(stats1, stats2);
581    }
582
583    #[test]
584    fn clear_after_preload() {
585        let mut cache = WidthCache::new(100);
586        cache.preload_many(["hello", "world", "test"]);
587        assert_eq!(cache.len(), 3);
588
589        cache.clear();
590        assert!(cache.is_empty());
591        assert!(!cache.contains("hello"));
592    }
593
594    #[test]
595    fn preload_existing_is_noop() {
596        let mut cache = WidthCache::new(100);
597        cache.get_or_compute("hello"); // First access
598        let len_before = cache.len();
599
600        cache.preload("hello"); // Already exists
601        assert_eq!(cache.len(), len_before);
602    }
603
604    #[test]
605    fn minimum_capacity_is_one() {
606        let cache = WidthCache::new(0);
607        assert_eq!(cache.capacity(), 1);
608    }
609
610    #[test]
611    fn width_cache_debug() {
612        let cache = WidthCache::new(10);
613        let debug = format!("{:?}", cache);
614        assert!(debug.contains("WidthCache"));
615    }
616
617    #[test]
618    fn emoji_zwj_sequence() {
619        let mut cache = WidthCache::new(100);
620        // Family emoji (ZWJ sequence)
621        let width = cache.get_or_compute("👨‍👩‍👧");
622        // Width varies by implementation, just ensure it doesn't panic
623        assert!(width >= 1);
624    }
625
626    #[test]
627    fn emoji_with_skin_tone() {
628        let mut cache = WidthCache::new(100);
629        let width = cache.get_or_compute("👍🏻");
630        assert!(width >= 1);
631    }
632
633    #[test]
634    fn flag_emoji() {
635        let mut cache = WidthCache::new(100);
636        // US flag emoji (regional indicators)
637        let width = cache.get_or_compute("🇺🇸");
638        assert!(width >= 1);
639    }
640
641    #[test]
642    fn mixed_width_strings() {
643        let mut cache = WidthCache::new(100);
644        // Mixed ASCII and CJK
645        let width = cache.get_or_compute("Hello你好World");
646        assert_eq!(width, 14); // 10 ASCII + 4 CJK
647    }
648
649    #[test]
650    fn stats_size_reflects_cache_len() {
651        let mut cache = WidthCache::new(100);
652        cache.get_or_compute("a");
653        cache.get_or_compute("b");
654        cache.get_or_compute("c");
655
656        let stats = cache.stats();
657        assert_eq!(stats.size, cache.len());
658        assert_eq!(stats.size, 3);
659    }
660
661    #[test]
662    fn stats_capacity_matches() {
663        let cache = WidthCache::new(42);
664        let stats = cache.stats();
665        assert_eq!(stats.capacity, 42);
666    }
667
668    #[test]
669    fn resize_to_zero_becomes_one() {
670        let mut cache = WidthCache::new(100);
671        cache.resize(0);
672        assert_eq!(cache.capacity(), 1);
673    }
674
675    #[test]
676    fn get_updates_lru_order() {
677        let mut cache = WidthCache::new(2);
678
679        cache.get_or_compute("a");
680        cache.get_or_compute("b");
681
682        // Access "a" via get() - should update LRU order
683        let _ = cache.get("a");
684
685        // Add "c" - should evict "b" (now oldest)
686        cache.get_or_compute("c");
687
688        assert!(cache.contains("a"));
689        assert!(!cache.contains("b"));
690        assert!(cache.contains("c"));
691    }
692
693    #[test]
694    fn contains_does_not_modify_stats() {
695        let mut cache = WidthCache::new(100);
696        cache.get_or_compute("hello");
697
698        let stats_before = cache.stats();
699        let _ = cache.contains("hello");
700        let _ = cache.contains("missing");
701        let stats_after = cache.stats();
702
703        assert_eq!(stats_before.hits, stats_after.hits);
704        assert_eq!(stats_before.misses, stats_after.misses);
705    }
706
707    #[test]
708    fn peek_returns_none_for_missing() {
709        let cache = WidthCache::new(100);
710        assert!(cache.peek("missing").is_none());
711    }
712
713    #[test]
714    fn custom_compute_called_once() {
715        let mut cache = WidthCache::new(100);
716        let mut call_count = 0;
717
718        cache.get_or_compute_with("test", |_| {
719            call_count += 1;
720            10
721        });
722
723        cache.get_or_compute_with("test", |_| {
724            call_count += 1;
725            20 // This shouldn't be called
726        });
727
728        assert_eq!(call_count, 1);
729        assert_eq!(cache.peek("test"), Some(10));
730    }
731
732    #[test]
733    fn whitespace_strings() {
734        let mut cache = WidthCache::new(100);
735        assert_eq!(cache.get_or_compute("   "), 3); // 3 spaces
736        assert_eq!(cache.get_or_compute("\t"), 1); // Tab is 1 cell typically
737        assert_eq!(cache.get_or_compute("\n"), 1); // Newline
738    }
739}
740
741// ---------------------------------------------------------------------------
742// W-TinyLFU Admission Components (bd-4kq0.6.1)
743// ---------------------------------------------------------------------------
744//
745// # Design
746//
747// W-TinyLFU augments LRU eviction with a frequency-based admission filter:
748//
749// 1. **Count-Min Sketch (CMS)**: Approximate frequency counter.
750//    - Parameters: width `w`, depth `d`.
751//    - Error bound: estimated count <= true count + epsilon * N
752//      with probability >= 1 - delta, where:
753//        epsilon = e / w  (e = Euler's number ≈ 2.718)
754//        delta   = (1/2)^d
755//    - Chosen defaults: w=1024 (epsilon ≈ 0.0027), d=4 (delta ≈ 0.0625).
756//    - Counter width: 4 bits (saturating at 15). Periodic halving (aging)
757//      every `reset_interval` increments to prevent staleness.
758//
759// 2. **Doorkeeper**: 1-bit Bloom filter (single hash, `doorkeeper_bits` entries).
760//    - Filters one-hit wonders before they reach the CMS.
761//    - On first access: set doorkeeper bit. On second access in the same
762//      epoch: increment CMS. Cleared on CMS reset.
763//    - Default: 2048 bits (256 bytes).
764//
765// 3. **Admission rule**: When evicting, compare frequencies:
766//    - `freq(candidate) > freq(victim)` → admit candidate, evict victim.
767//    - `freq(candidate) <= freq(victim)` → reject candidate, keep victim.
768//
769// 4. **Fingerprint guard**: The CMS stores 64-bit hashes. Since the main
770//    cache also keys by 64-bit hash, a collision means two distinct strings
771//    share the same key. The fingerprint guard adds a secondary hash
772//    (different seed) stored alongside the value. On lookup, if the
773//    secondary hash mismatches, the entry is treated as a miss and evicted.
774//
775// # Failure Modes
776// - CMS overcounting: bounded by epsilon * N; aging limits staleness.
777// - Doorkeeper false positives: one-hit items may leak to CMS. Bounded
778//   by Bloom FP rate ≈ (1 - e^{-k*n/m})^k with k=1.
779// - Fingerprint collision (secondary hash): probability ~2^{-64}; negligible.
780// - Reset storm: halving all counters is O(w*d). With w=1024, d=4 this is
781//   4096 operations — negligible vs. rendering cost.
782
783/// Count-Min Sketch for approximate frequency estimation.
784///
785/// Uses `depth` independent hash functions (derived from a single hash via
786/// mixing) and `width` counters per row. Each counter is a `u8` saturating
787/// at [`CountMinSketch::MAX_COUNT`] (15 by default, representing 4-bit counters).
788///
789/// # Error Bounds
790///
791/// For a sketch with width `w` and depth `d`, after `N` total increments:
792/// - `estimate(x) <= true_count(x) + epsilon * N` with probability `>= 1 - delta`
793/// - where `epsilon = e / w` and `delta = (1/2)^d`
794#[derive(Debug, Clone)]
795pub struct CountMinSketch {
796    /// Counter matrix: `depth` rows of `width` counters each.
797    counters: Vec<Vec<u8>>,
798    /// Number of hash functions (rows).
799    depth: usize,
800    /// Number of counters per row.
801    width: usize,
802    /// Total number of increments since last reset.
803    total_increments: u64,
804    /// Increment count at which to halve all counters.
805    reset_interval: u64,
806}
807
808/// Maximum counter value (4-bit saturation).
809const CMS_MAX_COUNT: u8 = 15;
810
811/// Default CMS width. epsilon = e/1024 ≈ 0.0027.
812const CMS_DEFAULT_WIDTH: usize = 1024;
813
814/// Default CMS depth. delta = (1/2)^4 = 0.0625.
815const CMS_DEFAULT_DEPTH: usize = 4;
816
817/// Default reset interval (halve counters after this many increments).
818const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
819
820impl CountMinSketch {
821    /// Create a new Count-Min Sketch with the given dimensions.
822    pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
823        let width = width.max(1);
824        let depth = depth.max(1);
825        Self {
826            counters: vec![vec![0u8; width]; depth],
827            depth,
828            width,
829            total_increments: 0,
830            reset_interval: reset_interval.max(1),
831        }
832    }
833
834    /// Create a sketch with default parameters (w=1024, d=4, reset=8192).
835    pub fn with_defaults() -> Self {
836        Self::new(
837            CMS_DEFAULT_WIDTH,
838            CMS_DEFAULT_DEPTH,
839            CMS_DEFAULT_RESET_INTERVAL,
840        )
841    }
842
843    /// Increment the count for a key.
844    pub fn increment(&mut self, hash: u64) {
845        for row in 0..self.depth {
846            let idx = self.index(hash, row);
847            self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
848        }
849        self.total_increments += 1;
850
851        if self.total_increments >= self.reset_interval {
852            self.halve();
853        }
854    }
855
856    /// Estimate the frequency of a key (minimum across all rows).
857    pub fn estimate(&self, hash: u64) -> u8 {
858        let mut min = u8::MAX;
859        for row in 0..self.depth {
860            let idx = self.index(hash, row);
861            min = min.min(self.counters[row][idx]);
862        }
863        min
864    }
865
866    /// Total number of increments since creation or last reset.
867    pub fn total_increments(&self) -> u64 {
868        self.total_increments
869    }
870
871    /// Halve all counters (aging). Resets the increment counter.
872    fn halve(&mut self) {
873        for row in &mut self.counters {
874            for c in row.iter_mut() {
875                *c /= 2;
876            }
877        }
878        self.total_increments = 0;
879    }
880
881    /// Clear all counters to zero.
882    pub fn clear(&mut self) {
883        for row in &mut self.counters {
884            row.fill(0);
885        }
886        self.total_increments = 0;
887    }
888
889    /// Compute the column index for a given hash and row.
890    #[inline]
891    fn index(&self, hash: u64, row: usize) -> usize {
892        // Mix the hash with the row index for independent hash functions.
893        let mixed = hash
894            .wrapping_mul(0x517c_c1b7_2722_0a95)
895            .wrapping_add(row as u64);
896        let mixed = mixed ^ (mixed >> 32);
897        (mixed as usize) % self.width
898    }
899}
900
901/// 1-bit Bloom filter used as a doorkeeper to filter one-hit wonders.
902///
903/// On first access within an epoch, the doorkeeper sets a bit. Only on
904/// the second access does the item get promoted to the Count-Min Sketch.
905/// The doorkeeper is cleared whenever the CMS resets (halves).
906#[derive(Debug, Clone)]
907pub struct Doorkeeper {
908    bits: Vec<u64>,
909    num_bits: usize,
910}
911
912/// Default doorkeeper size in bits.
913const DOORKEEPER_DEFAULT_BITS: usize = 2048;
914
915impl Doorkeeper {
916    /// Create a new doorkeeper with the specified number of bits.
917    pub fn new(num_bits: usize) -> Self {
918        let num_bits = num_bits.max(64);
919        let num_words = num_bits.div_ceil(64);
920        Self {
921            bits: vec![0u64; num_words],
922            num_bits,
923        }
924    }
925
926    /// Create a doorkeeper with the default size (2048 bits).
927    pub fn with_defaults() -> Self {
928        Self::new(DOORKEEPER_DEFAULT_BITS)
929    }
930
931    /// Check if a key has been seen. Returns true if the bit was already set.
932    pub fn check_and_set(&mut self, hash: u64) -> bool {
933        let idx = (hash as usize) % self.num_bits;
934        let word = idx / 64;
935        let bit = idx % 64;
936        let was_set = (self.bits[word] >> bit) & 1 == 1;
937        self.bits[word] |= 1 << bit;
938        was_set
939    }
940
941    /// Check if a key has been seen without setting.
942    pub fn contains(&self, hash: u64) -> bool {
943        let idx = (hash as usize) % self.num_bits;
944        let word = idx / 64;
945        let bit = idx % 64;
946        (self.bits[word] >> bit) & 1 == 1
947    }
948
949    /// Clear all bits.
950    pub fn clear(&mut self) {
951        self.bits.fill(0);
952    }
953}
954
955/// Compute a secondary fingerprint hash for collision guard.
956///
957/// Uses a different multiplicative constant than FxHash to produce
958/// an independent 64-bit fingerprint.
959#[inline]
960pub fn fingerprint_hash(text: &str) -> u64 {
961    // Simple but effective: fold bytes with a different constant than FxHash.
962    let mut h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
963    for &b in text.as_bytes() {
964        h ^= b as u64;
965        h = h.wrapping_mul(0x0100_0000_01b3); // FNV prime
966    }
967    h
968}
969
970/// Evaluate the TinyLFU admission rule.
971///
972/// Returns `true` if the candidate should be admitted (replacing the victim).
973///
974/// # Rule
975/// Admit if `freq(candidate) > freq(victim)`. On tie, reject (keep victim).
976#[inline]
977pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
978    candidate_freq > victim_freq
979}
980
981// ---------------------------------------------------------------------------
982// W-TinyLFU Width Cache (bd-4kq0.6.2)
983// ---------------------------------------------------------------------------
984
985/// Entry in the TinyLFU cache, storing value and fingerprint for collision guard.
986#[derive(Debug, Clone)]
987struct TinyLfuEntry {
988    width: usize,
989    fingerprint: u64,
990}
991
992/// Width cache using W-TinyLFU admission policy.
993///
994/// Architecture:
995/// - **Window cache** (small LRU, ~1% of capacity): captures recent items.
996/// - **Main cache** (larger LRU, ~99% of capacity): for frequently accessed items.
997/// - **Count-Min Sketch + Doorkeeper**: frequency estimation for admission decisions.
998/// - **Fingerprint guard**: secondary hash per entry to detect hash collisions.
999///
1000/// On every access:
1001/// 1. Check main cache → hit? Return value (verify fingerprint).
1002/// 2. Check window cache → hit? Return value (verify fingerprint).
1003/// 3. Miss: compute width, insert into window cache.
1004///
1005/// On window cache eviction:
1006/// 1. The evicted item becomes a candidate.
1007/// 2. The LRU victim of the main cache is identified.
1008/// 3. If `freq(candidate) > freq(victim)`, candidate enters main cache
1009///    (victim is evicted). Otherwise, candidate is discarded.
1010///
1011/// Frequency tracking uses Doorkeeper → CMS pipeline:
1012/// - First access: doorkeeper records.
1013/// - Second+ access: CMS is incremented.
1014#[derive(Debug)]
1015pub struct TinyLfuWidthCache {
1016    /// Small window cache (recency).
1017    window: LruCache<u64, TinyLfuEntry>,
1018    /// Large main cache (frequency-filtered).
1019    main: LruCache<u64, TinyLfuEntry>,
1020    /// Approximate frequency counter.
1021    sketch: CountMinSketch,
1022    /// One-hit-wonder filter.
1023    doorkeeper: Doorkeeper,
1024    /// Total capacity (window + main).
1025    total_capacity: usize,
1026    /// Hit/miss stats.
1027    hits: u64,
1028    misses: u64,
1029}
1030
1031impl TinyLfuWidthCache {
1032    /// Create a new TinyLFU cache with the given total capacity.
1033    ///
1034    /// The window gets ~1% of capacity (minimum 1), main gets the rest.
1035    pub fn new(total_capacity: usize) -> Self {
1036        let total_capacity = total_capacity.max(2);
1037        let window_cap = (total_capacity / 100).max(1);
1038        let main_cap = total_capacity - window_cap;
1039
1040        Self {
1041            window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1042            main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1043            sketch: CountMinSketch::with_defaults(),
1044            doorkeeper: Doorkeeper::with_defaults(),
1045            total_capacity,
1046            hits: 0,
1047            misses: 0,
1048        }
1049    }
1050
1051    /// Get cached width or compute and cache it.
1052    pub fn get_or_compute(&mut self, text: &str) -> usize {
1053        self.get_or_compute_with(text, crate::display_width)
1054    }
1055
1056    /// Get cached width or compute using a custom function.
1057    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1058    where
1059        F: FnOnce(&str) -> usize,
1060    {
1061        let hash = hash_text(text);
1062        let fp = fingerprint_hash(text);
1063
1064        // Record frequency via doorkeeper → CMS pipeline.
1065        let seen = self.doorkeeper.check_and_set(hash);
1066        if seen {
1067            self.sketch.increment(hash);
1068        }
1069
1070        // Check main cache first (larger, higher value).
1071        if let Some(entry) = self.main.get(&hash) {
1072            if entry.fingerprint == fp {
1073                self.hits += 1;
1074                return entry.width;
1075            }
1076            // Fingerprint mismatch: collision. Evict stale entry.
1077            self.main.pop(&hash);
1078        }
1079
1080        // Check window cache.
1081        if let Some(entry) = self.window.get(&hash) {
1082            if entry.fingerprint == fp {
1083                self.hits += 1;
1084                return entry.width;
1085            }
1086            // Collision in window cache.
1087            self.window.pop(&hash);
1088        }
1089
1090        // Cache miss: compute width.
1091        self.misses += 1;
1092        let width = compute(text);
1093        let new_entry = TinyLfuEntry {
1094            width,
1095            fingerprint: fp,
1096        };
1097
1098        // Insert into window cache. If window is full, the evicted item
1099        // goes through admission filter for main cache.
1100        if self.window.len() >= self.window.cap().get() {
1101            // Get the LRU item from window before it's evicted.
1102            if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1103                self.try_admit_to_main(evicted_hash, evicted_entry);
1104            }
1105        }
1106        self.window.put(hash, new_entry);
1107
1108        width
1109    }
1110
1111    /// Try to admit a candidate (evicted from window) into the main cache.
1112    fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1113        let candidate_freq = self.sketch.estimate(candidate_hash);
1114
1115        if self.main.len() < self.main.cap().get() {
1116            // Main has room — admit unconditionally.
1117            self.main.put(candidate_hash, candidate_entry);
1118            return;
1119        }
1120
1121        // Main is full. Compare candidate frequency with the LRU victim.
1122        if let Some((&victim_hash, _)) = self.main.peek_lru() {
1123            let victim_freq = self.sketch.estimate(victim_hash);
1124            if tinylfu_admit(candidate_freq, victim_freq) {
1125                self.main.pop_lru();
1126                self.main.put(candidate_hash, candidate_entry);
1127            }
1128            // Otherwise, candidate is discarded.
1129        }
1130    }
1131
1132    /// Check if a key is in the cache (window or main).
1133    pub fn contains(&self, text: &str) -> bool {
1134        let hash = hash_text(text);
1135        let fp = fingerprint_hash(text);
1136        if let Some(e) = self.main.peek(&hash)
1137            && e.fingerprint == fp
1138        {
1139            return true;
1140        }
1141        if let Some(e) = self.window.peek(&hash)
1142            && e.fingerprint == fp
1143        {
1144            return true;
1145        }
1146        false
1147    }
1148
1149    /// Get cache statistics.
1150    pub fn stats(&self) -> CacheStats {
1151        CacheStats {
1152            hits: self.hits,
1153            misses: self.misses,
1154            size: self.window.len() + self.main.len(),
1155            capacity: self.total_capacity,
1156        }
1157    }
1158
1159    /// Clear all caches and reset sketch/doorkeeper.
1160    pub fn clear(&mut self) {
1161        self.window.clear();
1162        self.main.clear();
1163        self.sketch.clear();
1164        self.doorkeeper.clear();
1165    }
1166
1167    /// Reset statistics.
1168    pub fn reset_stats(&mut self) {
1169        self.hits = 0;
1170        self.misses = 0;
1171    }
1172
1173    /// Current number of cached entries.
1174    pub fn len(&self) -> usize {
1175        self.window.len() + self.main.len()
1176    }
1177
1178    /// Check if cache is empty.
1179    pub fn is_empty(&self) -> bool {
1180        self.window.is_empty() && self.main.is_empty()
1181    }
1182
1183    /// Total capacity (window + main).
1184    pub fn capacity(&self) -> usize {
1185        self.total_capacity
1186    }
1187
1188    /// Number of entries in the main cache.
1189    pub fn main_len(&self) -> usize {
1190        self.main.len()
1191    }
1192
1193    /// Number of entries in the window cache.
1194    pub fn window_len(&self) -> usize {
1195        self.window.len()
1196    }
1197}
1198
1199// ── S3-FIFO Width Cache ────────────────────────────────────────────────
1200
1201/// Width cache backed by S3-FIFO eviction (bd-l6yba.2).
1202///
1203/// Drop-in replacement for [`WidthCache`] that uses the scan-resistant
1204/// S3-FIFO eviction policy instead of LRU. This protects frequently-used
1205/// width entries from being evicted by one-time scan patterns (e.g. when a
1206/// large block of new text scrolls past).
1207///
1208/// Uses the same 64-bit FxHash keying as [`WidthCache`] with a secondary
1209/// FNV fingerprint for collision detection.
1210#[derive(Debug)]
1211pub struct S3FifoWidthCache {
1212    cache: ftui_core::s3_fifo::S3Fifo<u64, S3FifoEntry>,
1213    hits: u64,
1214    misses: u64,
1215    total_capacity: usize,
1216}
1217
1218/// Entry stored in the S3-FIFO width cache.
1219#[derive(Debug, Clone, Copy)]
1220struct S3FifoEntry {
1221    width: usize,
1222    fingerprint: u64,
1223}
1224
1225impl S3FifoWidthCache {
1226    /// Create a new S3-FIFO width cache with the given capacity.
1227    pub fn new(capacity: usize) -> Self {
1228        let capacity = capacity.max(2);
1229        Self {
1230            cache: ftui_core::s3_fifo::S3Fifo::new(capacity),
1231            hits: 0,
1232            misses: 0,
1233            total_capacity: capacity,
1234        }
1235    }
1236
1237    /// Create a new cache with the default capacity (4096 entries).
1238    #[must_use]
1239    pub fn with_default_capacity() -> Self {
1240        Self::new(DEFAULT_CACHE_CAPACITY)
1241    }
1242
1243    /// Get cached width or compute and cache it.
1244    pub fn get_or_compute(&mut self, text: &str) -> usize {
1245        self.get_or_compute_with(text, crate::display_width)
1246    }
1247
1248    /// Get cached width or compute using a custom function.
1249    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1250    where
1251        F: FnOnce(&str) -> usize,
1252    {
1253        let hash = hash_text(text);
1254        let fp = fingerprint_hash(text);
1255
1256        if let Some(entry) = self.cache.get(&hash) {
1257            if entry.fingerprint == fp {
1258                self.hits += 1;
1259                return entry.width;
1260            }
1261            // Fingerprint mismatch: collision. Remove stale entry.
1262            self.cache.remove(&hash);
1263        }
1264
1265        // Cache miss: compute width.
1266        self.misses += 1;
1267        let width = compute(text);
1268        self.cache.insert(
1269            hash,
1270            S3FifoEntry {
1271                width,
1272                fingerprint: fp,
1273            },
1274        );
1275        width
1276    }
1277
1278    /// Check if a key is in the cache.
1279    pub fn contains(&self, text: &str) -> bool {
1280        let hash = hash_text(text);
1281        self.cache.contains_key(&hash)
1282    }
1283
1284    /// Get cache statistics.
1285    pub fn stats(&self) -> CacheStats {
1286        CacheStats {
1287            hits: self.hits,
1288            misses: self.misses,
1289            size: self.cache.len(),
1290            capacity: self.total_capacity,
1291        }
1292    }
1293
1294    /// Clear the cache.
1295    pub fn clear(&mut self) {
1296        self.cache.clear();
1297        self.hits = 0;
1298        self.misses = 0;
1299    }
1300
1301    /// Reset statistics.
1302    pub fn reset_stats(&mut self) {
1303        self.hits = 0;
1304        self.misses = 0;
1305    }
1306
1307    /// Current number of cached entries.
1308    pub fn len(&self) -> usize {
1309        self.cache.len()
1310    }
1311
1312    /// Check if cache is empty.
1313    pub fn is_empty(&self) -> bool {
1314        self.cache.is_empty()
1315    }
1316
1317    /// Total capacity.
1318    pub fn capacity(&self) -> usize {
1319        self.total_capacity
1320    }
1321}
1322
1323impl Default for S3FifoWidthCache {
1324    fn default() -> Self {
1325        Self::with_default_capacity()
1326    }
1327}
1328
1329#[cfg(test)]
1330mod s3_fifo_width_tests {
1331    use super::*;
1332
1333    #[test]
1334    fn s3fifo_new_cache_is_empty() {
1335        let cache = S3FifoWidthCache::new(100);
1336        assert!(cache.is_empty());
1337        assert_eq!(cache.len(), 0);
1338    }
1339
1340    #[test]
1341    fn s3fifo_get_or_compute_caches_value() {
1342        let mut cache = S3FifoWidthCache::new(100);
1343        let w1 = cache.get_or_compute("hello");
1344        assert_eq!(w1, 5);
1345        assert_eq!(cache.len(), 1);
1346
1347        let w2 = cache.get_or_compute("hello");
1348        assert_eq!(w2, 5);
1349        assert_eq!(cache.len(), 1);
1350
1351        let stats = cache.stats();
1352        assert_eq!(stats.hits, 1);
1353        assert_eq!(stats.misses, 1);
1354    }
1355
1356    #[test]
1357    fn s3fifo_different_strings() {
1358        let mut cache = S3FifoWidthCache::new(100);
1359        cache.get_or_compute("hello");
1360        cache.get_or_compute("world");
1361        cache.get_or_compute("foo");
1362        assert_eq!(cache.len(), 3);
1363    }
1364
1365    #[test]
1366    fn s3fifo_cjk_width() {
1367        let mut cache = S3FifoWidthCache::new(100);
1368        let w = cache.get_or_compute("你好");
1369        assert_eq!(w, 4);
1370    }
1371
1372    #[test]
1373    fn s3fifo_contains() {
1374        let mut cache = S3FifoWidthCache::new(100);
1375        assert!(!cache.contains("hello"));
1376        cache.get_or_compute("hello");
1377        assert!(cache.contains("hello"));
1378    }
1379
1380    #[test]
1381    fn s3fifo_clear_resets() {
1382        let mut cache = S3FifoWidthCache::new(100);
1383        cache.get_or_compute("hello");
1384        cache.get_or_compute("world");
1385        cache.clear();
1386        assert!(cache.is_empty());
1387        assert!(!cache.contains("hello"));
1388        let stats = cache.stats();
1389        assert_eq!(stats.hits, 0);
1390        assert_eq!(stats.misses, 0);
1391    }
1392
1393    #[test]
1394    fn s3fifo_produces_same_widths_as_lru() {
1395        let mut lru = WidthCache::new(100);
1396        let mut s3 = S3FifoWidthCache::new(100);
1397
1398        let texts = [
1399            "hello",
1400            "你好世界",
1401            "abc",
1402            "🎉🎉",
1403            "",
1404            " ",
1405            "a\tb",
1406            "mixed中english文",
1407        ];
1408
1409        for text in &texts {
1410            let lru_w = lru.get_or_compute(text);
1411            let s3_w = s3.get_or_compute(text);
1412            assert_eq!(lru_w, s3_w, "width mismatch for {:?}", text);
1413        }
1414    }
1415
1416    #[test]
1417    fn s3fifo_scan_resistance_preserves_hot_set() {
1418        let mut cache = S3FifoWidthCache::new(50);
1419
1420        // Build a hot set
1421        let hot: Vec<String> = (0..20).map(|i| format!("hot_{i}")).collect();
1422        for text in &hot {
1423            cache.get_or_compute(text);
1424            cache.get_or_compute(text); // access twice to set freq
1425        }
1426
1427        // Scan through a large one-time set
1428        for i in 0..200 {
1429            cache.get_or_compute(&format!("scan_{i}"));
1430        }
1431
1432        // Some hot items should survive
1433        let mut survivors = 0;
1434        for text in &hot {
1435            if cache.contains(text) {
1436                survivors += 1;
1437            }
1438        }
1439        assert!(
1440            survivors > 5,
1441            "scan resistance: only {survivors}/20 hot items survived"
1442        );
1443    }
1444
1445    #[test]
1446    fn s3fifo_default_capacity() {
1447        let cache = S3FifoWidthCache::with_default_capacity();
1448        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
1449    }
1450
1451    #[test]
1452    fn s3fifo_reset_stats() {
1453        let mut cache = S3FifoWidthCache::new(100);
1454        cache.get_or_compute("a");
1455        cache.get_or_compute("a");
1456        cache.reset_stats();
1457        let stats = cache.stats();
1458        assert_eq!(stats.hits, 0);
1459        assert_eq!(stats.misses, 0);
1460    }
1461}
1462
1463#[cfg(test)]
1464mod proptests {
1465    use super::*;
1466    use proptest::prelude::*;
1467
1468    proptest! {
1469        #[test]
1470        fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1471            let mut cache = WidthCache::new(100);
1472            let cached = cache.get_or_compute(&s);
1473            let direct = crate::display_width(&s);
1474            prop_assert_eq!(cached, direct);
1475        }
1476
1477        #[test]
1478        fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1479            let mut cache = WidthCache::new(100);
1480
1481            cache.get_or_compute(&s);
1482            let stats_before = cache.stats();
1483
1484            cache.get_or_compute(&s);
1485            let stats_after = cache.stats();
1486
1487            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1488            prop_assert_eq!(stats_after.misses, stats_before.misses);
1489        }
1490
1491        #[test]
1492        fn lru_never_exceeds_capacity(
1493            strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1494            capacity in 5usize..20
1495        ) {
1496            let mut cache = WidthCache::new(capacity);
1497
1498            for s in &strings {
1499                cache.get_or_compute(s);
1500                prop_assert!(cache.len() <= capacity);
1501            }
1502        }
1503
1504        #[test]
1505        fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1506            let mut cache = WidthCache::new(100);
1507
1508            cache.preload(&s);
1509            let stats_before = cache.stats();
1510
1511            cache.get_or_compute(&s);
1512            let stats_after = cache.stats();
1513
1514            // Should be a hit (preloaded)
1515            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1516        }
1517    }
1518}
1519
1520// ---------------------------------------------------------------------------
1521// TinyLFU Spec Tests (bd-4kq0.6.1)
1522// ---------------------------------------------------------------------------
1523
1524#[cfg(test)]
1525mod tinylfu_tests {
1526    use super::*;
1527
1528    // --- Count-Min Sketch ---
1529
1530    #[test]
1531    fn unit_cms_single_key_count() {
1532        let mut cms = CountMinSketch::with_defaults();
1533        let h = hash_text("hello");
1534
1535        for _ in 0..5 {
1536            cms.increment(h);
1537        }
1538        assert_eq!(cms.estimate(h), 5);
1539    }
1540
1541    #[test]
1542    fn unit_cms_unseen_key_is_zero() {
1543        let cms = CountMinSketch::with_defaults();
1544        assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1545    }
1546
1547    #[test]
1548    fn unit_cms_saturates_at_max() {
1549        let mut cms = CountMinSketch::with_defaults();
1550        let h = hash_text("hot");
1551
1552        for _ in 0..100 {
1553            cms.increment(h);
1554        }
1555        assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1556    }
1557
1558    #[test]
1559    fn unit_cms_bounds() {
1560        // Error bound: estimate(x) <= true_count(x) + epsilon * N.
1561        // With w=1024, epsilon = e/1024 ~ 0.00266.
1562        let mut cms = CountMinSketch::new(1024, 4, u64::MAX); // no reset
1563        let n: u64 = 1000;
1564
1565        // Insert 1000 unique keys
1566        for i in 0..n {
1567            cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1568        }
1569
1570        // Check a target key inserted exactly 5 times
1571        let target = 0xDEAD_BEEF_u64;
1572        for _ in 0..5 {
1573            cms.increment(target);
1574        }
1575
1576        let est = cms.estimate(target);
1577        let epsilon = std::f64::consts::E / 1024.0;
1578        let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1579
1580        assert!(
1581            (est as f64) <= upper_bound,
1582            "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1583            est,
1584            upper_bound,
1585            epsilon,
1586            n + 5,
1587        );
1588        assert!(est >= 5, "estimate {} should be >= true count 5", est);
1589    }
1590
1591    #[test]
1592    fn unit_cms_bounds_mass_test() {
1593        let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1594        let n = 2000u64;
1595
1596        let mut true_counts = vec![0u8; n as usize];
1597        for i in 0..n {
1598            let count = (i % 10 + 1) as u8;
1599            true_counts[i as usize] = count;
1600            for _ in 0..count {
1601                cms.increment(i);
1602            }
1603        }
1604
1605        let total = cms.total_increments();
1606        let epsilon = std::f64::consts::E / 1024.0;
1607        let mut violations = 0u32;
1608
1609        for i in 0..n {
1610            let est = cms.estimate(i);
1611            let true_c = true_counts[i as usize];
1612            let upper = true_c as f64 + epsilon * total as f64;
1613            if est as f64 > upper + 0.5 {
1614                violations += 1;
1615            }
1616            assert!(
1617                est >= true_c,
1618                "key {}: estimate {} < true count {}",
1619                i,
1620                est,
1621                true_c
1622            );
1623        }
1624
1625        // delta = (1/2)^4 = 0.0625; allow generous threshold
1626        let violation_rate = violations as f64 / n as f64;
1627        assert!(
1628            violation_rate <= 0.10,
1629            "violation rate {:.3} exceeds delta threshold",
1630            violation_rate,
1631        );
1632    }
1633
1634    #[test]
1635    fn unit_cms_halving_ages_counts() {
1636        let mut cms = CountMinSketch::new(64, 2, 100);
1637
1638        let h = hash_text("test");
1639        for _ in 0..10 {
1640            cms.increment(h);
1641        }
1642        assert_eq!(cms.estimate(h), 10);
1643
1644        // Trigger reset by reaching reset_interval
1645        for _ in 10..100 {
1646            cms.increment(hash_text("noise"));
1647        }
1648
1649        let est = cms.estimate(h);
1650        assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1651    }
1652
1653    #[test]
1654    fn unit_cms_monotone() {
1655        let mut cms = CountMinSketch::with_defaults();
1656        let h = hash_text("key");
1657
1658        let mut prev_est = 0u8;
1659        for _ in 0..CMS_MAX_COUNT {
1660            cms.increment(h);
1661            let est = cms.estimate(h);
1662            assert!(est >= prev_est, "estimate should be monotone");
1663            prev_est = est;
1664        }
1665    }
1666
1667    // --- Doorkeeper ---
1668
1669    #[test]
1670    fn unit_doorkeeper_first_access_returns_false() {
1671        let mut dk = Doorkeeper::with_defaults();
1672        assert!(!dk.check_and_set(hash_text("new")));
1673    }
1674
1675    #[test]
1676    fn unit_doorkeeper_second_access_returns_true() {
1677        let mut dk = Doorkeeper::with_defaults();
1678        let h = hash_text("key");
1679        dk.check_and_set(h);
1680        assert!(dk.check_and_set(h));
1681    }
1682
1683    #[test]
1684    fn unit_doorkeeper_contains() {
1685        let mut dk = Doorkeeper::with_defaults();
1686        let h = hash_text("key");
1687        assert!(!dk.contains(h));
1688        dk.check_and_set(h);
1689        assert!(dk.contains(h));
1690    }
1691
1692    #[test]
1693    fn unit_doorkeeper_clear_resets() {
1694        let mut dk = Doorkeeper::with_defaults();
1695        let h = hash_text("key");
1696        dk.check_and_set(h);
1697        dk.clear();
1698        assert!(!dk.contains(h));
1699        assert!(!dk.check_and_set(h));
1700    }
1701
1702    #[test]
1703    fn unit_doorkeeper_false_positive_rate() {
1704        let mut dk = Doorkeeper::new(2048);
1705        let n = 100u64;
1706
1707        for i in 0..n {
1708            dk.check_and_set(i * 0x9E37_79B9 + 1);
1709        }
1710
1711        let mut false_positives = 0u32;
1712        for i in 0..1000 {
1713            let h = (i + 100_000) * 0x6A09_E667 + 7;
1714            if dk.contains(h) {
1715                false_positives += 1;
1716            }
1717        }
1718
1719        // k=1, m=2048, n=100: FP rate ~ 1 - e^{-100/2048} ~ 0.048
1720        let fp_rate = false_positives as f64 / 1000.0;
1721        assert!(
1722            fp_rate < 0.15,
1723            "FP rate {:.3} too high (expected < 0.15)",
1724            fp_rate,
1725        );
1726    }
1727
1728    // --- Admission Rule ---
1729
1730    #[test]
1731    fn unit_admission_rule() {
1732        assert!(tinylfu_admit(5, 3)); // candidate > victim -> admit
1733        assert!(!tinylfu_admit(3, 5)); // candidate < victim -> reject
1734        assert!(!tinylfu_admit(3, 3)); // tie -> reject (keep victim)
1735    }
1736
1737    #[test]
1738    fn unit_admission_rule_extremes() {
1739        assert!(tinylfu_admit(1, 0));
1740        assert!(!tinylfu_admit(0, 0));
1741        assert!(!tinylfu_admit(0, 1));
1742        assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1743        assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1744    }
1745
1746    // --- Fingerprint Guard ---
1747
1748    #[test]
1749    fn unit_fingerprint_guard() {
1750        let fp1 = fingerprint_hash("hello");
1751        let fp2 = fingerprint_hash("world");
1752        let fp3 = fingerprint_hash("hello");
1753
1754        assert_ne!(
1755            fp1, fp2,
1756            "Different strings should have different fingerprints"
1757        );
1758        assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1759    }
1760
1761    #[test]
1762    fn unit_fingerprint_guard_collision_rate() {
1763        let mut fps = std::collections::HashSet::new();
1764        let n = 10_000;
1765
1766        for i in 0..n {
1767            let s = format!("string_{}", i);
1768            fps.insert(fingerprint_hash(&s));
1769        }
1770
1771        let collisions = n - fps.len();
1772        assert!(
1773            collisions == 0,
1774            "Expected 0 collisions in 10k items, got {}",
1775            collisions,
1776        );
1777    }
1778
1779    #[test]
1780    fn unit_fingerprint_independent_of_primary_hash() {
1781        let text = "test_string";
1782        let primary = hash_text(text);
1783        let secondary = fingerprint_hash(text);
1784
1785        assert_ne!(
1786            primary, secondary,
1787            "Fingerprint and primary hash should differ"
1788        );
1789    }
1790
1791    // --- Integration: Doorkeeper + CMS pipeline ---
1792
1793    #[test]
1794    fn unit_doorkeeper_cms_pipeline() {
1795        let mut dk = Doorkeeper::with_defaults();
1796        let mut cms = CountMinSketch::with_defaults();
1797        let h = hash_text("item");
1798
1799        // First access: doorkeeper records
1800        assert!(!dk.check_and_set(h));
1801        assert_eq!(cms.estimate(h), 0);
1802
1803        // Second access: doorkeeper confirms, CMS incremented
1804        assert!(dk.check_and_set(h));
1805        cms.increment(h);
1806        assert_eq!(cms.estimate(h), 1);
1807
1808        // Third access
1809        assert!(dk.check_and_set(h));
1810        cms.increment(h);
1811        assert_eq!(cms.estimate(h), 2);
1812    }
1813
1814    #[test]
1815    fn unit_doorkeeper_filters_one_hit_wonders() {
1816        let mut dk = Doorkeeper::with_defaults();
1817        let mut cms = CountMinSketch::with_defaults();
1818
1819        // 100 one-hit items
1820        for i in 0u64..100 {
1821            let h = i * 0x9E37_79B9 + 1;
1822            let seen = dk.check_and_set(h);
1823            if seen {
1824                cms.increment(h);
1825            }
1826        }
1827
1828        assert_eq!(cms.total_increments(), 0);
1829
1830        // Access one again -> passes doorkeeper
1831        let h = 1; // i=0 from the loop above
1832        assert!(dk.check_and_set(h));
1833        cms.increment(h);
1834        assert_eq!(cms.total_increments(), 1);
1835    }
1836}
1837
1838// ---------------------------------------------------------------------------
1839// TinyLFU Implementation Tests (bd-4kq0.6.2)
1840// ---------------------------------------------------------------------------
1841
1842#[cfg(test)]
1843mod tinylfu_impl_tests {
1844    use super::*;
1845
1846    #[test]
1847    fn basic_get_or_compute() {
1848        let mut cache = TinyLfuWidthCache::new(100);
1849        let w = cache.get_or_compute("hello");
1850        assert_eq!(w, 5);
1851        assert_eq!(cache.len(), 1);
1852
1853        let w2 = cache.get_or_compute("hello");
1854        assert_eq!(w2, 5);
1855        let stats = cache.stats();
1856        assert_eq!(stats.misses, 1);
1857        assert_eq!(stats.hits, 1);
1858    }
1859
1860    #[test]
1861    fn window_to_main_promotion() {
1862        // With capacity=100, window=1, main=99.
1863        // Fill window, then force eviction into main via new inserts.
1864        let mut cache = TinyLfuWidthCache::new(100);
1865
1866        // Access "frequent" many times to build CMS frequency.
1867        for _ in 0..10 {
1868            cache.get_or_compute("frequent");
1869        }
1870
1871        // Insert enough items to fill window and force eviction.
1872        for i in 0..5 {
1873            cache.get_or_compute(&format!("item_{}", i));
1874        }
1875
1876        // "frequent" should have been promoted to main cache via admission.
1877        assert!(cache.contains("frequent"));
1878        assert!(cache.main_len() > 0 || cache.window_len() > 0);
1879    }
1880
1881    #[test]
1882    fn unit_window_promotion() {
1883        // Frequent items should end up in main cache.
1884        let mut cache = TinyLfuWidthCache::new(50);
1885
1886        // Access "hot" repeatedly to build frequency.
1887        for _ in 0..20 {
1888            cache.get_or_compute("hot");
1889        }
1890
1891        // Now push enough items through window to force "hot" out of window.
1892        for i in 0..10 {
1893            cache.get_or_compute(&format!("filler_{}", i));
1894        }
1895
1896        // "hot" should still be accessible (promoted to main via admission).
1897        assert!(cache.contains("hot"), "Frequent item should be retained");
1898    }
1899
1900    #[test]
1901    fn fingerprint_guard_detects_collision() {
1902        let mut cache = TinyLfuWidthCache::new(100);
1903
1904        // Compute "hello" with custom function.
1905        let w = cache.get_or_compute_with("hello", |_| 42);
1906        assert_eq!(w, 42);
1907
1908        // Verify it's cached.
1909        assert!(cache.contains("hello"));
1910    }
1911
1912    #[test]
1913    fn admission_rejects_infrequent() {
1914        // Fill main cache with frequently-accessed items.
1915        // Then try to insert a cold item — it should be rejected.
1916        let mut cache = TinyLfuWidthCache::new(10); // window=1, main=9
1917
1918        // Fill main with items accessed multiple times.
1919        for i in 0..9 {
1920            let s = format!("hot_{}", i);
1921            for _ in 0..5 {
1922                cache.get_or_compute(&s);
1923            }
1924        }
1925
1926        // Now insert cold items. They should go through window but not
1927        // necessarily get into main.
1928        for i in 0..20 {
1929            cache.get_or_compute(&format!("cold_{}", i));
1930        }
1931
1932        // Hot items should mostly survive (they have high frequency).
1933        let hot_survivors: usize = (0..9)
1934            .filter(|i| cache.contains(&format!("hot_{}", i)))
1935            .count();
1936        assert!(
1937            hot_survivors >= 5,
1938            "Expected most hot items to survive, got {}/9",
1939            hot_survivors
1940        );
1941    }
1942
1943    #[test]
1944    fn clear_empties_everything() {
1945        let mut cache = TinyLfuWidthCache::new(100);
1946        cache.get_or_compute("a");
1947        cache.get_or_compute("b");
1948        cache.clear();
1949        assert!(cache.is_empty());
1950        assert_eq!(cache.len(), 0);
1951    }
1952
1953    #[test]
1954    fn stats_reflect_usage() {
1955        let mut cache = TinyLfuWidthCache::new(100);
1956        cache.get_or_compute("a");
1957        cache.get_or_compute("a");
1958        cache.get_or_compute("b");
1959
1960        let stats = cache.stats();
1961        assert_eq!(stats.misses, 2);
1962        assert_eq!(stats.hits, 1);
1963        assert_eq!(stats.size, 2);
1964    }
1965
1966    #[test]
1967    fn capacity_is_respected() {
1968        let mut cache = TinyLfuWidthCache::new(20);
1969
1970        for i in 0..100 {
1971            cache.get_or_compute(&format!("item_{}", i));
1972        }
1973
1974        assert!(
1975            cache.len() <= 20,
1976            "Cache size {} exceeds capacity 20",
1977            cache.len()
1978        );
1979    }
1980
1981    #[test]
1982    fn reset_stats_works() {
1983        let mut cache = TinyLfuWidthCache::new(100);
1984        cache.get_or_compute("x");
1985        cache.get_or_compute("x");
1986        cache.reset_stats();
1987        let stats = cache.stats();
1988        assert_eq!(stats.hits, 0);
1989        assert_eq!(stats.misses, 0);
1990    }
1991
1992    #[test]
1993    fn perf_cache_hit_rate() {
1994        // Simulate a Zipfian-like workload: some items accessed frequently,
1995        // many accessed rarely. TinyLFU should achieve decent hit rate.
1996        let mut cache = TinyLfuWidthCache::new(50);
1997
1998        // 10 hot items accessed 20 times each.
1999        for _ in 0..20 {
2000            for i in 0..10 {
2001                cache.get_or_compute(&format!("hot_{}", i));
2002            }
2003        }
2004
2005        // 100 cold items accessed once each.
2006        for i in 0..100 {
2007            cache.get_or_compute(&format!("cold_{}", i));
2008        }
2009
2010        // Re-access hot items — these should mostly be hits.
2011        cache.reset_stats();
2012        for i in 0..10 {
2013            cache.get_or_compute(&format!("hot_{}", i));
2014        }
2015
2016        let stats = cache.stats();
2017        // Hot items should have high hit rate after being frequently accessed.
2018        assert!(
2019            stats.hits >= 5,
2020            "Expected at least 5/10 hot items to hit, got {}",
2021            stats.hits
2022        );
2023    }
2024
2025    #[test]
2026    fn unicode_strings_work() {
2027        let mut cache = TinyLfuWidthCache::new(100);
2028        assert_eq!(cache.get_or_compute("日本語"), 6);
2029        assert_eq!(cache.get_or_compute("café"), 4);
2030        assert_eq!(cache.get_or_compute("日本語"), 6); // hit
2031        assert_eq!(cache.stats().hits, 1);
2032    }
2033
2034    #[test]
2035    fn empty_string() {
2036        let mut cache = TinyLfuWidthCache::new(100);
2037        assert_eq!(cache.get_or_compute(""), 0);
2038    }
2039
2040    #[test]
2041    fn minimum_capacity() {
2042        let cache = TinyLfuWidthCache::new(0);
2043        assert!(cache.capacity() >= 2);
2044    }
2045}
2046
2047// ---------------------------------------------------------------------------
2048// bd-4kq0.6.3: WidthCache Tests + Perf Gates
2049// ---------------------------------------------------------------------------
2050
2051/// Deterministic LCG for test reproducibility (no external rand dependency).
2052#[cfg(test)]
2053struct Lcg(u64);
2054
2055#[cfg(test)]
2056impl Lcg {
2057    fn new(seed: u64) -> Self {
2058        Self(seed)
2059    }
2060    fn next_u64(&mut self) -> u64 {
2061        self.0 = self
2062            .0
2063            .wrapping_mul(6_364_136_223_846_793_005)
2064            .wrapping_add(1);
2065        self.0
2066    }
2067    fn next_usize(&mut self, max: usize) -> usize {
2068        (self.next_u64() % (max as u64)) as usize
2069    }
2070}
2071
2072// ---------------------------------------------------------------------------
2073// 1. Property: cache equivalence (cached width == computed width)
2074// ---------------------------------------------------------------------------
2075
2076#[cfg(test)]
2077mod property_cache_equivalence {
2078    use super::*;
2079    use proptest::prelude::*;
2080
2081    proptest! {
2082        #[test]
2083        fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
2084            let mut cache = TinyLfuWidthCache::new(200);
2085            let cached = cache.get_or_compute(&s);
2086            let direct = crate::display_width(&s);
2087            prop_assert_eq!(cached, direct,
2088                "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
2089        }
2090
2091        #[test]
2092        fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
2093            let mut cache = TinyLfuWidthCache::new(200);
2094            let first = cache.get_or_compute(&s);
2095            let second = cache.get_or_compute(&s);
2096            prop_assert_eq!(first, second,
2097                "First access returned {} but second returned {} for {:?}", first, second, s);
2098        }
2099
2100        #[test]
2101        fn tinylfu_never_exceeds_capacity(
2102            strings in prop::collection::vec("[a-z]{1,5}", 10..200),
2103            capacity in 10usize..50
2104        ) {
2105            let mut cache = TinyLfuWidthCache::new(capacity);
2106            for s in &strings {
2107                cache.get_or_compute(s);
2108                prop_assert!(cache.len() <= capacity,
2109                    "Cache size {} exceeded capacity {}", cache.len(), capacity);
2110            }
2111        }
2112
2113        #[test]
2114        fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
2115            let mut cache = TinyLfuWidthCache::new(100);
2116            let custom_fn = |text: &str| text.len(); // byte length as custom metric
2117            let cached = cache.get_or_compute_with(&s, custom_fn);
2118            prop_assert_eq!(cached, s.len(),
2119                "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
2120        }
2121    }
2122
2123    #[test]
2124    fn deterministic_seed_equivalence() {
2125        // Same workload with same seed → same results every time.
2126        let mut rng = super::Lcg::new(0xDEAD_BEEF);
2127
2128        let mut cache1 = TinyLfuWidthCache::new(50);
2129        let mut results1 = Vec::new();
2130        for _ in 0..500 {
2131            let idx = rng.next_usize(100);
2132            let s = format!("key_{}", idx);
2133            results1.push(cache1.get_or_compute(&s));
2134        }
2135
2136        let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
2137        let mut cache2 = TinyLfuWidthCache::new(50);
2138        let mut results2 = Vec::new();
2139        for _ in 0..500 {
2140            let idx = rng2.next_usize(100);
2141            let s = format!("key_{}", idx);
2142            results2.push(cache2.get_or_compute(&s));
2143        }
2144
2145        assert_eq!(
2146            results1, results2,
2147            "Deterministic seed must produce identical results"
2148        );
2149    }
2150
2151    #[test]
2152    fn both_caches_agree_on_widths() {
2153        // WidthCache (plain LRU) and TinyLfuWidthCache must return the same
2154        // widths for any input.
2155        let mut lru = WidthCache::new(200);
2156        let mut tlfu = TinyLfuWidthCache::new(200);
2157
2158        let inputs = [
2159            "",
2160            "hello",
2161            "日本語テスト",
2162            "café résumé",
2163            "a\tb",
2164            "🎉🎊🎈",
2165            "mixed日本eng",
2166            "    spaces    ",
2167            "AAAAAAAAAAAAAAAAAAAAAAAAA",
2168            "x",
2169        ];
2170
2171        for &s in &inputs {
2172            let w1 = lru.get_or_compute(s);
2173            let w2 = tlfu.get_or_compute(s);
2174            assert_eq!(
2175                w1, w2,
2176                "Width mismatch for {:?}: LRU={}, TinyLFU={}",
2177                s, w1, w2
2178            );
2179        }
2180    }
2181}
2182
2183// ---------------------------------------------------------------------------
2184// 2. E2E cache replay: deterministic workload, log hit rate + latency (JSONL)
2185// ---------------------------------------------------------------------------
2186
2187#[cfg(test)]
2188mod e2e_cache_replay {
2189    use super::*;
2190    use std::time::Instant;
2191
2192    /// A single replay record, serialisable to JSONL.
2193    #[derive(Debug)]
2194    struct ReplayRecord {
2195        step: usize,
2196        key: String,
2197        width: usize,
2198        hit: bool,
2199        latency_ns: u128,
2200    }
2201
2202    impl ReplayRecord {
2203        fn to_jsonl(&self) -> String {
2204            format!(
2205                r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
2206                self.step, self.key, self.width, self.hit, self.latency_ns,
2207            )
2208        }
2209    }
2210
2211    fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
2212        // Approximate Zipfian: lower indices are much more frequent.
2213        (0..n)
2214            .map(|_| {
2215                let raw = rng.next_usize(universe * universe);
2216                let idx = (raw as f64).sqrt() as usize % universe;
2217                format!("item_{}", idx)
2218            })
2219            .collect()
2220    }
2221
2222    #[test]
2223    fn replay_zipfian_tinylfu() {
2224        let mut rng = super::Lcg::new(0x1234_5678);
2225        let workload = zipfian_workload(&mut rng, 2000, 200);
2226
2227        let mut cache = TinyLfuWidthCache::new(50);
2228        let mut records = Vec::with_capacity(workload.len());
2229
2230        for (i, key) in workload.iter().enumerate() {
2231            let stats_before = cache.stats();
2232            let t0 = Instant::now();
2233            let width = cache.get_or_compute(key);
2234            let elapsed = t0.elapsed().as_nanos();
2235            let stats_after = cache.stats();
2236            let hit = stats_after.hits > stats_before.hits;
2237
2238            records.push(ReplayRecord {
2239                step: i,
2240                key: key.clone(),
2241                width,
2242                hit,
2243                latency_ns: elapsed,
2244            });
2245        }
2246
2247        // Validate JSONL serialisation is parseable.
2248        for r in &records[..5] {
2249            let line = r.to_jsonl();
2250            assert!(
2251                line.starts_with('{') && line.ends_with('}'),
2252                "Bad JSONL: {}",
2253                line
2254            );
2255        }
2256
2257        // Compute aggregate stats.
2258        let total = records.len();
2259        let hits = records.iter().filter(|r| r.hit).count();
2260        let hit_rate = hits as f64 / total as f64;
2261
2262        // Zipfian workload with 50-entry cache over 200-item universe
2263        // should get a meaningful hit rate (> 10%).
2264        assert!(
2265            hit_rate > 0.10,
2266            "Zipfian hit rate too low: {:.2}% ({}/{})",
2267            hit_rate * 100.0,
2268            hits,
2269            total
2270        );
2271    }
2272
2273    #[test]
2274    fn replay_zipfian_lru_comparison() {
2275        let mut rng = super::Lcg::new(0x1234_5678);
2276        let workload = zipfian_workload(&mut rng, 2000, 200);
2277
2278        let mut tlfu = TinyLfuWidthCache::new(50);
2279        let mut lru = WidthCache::new(50);
2280
2281        for key in &workload {
2282            tlfu.get_or_compute(key);
2283            lru.get_or_compute(key);
2284        }
2285
2286        let tlfu_stats = tlfu.stats();
2287        let lru_stats = lru.stats();
2288
2289        // Both must have correct total operations.
2290        assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2291        assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2292
2293        // TinyLFU should be at least competitive with plain LRU on Zipfian.
2294        // (In practice TinyLFU often wins, but we just check both work.)
2295        assert!(
2296            tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2297            "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2298            tlfu_stats.hit_rate() * 100.0,
2299            lru_stats.hit_rate() * 100.0,
2300        );
2301    }
2302
2303    #[test]
2304    fn replay_deterministic_reproduction() {
2305        // Two identical replays must produce identical hit/miss sequences.
2306        let run = |seed: u64| -> Vec<bool> {
2307            let mut rng = super::Lcg::new(seed);
2308            let workload = zipfian_workload(&mut rng, 500, 100);
2309            let mut cache = TinyLfuWidthCache::new(30);
2310            let mut hits = Vec::with_capacity(500);
2311            for key in &workload {
2312                let before = cache.stats().hits;
2313                cache.get_or_compute(key);
2314                hits.push(cache.stats().hits > before);
2315            }
2316            hits
2317        };
2318
2319        let run1 = run(0xABCD_EF01);
2320        let run2 = run(0xABCD_EF01);
2321        assert_eq!(run1, run2, "Deterministic replay diverged");
2322    }
2323
2324    #[test]
2325    fn replay_uniform_workload() {
2326        // Uniform access pattern: every key equally likely. Hit rate should be
2327        // roughly cache_size / universe_size for large N.
2328        let mut rng = super::Lcg::new(0x9999);
2329        let universe = 100;
2330        let cache_size = 25;
2331        let n = 5000;
2332
2333        let mut cache = TinyLfuWidthCache::new(cache_size);
2334
2335        // Warm up: access each key once.
2336        for i in 0..universe {
2337            cache.get_or_compute(&format!("u_{}", i));
2338        }
2339
2340        cache.reset_stats();
2341        for _ in 0..n {
2342            let idx = rng.next_usize(universe);
2343            cache.get_or_compute(&format!("u_{}", idx));
2344        }
2345
2346        let stats = cache.stats();
2347        let hit_rate = stats.hit_rate();
2348        // Theoretical: ~25/100 = 25%. Allow range 10%–60%.
2349        assert!(
2350            hit_rate > 0.10 && hit_rate < 0.60,
2351            "Uniform hit rate {:.2}% outside expected range",
2352            hit_rate * 100.0,
2353        );
2354    }
2355}
2356
2357// ---------------------------------------------------------------------------
2358// 3. Perf gates: cache operations < 1us p95
2359// ---------------------------------------------------------------------------
2360
2361#[cfg(test)]
2362mod perf_cache_overhead {
2363    use super::*;
2364    use std::time::Instant;
2365
2366    /// Collect latencies for N operations, return sorted Vec<u128> in nanoseconds.
2367    fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2368        let mut latencies = Vec::with_capacity(n);
2369        for i in 0..n {
2370            let t0 = Instant::now();
2371            op(i);
2372            latencies.push(t0.elapsed().as_nanos());
2373        }
2374        latencies.sort_unstable();
2375        latencies
2376    }
2377
2378    fn p95(sorted: &[u128]) -> u128 {
2379        let len = sorted.len();
2380        let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2381        sorted[idx]
2382    }
2383
2384    fn p99(sorted: &[u128]) -> u128 {
2385        let len = sorted.len();
2386        let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2387        sorted[idx]
2388    }
2389
2390    fn median(sorted: &[u128]) -> u128 {
2391        sorted[sorted.len() / 2]
2392    }
2393
2394    #[allow(unexpected_cfgs)]
2395    fn perf_budget_ns(base_ns: u128) -> u128 {
2396        if cfg!(coverage) || cfg!(coverage_nightly) {
2397            base_ns.saturating_mul(10)
2398        } else {
2399            base_ns
2400        }
2401    }
2402
2403    #[test]
2404    fn perf_lru_hit_latency() {
2405        let mut cache = WidthCache::new(1000);
2406        // Warm up.
2407        for i in 0..100 {
2408            cache.get_or_compute(&format!("warm_{}", i));
2409        }
2410
2411        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2412        let latencies = measure_latencies(10_000, |i| {
2413            let _ = cache.get_or_compute(&keys[i % 100]);
2414        });
2415
2416        let p95_ns = p95(&latencies);
2417        // Budget: < 1us (1000ns) p95 for cache hits.
2418        // Use generous 5us to account for CI variability (10x under coverage).
2419        let budget_ns = perf_budget_ns(5_000);
2420        assert!(
2421            p95_ns < budget_ns,
2422            "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2423            p95_ns,
2424            budget_ns,
2425            median(&latencies),
2426            p99(&latencies),
2427        );
2428    }
2429
2430    #[test]
2431    fn perf_tinylfu_hit_latency() {
2432        let mut cache = TinyLfuWidthCache::new(1000);
2433        // Warm up: access each key multiple times to ensure promotion to main.
2434        for _ in 0..5 {
2435            for i in 0..100 {
2436                cache.get_or_compute(&format!("warm_{}", i));
2437            }
2438        }
2439
2440        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2441        let latencies = measure_latencies(10_000, |i| {
2442            let _ = cache.get_or_compute(&keys[i % 100]);
2443        });
2444
2445        let p95_ns = p95(&latencies);
2446        // Budget: < 1us p95 for hits (5us CI-safe threshold).
2447        let budget_ns = perf_budget_ns(5_000);
2448        assert!(
2449            p95_ns < budget_ns,
2450            "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2451            p95_ns,
2452            budget_ns,
2453            median(&latencies),
2454            p99(&latencies),
2455        );
2456    }
2457
2458    #[test]
2459    fn perf_tinylfu_miss_latency() {
2460        let mut cache = TinyLfuWidthCache::new(100);
2461        let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2462
2463        let latencies = measure_latencies(10_000, |i| {
2464            let _ = cache.get_or_compute(&keys[i]);
2465        });
2466
2467        let p95_ns = p95(&latencies);
2468        // Misses include unicode width computation. Budget: < 5us p95.
2469        // Use 20us CI-safe threshold (10x under coverage; computation dominates).
2470        let budget_ns = perf_budget_ns(20_000);
2471        assert!(
2472            p95_ns < budget_ns,
2473            "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2474            p95_ns,
2475            budget_ns,
2476            median(&latencies),
2477            p99(&latencies),
2478        );
2479    }
2480
2481    #[test]
2482    fn perf_cms_increment_latency() {
2483        let mut cms = CountMinSketch::with_defaults();
2484        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2485
2486        let latencies = measure_latencies(10_000, |i| {
2487            cms.increment(hashes[i]);
2488        });
2489
2490        let p95_ns = p95(&latencies);
2491        // CMS increment should be very fast: < 500ns p95.
2492        let budget_ns = perf_budget_ns(2_000);
2493        assert!(
2494            p95_ns < budget_ns,
2495            "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2496            p95_ns,
2497            budget_ns,
2498            median(&latencies),
2499        );
2500    }
2501
2502    #[test]
2503    fn perf_doorkeeper_latency() {
2504        let mut dk = Doorkeeper::with_defaults();
2505        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2506
2507        let latencies = measure_latencies(10_000, |i| {
2508            let _ = dk.check_and_set(hashes[i]);
2509        });
2510
2511        let p95_ns = p95(&latencies);
2512        // Doorkeeper bit ops: < 200ns p95.
2513        let budget_ns = perf_budget_ns(1_000);
2514        assert!(
2515            p95_ns < budget_ns,
2516            "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2517            p95_ns,
2518            budget_ns,
2519            median(&latencies),
2520        );
2521    }
2522
2523    #[test]
2524    fn perf_fingerprint_hash_latency() {
2525        let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2526
2527        let latencies = measure_latencies(10_000, |i| {
2528            let _ = fingerprint_hash(&keys[i]);
2529        });
2530
2531        let p95_ns = p95(&latencies);
2532        // FNV hash: < 200ns p95.
2533        let budget_ns = perf_budget_ns(1_000);
2534        assert!(
2535            p95_ns < budget_ns,
2536            "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2537            p95_ns,
2538            budget_ns,
2539            median(&latencies),
2540        );
2541    }
2542}