Skip to main content

ftui_text/
width_cache.rs

1#![forbid(unsafe_code)]
2
3//! LRU width cache for efficient text measurement.
4//!
5//! Text width calculation is a hot path (called every render frame).
6//! This cache stores computed widths to avoid redundant Unicode width
7//! calculations for repeated strings.
8//!
9//! ## VS16 Policy and Caching
10//!
11//! Cache keys are the grapheme string itself. The VS16 width policy
12//! (controlled by `FTUI_EMOJI_VS16_WIDTH`) is global and read once at
13//! startup via `OnceLock` in `ftui_core::text_width`. Changing the env
14//! var mid-process has no effect on cached or future lookups.
15//!
16//! # Example
17//! ```
18//! use ftui_text::WidthCache;
19//!
20//! let mut cache = WidthCache::new(1000);
21//!
22//! // First call computes width
23//! let width = cache.get_or_compute("Hello, world!");
24//! assert_eq!(width, 13);
25//!
26//! // Second call hits cache
27//! let width2 = cache.get_or_compute("Hello, world!");
28//! assert_eq!(width2, 13);
29//!
30//! // Check stats
31//! let stats = cache.stats();
32//! assert_eq!(stats.hits, 1);
33//! assert_eq!(stats.misses, 1);
34//! ```
35
36use lru::LruCache;
37use rustc_hash::FxHasher;
38use std::hash::{Hash, Hasher};
39use std::num::NonZeroUsize;
40
41/// Default cache capacity.
42pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
43
44/// Statistics about cache performance.
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
46pub struct CacheStats {
47    /// Number of cache hits.
48    pub hits: u64,
49    /// Number of cache misses.
50    pub misses: u64,
51    /// Current number of entries.
52    pub size: usize,
53    /// Maximum capacity.
54    pub capacity: usize,
55}
56
57impl CacheStats {
58    /// Calculate hit rate (0.0 to 1.0).
59    #[must_use]
60    pub fn hit_rate(&self) -> f64 {
61        let total = self.hits + self.misses;
62        if total == 0 {
63            0.0
64        } else {
65            self.hits as f64 / total as f64
66        }
67    }
68}
69
70/// LRU cache for text width measurements.
71///
72/// This cache stores the computed display width (in terminal cells) for
73/// text strings, using an LRU eviction policy when capacity is reached.
74///
75/// # Performance
76/// - Uses FxHash for fast hashing
77/// - O(1) lookup and insertion
78/// - Automatic LRU eviction
79/// - Keys are stored as 64-bit hashes (not full strings) to minimize memory
80///
81/// # Hash Collisions
82/// The cache uses a 64-bit hash as the lookup key rather than storing the
83/// full string. This trades theoretical correctness for memory efficiency.
84/// With FxHash, collision probability is ~1 in 2^64, making this safe for
85/// practical use. If you require guaranteed correctness, use `contains()`
86/// to verify presence before trusting cached values.
87///
88/// # Thread Safety
89/// `WidthCache` is not thread-safe. For concurrent use, wrap in a mutex
90/// or use thread-local caches.
91#[derive(Debug)]
92pub struct WidthCache {
93    cache: LruCache<u64, usize>,
94    hits: u64,
95    misses: u64,
96}
97
98impl WidthCache {
99    /// Create a new cache with the specified capacity.
100    ///
101    /// If capacity is zero, defaults to 1.
102    #[must_use]
103    pub fn new(capacity: usize) -> Self {
104        let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
105        Self {
106            cache: LruCache::new(capacity),
107            hits: 0,
108            misses: 0,
109        }
110    }
111
112    /// Create a new cache with the default capacity (4096 entries).
113    #[must_use]
114    pub fn with_default_capacity() -> Self {
115        Self::new(DEFAULT_CACHE_CAPACITY)
116    }
117
118    /// Get cached width or compute and cache it.
119    ///
120    /// If the text is in the cache, returns the cached width.
121    /// Otherwise, computes the width using `display_width` and caches it.
122    #[inline]
123    pub fn get_or_compute(&mut self, text: &str) -> usize {
124        self.get_or_compute_with(text, crate::display_width)
125    }
126
127    /// Get cached width or compute using a custom function.
128    ///
129    /// This allows using custom width calculation functions for testing
130    /// or specialized terminal behavior.
131    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
132    where
133        F: FnOnce(&str) -> usize,
134    {
135        let hash = hash_text(text);
136
137        if let Some(&width) = self.cache.get(&hash) {
138            self.hits += 1;
139            return width;
140        }
141
142        self.misses += 1;
143        let width = compute(text);
144        self.cache.put(hash, width);
145        width
146    }
147
148    /// Check if a text string is in the cache.
149    #[must_use]
150    pub fn contains(&self, text: &str) -> bool {
151        let hash = hash_text(text);
152        self.cache.contains(&hash)
153    }
154
155    /// Get the cached width for a text string without computing.
156    ///
157    /// Returns `None` if the text is not in the cache.
158    /// Note: This does update the LRU order.
159    #[must_use]
160    pub fn get(&mut self, text: &str) -> Option<usize> {
161        let hash = hash_text(text);
162        self.cache.get(&hash).copied()
163    }
164
165    /// Peek at the cached width without updating LRU order.
166    #[must_use]
167    pub fn peek(&self, text: &str) -> Option<usize> {
168        let hash = hash_text(text);
169        self.cache.peek(&hash).copied()
170    }
171
172    /// Pre-populate the cache with a text string.
173    ///
174    /// This is useful for warming up the cache with known strings.
175    pub fn preload(&mut self, text: &str) {
176        let hash = hash_text(text);
177        if !self.cache.contains(&hash) {
178            let width = crate::display_width(text);
179            self.cache.put(hash, width);
180        }
181    }
182
183    /// Pre-populate the cache with multiple strings.
184    pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
185        for text in texts {
186            self.preload(text);
187        }
188    }
189
190    /// Clear the cache.
191    pub fn clear(&mut self) {
192        self.cache.clear();
193    }
194
195    /// Reset statistics.
196    pub fn reset_stats(&mut self) {
197        self.hits = 0;
198        self.misses = 0;
199    }
200
201    /// Get cache statistics.
202    #[inline]
203    #[must_use]
204    pub fn stats(&self) -> CacheStats {
205        CacheStats {
206            hits: self.hits,
207            misses: self.misses,
208            size: self.cache.len(),
209            capacity: self.cache.cap().get(),
210        }
211    }
212
213    /// Get the current number of cached entries.
214    #[inline]
215    #[must_use]
216    pub fn len(&self) -> usize {
217        self.cache.len()
218    }
219
220    /// Check if the cache is empty.
221    #[inline]
222    #[must_use]
223    pub fn is_empty(&self) -> bool {
224        self.cache.is_empty()
225    }
226
227    /// Get the cache capacity.
228    #[inline]
229    #[must_use]
230    pub fn capacity(&self) -> usize {
231        self.cache.cap().get()
232    }
233
234    /// Resize the cache capacity.
235    ///
236    /// If the new capacity is smaller than the current size,
237    /// entries will be evicted (LRU order).
238    pub fn resize(&mut self, new_capacity: usize) {
239        let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
240        self.cache.resize(new_capacity);
241    }
242}
243
244impl Default for WidthCache {
245    fn default() -> Self {
246        Self::with_default_capacity()
247    }
248}
249
250/// Hash a text string using FxHash for fast hashing.
251#[inline]
252fn hash_text(text: &str) -> u64 {
253    let mut hasher = FxHasher::default();
254    text.hash(&mut hasher);
255    hasher.finish()
256}
257
258// Thread-local width cache for convenience.
259//
260// This provides a global cache that is thread-local, avoiding the need
261// to pass a cache around explicitly.
262#[cfg(feature = "thread_local_cache")]
263thread_local! {
264    static THREAD_CACHE: std::cell::RefCell<WidthCache> =
265        std::cell::RefCell::new(WidthCache::with_default_capacity());
266}
267
268/// Get or compute width using the thread-local cache.
269#[cfg(feature = "thread_local_cache")]
270pub fn cached_width(text: &str) -> usize {
271    THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
272}
273
274/// Clear the thread-local cache.
275#[cfg(feature = "thread_local_cache")]
276pub fn clear_thread_cache() {
277    THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
278}
279
280#[cfg(test)]
281mod tests {
282    use super::*;
283
284    #[test]
285    fn new_cache_is_empty() {
286        let cache = WidthCache::new(100);
287        assert!(cache.is_empty());
288        assert_eq!(cache.len(), 0);
289        assert_eq!(cache.capacity(), 100);
290    }
291
292    #[test]
293    fn default_capacity() {
294        let cache = WidthCache::with_default_capacity();
295        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
296    }
297
298    #[test]
299    fn get_or_compute_caches_value() {
300        let mut cache = WidthCache::new(100);
301
302        let width1 = cache.get_or_compute("hello");
303        assert_eq!(width1, 5);
304        assert_eq!(cache.len(), 1);
305
306        let width2 = cache.get_or_compute("hello");
307        assert_eq!(width2, 5);
308        assert_eq!(cache.len(), 1); // Same entry
309
310        let stats = cache.stats();
311        assert_eq!(stats.hits, 1);
312        assert_eq!(stats.misses, 1);
313    }
314
315    #[test]
316    fn get_or_compute_different_strings() {
317        let mut cache = WidthCache::new(100);
318
319        cache.get_or_compute("hello");
320        cache.get_or_compute("world");
321        cache.get_or_compute("foo");
322
323        assert_eq!(cache.len(), 3);
324        let stats = cache.stats();
325        assert_eq!(stats.misses, 3);
326        assert_eq!(stats.hits, 0);
327    }
328
329    #[test]
330    fn get_or_compute_cjk() {
331        let mut cache = WidthCache::new(100);
332
333        let width = cache.get_or_compute("你好");
334        assert_eq!(width, 4); // 2 chars * 2 cells each
335    }
336
337    #[test]
338    fn contains() {
339        let mut cache = WidthCache::new(100);
340
341        assert!(!cache.contains("hello"));
342        cache.get_or_compute("hello");
343        assert!(cache.contains("hello"));
344    }
345
346    #[test]
347    fn get_returns_none_for_missing() {
348        let mut cache = WidthCache::new(100);
349        assert!(cache.get("missing").is_none());
350    }
351
352    #[test]
353    fn get_returns_cached_value() {
354        let mut cache = WidthCache::new(100);
355        cache.get_or_compute("hello");
356
357        let width = cache.get("hello");
358        assert_eq!(width, Some(5));
359    }
360
361    #[test]
362    fn peek_does_not_update_lru() {
363        let mut cache = WidthCache::new(2);
364
365        cache.get_or_compute("a");
366        cache.get_or_compute("b");
367
368        // Peek at "a" - should not update LRU order
369        let _ = cache.peek("a");
370
371        // Add "c" - should evict "a" (oldest)
372        cache.get_or_compute("c");
373
374        assert!(!cache.contains("a"));
375        assert!(cache.contains("b"));
376        assert!(cache.contains("c"));
377    }
378
379    #[test]
380    fn lru_eviction() {
381        let mut cache = WidthCache::new(2);
382
383        cache.get_or_compute("a");
384        cache.get_or_compute("b");
385        cache.get_or_compute("c"); // Should evict "a"
386
387        assert!(!cache.contains("a"));
388        assert!(cache.contains("b"));
389        assert!(cache.contains("c"));
390    }
391
392    #[test]
393    fn lru_refresh_on_access() {
394        let mut cache = WidthCache::new(2);
395
396        cache.get_or_compute("a");
397        cache.get_or_compute("b");
398        cache.get_or_compute("a"); // Refresh "a" to most recent
399        cache.get_or_compute("c"); // Should evict "b"
400
401        assert!(cache.contains("a"));
402        assert!(!cache.contains("b"));
403        assert!(cache.contains("c"));
404    }
405
406    #[test]
407    fn preload() {
408        let mut cache = WidthCache::new(100);
409
410        cache.preload("hello");
411        assert!(cache.contains("hello"));
412        assert_eq!(cache.peek("hello"), Some(5));
413
414        let stats = cache.stats();
415        assert_eq!(stats.misses, 0); // Preload doesn't count as miss
416        assert_eq!(stats.hits, 0);
417    }
418
419    #[test]
420    fn preload_many() {
421        let mut cache = WidthCache::new(100);
422
423        cache.preload_many(["hello", "world", "foo"]);
424        assert_eq!(cache.len(), 3);
425    }
426
427    #[test]
428    fn clear() {
429        let mut cache = WidthCache::new(100);
430        cache.get_or_compute("hello");
431        cache.get_or_compute("world");
432
433        cache.clear();
434        assert!(cache.is_empty());
435        assert!(!cache.contains("hello"));
436    }
437
438    #[test]
439    fn reset_stats() {
440        let mut cache = WidthCache::new(100);
441        cache.get_or_compute("hello");
442        cache.get_or_compute("hello");
443
444        let stats = cache.stats();
445        assert_eq!(stats.hits, 1);
446        assert_eq!(stats.misses, 1);
447
448        cache.reset_stats();
449        let stats = cache.stats();
450        assert_eq!(stats.hits, 0);
451        assert_eq!(stats.misses, 0);
452    }
453
454    #[test]
455    fn hit_rate() {
456        let stats = CacheStats {
457            hits: 75,
458            misses: 25,
459            size: 100,
460            capacity: 1000,
461        };
462        assert!((stats.hit_rate() - 0.75).abs() < 0.001);
463    }
464
465    #[test]
466    fn hit_rate_no_requests() {
467        let stats = CacheStats::default();
468        assert_eq!(stats.hit_rate(), 0.0);
469    }
470
471    #[test]
472    fn resize_smaller() {
473        let mut cache = WidthCache::new(100);
474        for i in 0..50 {
475            cache.get_or_compute(&format!("text{i}"));
476        }
477        assert_eq!(cache.len(), 50);
478
479        cache.resize(10);
480        assert!(cache.len() <= 10);
481        assert_eq!(cache.capacity(), 10);
482    }
483
484    #[test]
485    fn resize_larger() {
486        let mut cache = WidthCache::new(10);
487        cache.resize(100);
488        assert_eq!(cache.capacity(), 100);
489    }
490
491    #[test]
492    fn custom_compute_function() {
493        let mut cache = WidthCache::new(100);
494
495        // Use a custom width function (always returns 42)
496        let width = cache.get_or_compute_with("hello", |_| 42);
497        assert_eq!(width, 42);
498
499        // Cached value is 42
500        assert_eq!(cache.peek("hello"), Some(42));
501    }
502
503    #[test]
504    fn empty_string() {
505        let mut cache = WidthCache::new(100);
506        let width = cache.get_or_compute("");
507        assert_eq!(width, 0);
508    }
509
510    #[test]
511    fn hash_collision_handling() {
512        // Even with hash collisions, the LRU should handle them
513        // (this is just a stress test with many entries)
514        let mut cache = WidthCache::new(1000);
515
516        for i in 0..500 {
517            cache.get_or_compute(&format!("string{i}"));
518        }
519
520        assert_eq!(cache.len(), 500);
521    }
522
523    #[test]
524    fn unicode_strings() {
525        let mut cache = WidthCache::new(100);
526
527        // Various Unicode strings
528        assert_eq!(cache.get_or_compute("café"), 4);
529        assert_eq!(cache.get_or_compute("日本語"), 6);
530        assert_eq!(cache.get_or_compute("🎉"), 2); // Emoji typically 2 cells
531
532        assert_eq!(cache.len(), 3);
533    }
534
535    #[test]
536    fn combining_characters() {
537        let mut cache = WidthCache::new(100);
538
539        // e + combining acute accent
540        let width = cache.get_or_compute("e\u{0301}");
541        // Should be 1 cell (the combining char doesn't add width)
542        assert_eq!(width, 1);
543    }
544
545    // ==========================================================================
546    // Additional coverage tests
547    // ==========================================================================
548
549    #[test]
550    fn default_cache() {
551        let cache = WidthCache::default();
552        assert!(cache.is_empty());
553        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
554    }
555
556    #[test]
557    fn cache_stats_debug() {
558        let stats = CacheStats {
559            hits: 10,
560            misses: 5,
561            size: 15,
562            capacity: 100,
563        };
564        let debug = format!("{:?}", stats);
565        assert!(debug.contains("CacheStats"));
566        assert!(debug.contains("10")); // hits
567    }
568
569    #[test]
570    fn cache_stats_default() {
571        let stats = CacheStats::default();
572        assert_eq!(stats.hits, 0);
573        assert_eq!(stats.misses, 0);
574        assert_eq!(stats.size, 0);
575        assert_eq!(stats.capacity, 0);
576    }
577
578    #[test]
579    fn cache_stats_equality() {
580        let stats1 = CacheStats {
581            hits: 10,
582            misses: 5,
583            size: 15,
584            capacity: 100,
585        };
586        let stats2 = stats1; // Copy
587        assert_eq!(stats1, stats2);
588    }
589
590    #[test]
591    fn clear_after_preload() {
592        let mut cache = WidthCache::new(100);
593        cache.preload_many(["hello", "world", "test"]);
594        assert_eq!(cache.len(), 3);
595
596        cache.clear();
597        assert!(cache.is_empty());
598        assert!(!cache.contains("hello"));
599    }
600
601    #[test]
602    fn preload_existing_is_noop() {
603        let mut cache = WidthCache::new(100);
604        cache.get_or_compute("hello"); // First access
605        let len_before = cache.len();
606
607        cache.preload("hello"); // Already exists
608        assert_eq!(cache.len(), len_before);
609    }
610
611    #[test]
612    fn minimum_capacity_is_one() {
613        let cache = WidthCache::new(0);
614        assert_eq!(cache.capacity(), 1);
615    }
616
617    #[test]
618    fn width_cache_debug() {
619        let cache = WidthCache::new(10);
620        let debug = format!("{:?}", cache);
621        assert!(debug.contains("WidthCache"));
622    }
623
624    #[test]
625    fn emoji_zwj_sequence() {
626        let mut cache = WidthCache::new(100);
627        // Family emoji (ZWJ sequence)
628        let width = cache.get_or_compute("👨‍👩‍👧");
629        // Width varies by implementation, just ensure it doesn't panic
630        assert!(width >= 1);
631    }
632
633    #[test]
634    fn emoji_with_skin_tone() {
635        let mut cache = WidthCache::new(100);
636        let width = cache.get_or_compute("👍🏻");
637        assert!(width >= 1);
638    }
639
640    #[test]
641    fn flag_emoji() {
642        let mut cache = WidthCache::new(100);
643        // US flag emoji (regional indicators)
644        let width = cache.get_or_compute("🇺🇸");
645        assert!(width >= 1);
646    }
647
648    #[test]
649    fn mixed_width_strings() {
650        let mut cache = WidthCache::new(100);
651        // Mixed ASCII and CJK
652        let width = cache.get_or_compute("Hello你好World");
653        assert_eq!(width, 14); // 10 ASCII + 4 CJK
654    }
655
656    #[test]
657    fn stats_size_reflects_cache_len() {
658        let mut cache = WidthCache::new(100);
659        cache.get_or_compute("a");
660        cache.get_or_compute("b");
661        cache.get_or_compute("c");
662
663        let stats = cache.stats();
664        assert_eq!(stats.size, cache.len());
665        assert_eq!(stats.size, 3);
666    }
667
668    #[test]
669    fn stats_capacity_matches() {
670        let cache = WidthCache::new(42);
671        let stats = cache.stats();
672        assert_eq!(stats.capacity, 42);
673    }
674
675    #[test]
676    fn resize_to_zero_becomes_one() {
677        let mut cache = WidthCache::new(100);
678        cache.resize(0);
679        assert_eq!(cache.capacity(), 1);
680    }
681
682    #[test]
683    fn get_updates_lru_order() {
684        let mut cache = WidthCache::new(2);
685
686        cache.get_or_compute("a");
687        cache.get_or_compute("b");
688
689        // Access "a" via get() - should update LRU order
690        let _ = cache.get("a");
691
692        // Add "c" - should evict "b" (now oldest)
693        cache.get_or_compute("c");
694
695        assert!(cache.contains("a"));
696        assert!(!cache.contains("b"));
697        assert!(cache.contains("c"));
698    }
699
700    #[test]
701    fn contains_does_not_modify_stats() {
702        let mut cache = WidthCache::new(100);
703        cache.get_or_compute("hello");
704
705        let stats_before = cache.stats();
706        let _ = cache.contains("hello");
707        let _ = cache.contains("missing");
708        let stats_after = cache.stats();
709
710        assert_eq!(stats_before.hits, stats_after.hits);
711        assert_eq!(stats_before.misses, stats_after.misses);
712    }
713
714    #[test]
715    fn peek_returns_none_for_missing() {
716        let cache = WidthCache::new(100);
717        assert!(cache.peek("missing").is_none());
718    }
719
720    #[test]
721    fn custom_compute_called_once() {
722        let mut cache = WidthCache::new(100);
723        let mut call_count = 0;
724
725        cache.get_or_compute_with("test", |_| {
726            call_count += 1;
727            10
728        });
729
730        cache.get_or_compute_with("test", |_| {
731            call_count += 1;
732            20 // This shouldn't be called
733        });
734
735        assert_eq!(call_count, 1);
736        assert_eq!(cache.peek("test"), Some(10));
737    }
738
739    #[test]
740    fn whitespace_strings() {
741        let mut cache = WidthCache::new(100);
742        assert_eq!(cache.get_or_compute("   "), 3); // 3 spaces
743        assert_eq!(cache.get_or_compute("\t"), 1); // Tab is 1 cell typically
744        assert_eq!(cache.get_or_compute("\n"), 1); // Newline
745    }
746}
747
748// ---------------------------------------------------------------------------
749// W-TinyLFU Admission Components (bd-4kq0.6.1)
750// ---------------------------------------------------------------------------
751//
752// # Design
753//
754// W-TinyLFU augments LRU eviction with a frequency-based admission filter:
755//
756// 1. **Count-Min Sketch (CMS)**: Approximate frequency counter.
757//    - Parameters: width `w`, depth `d`.
758//    - Error bound: estimated count <= true count + epsilon * N
759//      with probability >= 1 - delta, where:
760//        epsilon = e / w  (e = Euler's number ≈ 2.718)
761//        delta   = (1/2)^d
762//    - Chosen defaults: w=1024 (epsilon ≈ 0.0027), d=4 (delta ≈ 0.0625).
763//    - Counter width: 4 bits (saturating at 15). Periodic halving (aging)
764//      every `reset_interval` increments to prevent staleness.
765//
766// 2. **Doorkeeper**: 1-bit Bloom filter (single hash, `doorkeeper_bits` entries).
767//    - Filters one-hit wonders before they reach the CMS.
768//    - On first access: set doorkeeper bit. On second access in the same
769//      epoch: increment CMS. Cleared on CMS reset.
770//    - Default: 2048 bits (256 bytes).
771//
772// 3. **Admission rule**: When evicting, compare frequencies:
773//    - `freq(candidate) > freq(victim)` → admit candidate, evict victim.
774//    - `freq(candidate) <= freq(victim)` → reject candidate, keep victim.
775//
776// 4. **Fingerprint guard**: The CMS stores 64-bit hashes. Since the main
777//    cache also keys by 64-bit hash, a collision means two distinct strings
778//    share the same key. The fingerprint guard adds a secondary hash
779//    (different seed) stored alongside the value. On lookup, if the
780//    secondary hash mismatches, the entry is treated as a miss and evicted.
781//
782// # Failure Modes
783// - CMS overcounting: bounded by epsilon * N; aging limits staleness.
784// - Doorkeeper false positives: one-hit items may leak to CMS. Bounded
785//   by Bloom FP rate ≈ (1 - e^{-k*n/m})^k with k=1.
786// - Fingerprint collision (secondary hash): probability ~2^{-64}; negligible.
787// - Reset storm: halving all counters is O(w*d). With w=1024, d=4 this is
788//   4096 operations — negligible vs. rendering cost.
789
790/// Count-Min Sketch for approximate frequency estimation.
791///
792/// Uses `depth` independent hash functions (derived from a single hash via
793/// mixing) and `width` counters per row. Each counter is a `u8` saturating
794/// at [`CountMinSketch::MAX_COUNT`] (15 by default, representing 4-bit counters).
795///
796/// # Error Bounds
797///
798/// For a sketch with width `w` and depth `d`, after `N` total increments:
799/// - `estimate(x) <= true_count(x) + epsilon * N` with probability `>= 1 - delta`
800/// - where `epsilon = e / w` and `delta = (1/2)^d`
801#[derive(Debug, Clone)]
802pub struct CountMinSketch {
803    /// Counter matrix: `depth` rows of `width` counters each.
804    counters: Vec<Vec<u8>>,
805    /// Number of hash functions (rows).
806    depth: usize,
807    /// Number of counters per row.
808    width: usize,
809    /// Total number of increments since last reset.
810    total_increments: u64,
811    /// Increment count at which to halve all counters.
812    reset_interval: u64,
813}
814
815/// Maximum counter value (4-bit saturation).
816const CMS_MAX_COUNT: u8 = 15;
817
818/// Default CMS width. epsilon = e/1024 ≈ 0.0027.
819const CMS_DEFAULT_WIDTH: usize = 1024;
820
821/// Default CMS depth. delta = (1/2)^4 = 0.0625.
822const CMS_DEFAULT_DEPTH: usize = 4;
823
824/// Default reset interval (halve counters after this many increments).
825const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
826
827impl CountMinSketch {
828    /// Create a new Count-Min Sketch with the given dimensions.
829    pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
830        let width = width.max(1);
831        let depth = depth.max(1);
832        Self {
833            counters: vec![vec![0u8; width]; depth],
834            depth,
835            width,
836            total_increments: 0,
837            reset_interval: reset_interval.max(1),
838        }
839    }
840
841    /// Create a sketch with default parameters (w=1024, d=4, reset=8192).
842    pub fn with_defaults() -> Self {
843        Self::new(
844            CMS_DEFAULT_WIDTH,
845            CMS_DEFAULT_DEPTH,
846            CMS_DEFAULT_RESET_INTERVAL,
847        )
848    }
849
850    /// Increment the count for a key.
851    pub fn increment(&mut self, hash: u64) {
852        for row in 0..self.depth {
853            let idx = self.index(hash, row);
854            self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
855        }
856        self.total_increments += 1;
857
858        if self.total_increments >= self.reset_interval {
859            self.halve();
860        }
861    }
862
863    /// Estimate the frequency of a key (minimum across all rows).
864    pub fn estimate(&self, hash: u64) -> u8 {
865        let mut min = u8::MAX;
866        for row in 0..self.depth {
867            let idx = self.index(hash, row);
868            min = min.min(self.counters[row][idx]);
869        }
870        min
871    }
872
873    /// Total number of increments since creation or last reset.
874    pub fn total_increments(&self) -> u64 {
875        self.total_increments
876    }
877
878    /// Halve all counters (aging). Resets the increment counter.
879    fn halve(&mut self) {
880        for row in &mut self.counters {
881            for c in row.iter_mut() {
882                *c /= 2;
883            }
884        }
885        self.total_increments = 0;
886    }
887
888    /// Clear all counters to zero.
889    pub fn clear(&mut self) {
890        for row in &mut self.counters {
891            row.fill(0);
892        }
893        self.total_increments = 0;
894    }
895
896    /// Compute the column index for a given hash and row.
897    #[inline]
898    fn index(&self, hash: u64, row: usize) -> usize {
899        // Mix the hash with the row index for independent hash functions.
900        let mixed = hash
901            .wrapping_mul(0x517c_c1b7_2722_0a95)
902            .wrapping_add(row as u64);
903        let mixed = mixed ^ (mixed >> 32);
904        (mixed as usize) % self.width
905    }
906}
907
908/// 1-bit Bloom filter used as a doorkeeper to filter one-hit wonders.
909///
910/// On first access within an epoch, the doorkeeper sets a bit. Only on
911/// the second access does the item get promoted to the Count-Min Sketch.
912/// The doorkeeper is cleared whenever the CMS resets (halves).
913#[derive(Debug, Clone)]
914pub struct Doorkeeper {
915    bits: Vec<u64>,
916    num_bits: usize,
917}
918
919/// Default doorkeeper size in bits.
920const DOORKEEPER_DEFAULT_BITS: usize = 2048;
921
922impl Doorkeeper {
923    /// Create a new doorkeeper with the specified number of bits.
924    pub fn new(num_bits: usize) -> Self {
925        let num_bits = num_bits.max(64);
926        let num_words = num_bits.div_ceil(64);
927        Self {
928            bits: vec![0u64; num_words],
929            num_bits,
930        }
931    }
932
933    /// Create a doorkeeper with the default size (2048 bits).
934    pub fn with_defaults() -> Self {
935        Self::new(DOORKEEPER_DEFAULT_BITS)
936    }
937
938    /// Check if a key has been seen. Returns true if the bit was already set.
939    pub fn check_and_set(&mut self, hash: u64) -> bool {
940        let idx = (hash as usize) % self.num_bits;
941        let word = idx / 64;
942        let bit = idx % 64;
943        let was_set = (self.bits[word] >> bit) & 1 == 1;
944        self.bits[word] |= 1 << bit;
945        was_set
946    }
947
948    /// Check if a key has been seen without setting.
949    pub fn contains(&self, hash: u64) -> bool {
950        let idx = (hash as usize) % self.num_bits;
951        let word = idx / 64;
952        let bit = idx % 64;
953        (self.bits[word] >> bit) & 1 == 1
954    }
955
956    /// Clear all bits.
957    pub fn clear(&mut self) {
958        self.bits.fill(0);
959    }
960}
961
962/// Compute a secondary fingerprint hash for collision guard.
963///
964/// Uses a different multiplicative constant than FxHash to produce
965/// an independent 64-bit fingerprint.
966#[inline]
967pub fn fingerprint_hash(text: &str) -> u64 {
968    // Simple but effective: fold bytes with a different constant than FxHash.
969    let mut h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
970    for &b in text.as_bytes() {
971        h ^= b as u64;
972        h = h.wrapping_mul(0x0100_0000_01b3); // FNV prime
973    }
974    h
975}
976
977/// Evaluate the TinyLFU admission rule.
978///
979/// Returns `true` if the candidate should be admitted (replacing the victim).
980///
981/// # Rule
982/// Admit if `freq(candidate) > freq(victim)`. On tie, reject (keep victim).
983#[inline]
984pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
985    candidate_freq > victim_freq
986}
987
988// ---------------------------------------------------------------------------
989// W-TinyLFU Width Cache (bd-4kq0.6.2)
990// ---------------------------------------------------------------------------
991
992/// Entry in the TinyLFU cache, storing value and fingerprint for collision guard.
993#[derive(Debug, Clone)]
994struct TinyLfuEntry {
995    width: usize,
996    fingerprint: u64,
997}
998
999/// Width cache using W-TinyLFU admission policy.
1000///
1001/// Architecture:
1002/// - **Window cache** (small LRU, ~1% of capacity): captures recent items.
1003/// - **Main cache** (larger LRU, ~99% of capacity): for frequently accessed items.
1004/// - **Count-Min Sketch + Doorkeeper**: frequency estimation for admission decisions.
1005/// - **Fingerprint guard**: secondary hash per entry to detect hash collisions.
1006///
1007/// On every access:
1008/// 1. Check main cache → hit? Return value (verify fingerprint).
1009/// 2. Check window cache → hit? Return value (verify fingerprint).
1010/// 3. Miss: compute width, insert into window cache.
1011///
1012/// On window cache eviction:
1013/// 1. The evicted item becomes a candidate.
1014/// 2. The LRU victim of the main cache is identified.
1015/// 3. If `freq(candidate) > freq(victim)`, candidate enters main cache
1016///    (victim is evicted). Otherwise, candidate is discarded.
1017///
1018/// Frequency tracking uses Doorkeeper → CMS pipeline:
1019/// - First access: doorkeeper records.
1020/// - Second+ access: CMS is incremented.
1021#[derive(Debug)]
1022pub struct TinyLfuWidthCache {
1023    /// Small window cache (recency).
1024    window: LruCache<u64, TinyLfuEntry>,
1025    /// Large main cache (frequency-filtered).
1026    main: LruCache<u64, TinyLfuEntry>,
1027    /// Approximate frequency counter.
1028    sketch: CountMinSketch,
1029    /// One-hit-wonder filter.
1030    doorkeeper: Doorkeeper,
1031    /// Total capacity (window + main).
1032    total_capacity: usize,
1033    /// Hit/miss stats.
1034    hits: u64,
1035    misses: u64,
1036}
1037
1038impl TinyLfuWidthCache {
1039    /// Create a new TinyLFU cache with the given total capacity.
1040    ///
1041    /// The window gets ~1% of capacity (minimum 1), main gets the rest.
1042    pub fn new(total_capacity: usize) -> Self {
1043        let total_capacity = total_capacity.max(2);
1044        let window_cap = (total_capacity / 100).max(1);
1045        let main_cap = total_capacity - window_cap;
1046
1047        Self {
1048            window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1049            main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1050            sketch: CountMinSketch::with_defaults(),
1051            doorkeeper: Doorkeeper::with_defaults(),
1052            total_capacity,
1053            hits: 0,
1054            misses: 0,
1055        }
1056    }
1057
1058    /// Get cached width or compute and cache it.
1059    pub fn get_or_compute(&mut self, text: &str) -> usize {
1060        self.get_or_compute_with(text, crate::display_width)
1061    }
1062
1063    /// Get cached width or compute using a custom function.
1064    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1065    where
1066        F: FnOnce(&str) -> usize,
1067    {
1068        let hash = hash_text(text);
1069        let fp = fingerprint_hash(text);
1070
1071        // Record frequency via doorkeeper → CMS pipeline.
1072        let seen = self.doorkeeper.check_and_set(hash);
1073        if seen {
1074            self.sketch.increment(hash);
1075        }
1076
1077        // Check main cache first (larger, higher value).
1078        if let Some(entry) = self.main.get(&hash) {
1079            if entry.fingerprint == fp {
1080                self.hits += 1;
1081                return entry.width;
1082            }
1083            // Fingerprint mismatch: collision. Evict stale entry.
1084            self.main.pop(&hash);
1085        }
1086
1087        // Check window cache.
1088        if let Some(entry) = self.window.get(&hash) {
1089            if entry.fingerprint == fp {
1090                self.hits += 1;
1091                return entry.width;
1092            }
1093            // Collision in window cache.
1094            self.window.pop(&hash);
1095        }
1096
1097        // Cache miss: compute width.
1098        self.misses += 1;
1099        let width = compute(text);
1100        let new_entry = TinyLfuEntry {
1101            width,
1102            fingerprint: fp,
1103        };
1104
1105        // Insert into window cache. If window is full, the evicted item
1106        // goes through admission filter for main cache.
1107        if self.window.len() >= self.window.cap().get() {
1108            // Get the LRU item from window before it's evicted.
1109            if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1110                self.try_admit_to_main(evicted_hash, evicted_entry);
1111            }
1112        }
1113        self.window.put(hash, new_entry);
1114
1115        width
1116    }
1117
1118    /// Try to admit a candidate (evicted from window) into the main cache.
1119    fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1120        let candidate_freq = self.sketch.estimate(candidate_hash);
1121
1122        if self.main.len() < self.main.cap().get() {
1123            // Main has room — admit unconditionally.
1124            self.main.put(candidate_hash, candidate_entry);
1125            return;
1126        }
1127
1128        // Main is full. Compare candidate frequency with the LRU victim.
1129        if let Some((&victim_hash, _)) = self.main.peek_lru() {
1130            let victim_freq = self.sketch.estimate(victim_hash);
1131            if tinylfu_admit(candidate_freq, victim_freq) {
1132                self.main.pop_lru();
1133                self.main.put(candidate_hash, candidate_entry);
1134            }
1135            // Otherwise, candidate is discarded.
1136        }
1137    }
1138
1139    /// Check if a key is in the cache (window or main).
1140    pub fn contains(&self, text: &str) -> bool {
1141        let hash = hash_text(text);
1142        let fp = fingerprint_hash(text);
1143        if let Some(e) = self.main.peek(&hash)
1144            && e.fingerprint == fp
1145        {
1146            return true;
1147        }
1148        if let Some(e) = self.window.peek(&hash)
1149            && e.fingerprint == fp
1150        {
1151            return true;
1152        }
1153        false
1154    }
1155
1156    /// Get cache statistics.
1157    pub fn stats(&self) -> CacheStats {
1158        CacheStats {
1159            hits: self.hits,
1160            misses: self.misses,
1161            size: self.window.len() + self.main.len(),
1162            capacity: self.total_capacity,
1163        }
1164    }
1165
1166    /// Clear all caches and reset sketch/doorkeeper.
1167    pub fn clear(&mut self) {
1168        self.window.clear();
1169        self.main.clear();
1170        self.sketch.clear();
1171        self.doorkeeper.clear();
1172    }
1173
1174    /// Reset statistics.
1175    pub fn reset_stats(&mut self) {
1176        self.hits = 0;
1177        self.misses = 0;
1178    }
1179
1180    /// Current number of cached entries.
1181    pub fn len(&self) -> usize {
1182        self.window.len() + self.main.len()
1183    }
1184
1185    /// Check if cache is empty.
1186    pub fn is_empty(&self) -> bool {
1187        self.window.is_empty() && self.main.is_empty()
1188    }
1189
1190    /// Total capacity (window + main).
1191    pub fn capacity(&self) -> usize {
1192        self.total_capacity
1193    }
1194
1195    /// Number of entries in the main cache.
1196    pub fn main_len(&self) -> usize {
1197        self.main.len()
1198    }
1199
1200    /// Number of entries in the window cache.
1201    pub fn window_len(&self) -> usize {
1202        self.window.len()
1203    }
1204}
1205
1206// ── S3-FIFO Width Cache ────────────────────────────────────────────────
1207
1208/// Width cache backed by S3-FIFO eviction (bd-l6yba.2).
1209///
1210/// Drop-in replacement for [`WidthCache`] that uses the scan-resistant
1211/// S3-FIFO eviction policy instead of LRU. This protects frequently-used
1212/// width entries from being evicted by one-time scan patterns (e.g. when a
1213/// large block of new text scrolls past).
1214///
1215/// Uses the same 64-bit FxHash keying as [`WidthCache`] with a secondary
1216/// FNV fingerprint for collision detection.
1217#[derive(Debug)]
1218pub struct S3FifoWidthCache {
1219    cache: ftui_core::s3_fifo::S3Fifo<u64, S3FifoEntry>,
1220    hits: u64,
1221    misses: u64,
1222    total_capacity: usize,
1223}
1224
1225/// Entry stored in the S3-FIFO width cache.
1226#[derive(Debug, Clone, Copy)]
1227struct S3FifoEntry {
1228    width: usize,
1229    fingerprint: u64,
1230}
1231
1232impl S3FifoWidthCache {
1233    /// Create a new S3-FIFO width cache with the given capacity.
1234    pub fn new(capacity: usize) -> Self {
1235        let capacity = capacity.max(2);
1236        Self {
1237            cache: ftui_core::s3_fifo::S3Fifo::new(capacity),
1238            hits: 0,
1239            misses: 0,
1240            total_capacity: capacity,
1241        }
1242    }
1243
1244    /// Create a new cache with the default capacity (4096 entries).
1245    #[must_use]
1246    pub fn with_default_capacity() -> Self {
1247        Self::new(DEFAULT_CACHE_CAPACITY)
1248    }
1249
1250    /// Get cached width or compute and cache it.
1251    pub fn get_or_compute(&mut self, text: &str) -> usize {
1252        self.get_or_compute_with(text, crate::display_width)
1253    }
1254
1255    /// Get cached width or compute using a custom function.
1256    pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1257    where
1258        F: FnOnce(&str) -> usize,
1259    {
1260        let hash = hash_text(text);
1261        let fp = fingerprint_hash(text);
1262
1263        if let Some(entry) = self.cache.get(&hash) {
1264            if entry.fingerprint == fp {
1265                self.hits += 1;
1266                return entry.width;
1267            }
1268            // Fingerprint mismatch: collision. Remove stale entry.
1269            self.cache.remove(&hash);
1270        }
1271
1272        // Cache miss: compute width.
1273        self.misses += 1;
1274        let width = compute(text);
1275        self.cache.insert(
1276            hash,
1277            S3FifoEntry {
1278                width,
1279                fingerprint: fp,
1280            },
1281        );
1282        width
1283    }
1284
1285    /// Check if a key is in the cache.
1286    pub fn contains(&self, text: &str) -> bool {
1287        let hash = hash_text(text);
1288        self.cache.contains_key(&hash)
1289    }
1290
1291    /// Get cache statistics.
1292    pub fn stats(&self) -> CacheStats {
1293        CacheStats {
1294            hits: self.hits,
1295            misses: self.misses,
1296            size: self.cache.len(),
1297            capacity: self.total_capacity,
1298        }
1299    }
1300
1301    /// Clear the cache.
1302    pub fn clear(&mut self) {
1303        self.cache.clear();
1304        self.hits = 0;
1305        self.misses = 0;
1306    }
1307
1308    /// Reset statistics.
1309    pub fn reset_stats(&mut self) {
1310        self.hits = 0;
1311        self.misses = 0;
1312    }
1313
1314    /// Current number of cached entries.
1315    pub fn len(&self) -> usize {
1316        self.cache.len()
1317    }
1318
1319    /// Check if cache is empty.
1320    pub fn is_empty(&self) -> bool {
1321        self.cache.is_empty()
1322    }
1323
1324    /// Total capacity.
1325    pub fn capacity(&self) -> usize {
1326        self.total_capacity
1327    }
1328}
1329
1330impl Default for S3FifoWidthCache {
1331    fn default() -> Self {
1332        Self::with_default_capacity()
1333    }
1334}
1335
1336#[cfg(test)]
1337mod s3_fifo_width_tests {
1338    use super::*;
1339
1340    #[test]
1341    fn s3fifo_new_cache_is_empty() {
1342        let cache = S3FifoWidthCache::new(100);
1343        assert!(cache.is_empty());
1344        assert_eq!(cache.len(), 0);
1345    }
1346
1347    #[test]
1348    fn s3fifo_get_or_compute_caches_value() {
1349        let mut cache = S3FifoWidthCache::new(100);
1350        let w1 = cache.get_or_compute("hello");
1351        assert_eq!(w1, 5);
1352        assert_eq!(cache.len(), 1);
1353
1354        let w2 = cache.get_or_compute("hello");
1355        assert_eq!(w2, 5);
1356        assert_eq!(cache.len(), 1);
1357
1358        let stats = cache.stats();
1359        assert_eq!(stats.hits, 1);
1360        assert_eq!(stats.misses, 1);
1361    }
1362
1363    #[test]
1364    fn s3fifo_different_strings() {
1365        let mut cache = S3FifoWidthCache::new(100);
1366        cache.get_or_compute("hello");
1367        cache.get_or_compute("world");
1368        cache.get_or_compute("foo");
1369        assert_eq!(cache.len(), 3);
1370    }
1371
1372    #[test]
1373    fn s3fifo_cjk_width() {
1374        let mut cache = S3FifoWidthCache::new(100);
1375        let w = cache.get_or_compute("你好");
1376        assert_eq!(w, 4);
1377    }
1378
1379    #[test]
1380    fn s3fifo_contains() {
1381        let mut cache = S3FifoWidthCache::new(100);
1382        assert!(!cache.contains("hello"));
1383        cache.get_or_compute("hello");
1384        assert!(cache.contains("hello"));
1385    }
1386
1387    #[test]
1388    fn s3fifo_clear_resets() {
1389        let mut cache = S3FifoWidthCache::new(100);
1390        cache.get_or_compute("hello");
1391        cache.get_or_compute("world");
1392        cache.clear();
1393        assert!(cache.is_empty());
1394        assert!(!cache.contains("hello"));
1395        let stats = cache.stats();
1396        assert_eq!(stats.hits, 0);
1397        assert_eq!(stats.misses, 0);
1398    }
1399
1400    #[test]
1401    fn s3fifo_produces_same_widths_as_lru() {
1402        let mut lru = WidthCache::new(100);
1403        let mut s3 = S3FifoWidthCache::new(100);
1404
1405        let texts = [
1406            "hello",
1407            "你好世界",
1408            "abc",
1409            "🎉🎉",
1410            "",
1411            " ",
1412            "a\tb",
1413            "mixed中english文",
1414        ];
1415
1416        for text in &texts {
1417            let lru_w = lru.get_or_compute(text);
1418            let s3_w = s3.get_or_compute(text);
1419            assert_eq!(lru_w, s3_w, "width mismatch for {:?}", text);
1420        }
1421    }
1422
1423    #[test]
1424    fn s3fifo_scan_resistance_preserves_hot_set() {
1425        let mut cache = S3FifoWidthCache::new(50);
1426
1427        // Build a hot set
1428        let hot: Vec<String> = (0..20).map(|i| format!("hot_{i}")).collect();
1429        for text in &hot {
1430            cache.get_or_compute(text);
1431            cache.get_or_compute(text); // access twice to set freq
1432        }
1433
1434        // Scan through a large one-time set
1435        for i in 0..200 {
1436            cache.get_or_compute(&format!("scan_{i}"));
1437        }
1438
1439        // Some hot items should survive
1440        let mut survivors = 0;
1441        for text in &hot {
1442            if cache.contains(text) {
1443                survivors += 1;
1444            }
1445        }
1446        assert!(
1447            survivors > 5,
1448            "scan resistance: only {survivors}/20 hot items survived"
1449        );
1450    }
1451
1452    #[test]
1453    fn s3fifo_default_capacity() {
1454        let cache = S3FifoWidthCache::with_default_capacity();
1455        assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
1456    }
1457
1458    #[test]
1459    fn s3fifo_reset_stats() {
1460        let mut cache = S3FifoWidthCache::new(100);
1461        cache.get_or_compute("a");
1462        cache.get_or_compute("a");
1463        cache.reset_stats();
1464        let stats = cache.stats();
1465        assert_eq!(stats.hits, 0);
1466        assert_eq!(stats.misses, 0);
1467    }
1468}
1469
1470#[cfg(test)]
1471mod proptests {
1472    use super::*;
1473    use proptest::prelude::*;
1474
1475    proptest! {
1476        #[test]
1477        fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1478            let mut cache = WidthCache::new(100);
1479            let cached = cache.get_or_compute(&s);
1480            let direct = crate::display_width(&s);
1481            prop_assert_eq!(cached, direct);
1482        }
1483
1484        #[test]
1485        fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1486            let mut cache = WidthCache::new(100);
1487
1488            cache.get_or_compute(&s);
1489            let stats_before = cache.stats();
1490
1491            cache.get_or_compute(&s);
1492            let stats_after = cache.stats();
1493
1494            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1495            prop_assert_eq!(stats_after.misses, stats_before.misses);
1496        }
1497
1498        #[test]
1499        fn lru_never_exceeds_capacity(
1500            strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1501            capacity in 5usize..20
1502        ) {
1503            let mut cache = WidthCache::new(capacity);
1504
1505            for s in &strings {
1506                cache.get_or_compute(s);
1507                prop_assert!(cache.len() <= capacity);
1508            }
1509        }
1510
1511        #[test]
1512        fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1513            let mut cache = WidthCache::new(100);
1514
1515            cache.preload(&s);
1516            let stats_before = cache.stats();
1517
1518            cache.get_or_compute(&s);
1519            let stats_after = cache.stats();
1520
1521            // Should be a hit (preloaded)
1522            prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1523        }
1524    }
1525}
1526
1527// ---------------------------------------------------------------------------
1528// TinyLFU Spec Tests (bd-4kq0.6.1)
1529// ---------------------------------------------------------------------------
1530
1531#[cfg(test)]
1532mod tinylfu_tests {
1533    use super::*;
1534
1535    // --- Count-Min Sketch ---
1536
1537    #[test]
1538    fn unit_cms_single_key_count() {
1539        let mut cms = CountMinSketch::with_defaults();
1540        let h = hash_text("hello");
1541
1542        for _ in 0..5 {
1543            cms.increment(h);
1544        }
1545        assert_eq!(cms.estimate(h), 5);
1546    }
1547
1548    #[test]
1549    fn unit_cms_unseen_key_is_zero() {
1550        let cms = CountMinSketch::with_defaults();
1551        assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1552    }
1553
1554    #[test]
1555    fn unit_cms_saturates_at_max() {
1556        let mut cms = CountMinSketch::with_defaults();
1557        let h = hash_text("hot");
1558
1559        for _ in 0..100 {
1560            cms.increment(h);
1561        }
1562        assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1563    }
1564
1565    #[test]
1566    fn unit_cms_bounds() {
1567        // Error bound: estimate(x) <= true_count(x) + epsilon * N.
1568        // With w=1024, epsilon = e/1024 ~ 0.00266.
1569        let mut cms = CountMinSketch::new(1024, 4, u64::MAX); // no reset
1570        let n: u64 = 1000;
1571
1572        // Insert 1000 unique keys
1573        for i in 0..n {
1574            cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1575        }
1576
1577        // Check a target key inserted exactly 5 times
1578        let target = 0xDEAD_BEEF_u64;
1579        for _ in 0..5 {
1580            cms.increment(target);
1581        }
1582
1583        let est = cms.estimate(target);
1584        let epsilon = std::f64::consts::E / 1024.0;
1585        let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1586
1587        assert!(
1588            (est as f64) <= upper_bound,
1589            "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1590            est,
1591            upper_bound,
1592            epsilon,
1593            n + 5,
1594        );
1595        assert!(est >= 5, "estimate {} should be >= true count 5", est);
1596    }
1597
1598    #[test]
1599    fn unit_cms_bounds_mass_test() {
1600        let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1601        let n = 2000u64;
1602
1603        let mut true_counts = vec![0u8; n as usize];
1604        for i in 0..n {
1605            let count = (i % 10 + 1) as u8;
1606            true_counts[i as usize] = count;
1607            for _ in 0..count {
1608                cms.increment(i);
1609            }
1610        }
1611
1612        let total = cms.total_increments();
1613        let epsilon = std::f64::consts::E / 1024.0;
1614        let mut violations = 0u32;
1615
1616        for i in 0..n {
1617            let est = cms.estimate(i);
1618            let true_c = true_counts[i as usize];
1619            let upper = true_c as f64 + epsilon * total as f64;
1620            if est as f64 > upper + 0.5 {
1621                violations += 1;
1622            }
1623            assert!(
1624                est >= true_c,
1625                "key {}: estimate {} < true count {}",
1626                i,
1627                est,
1628                true_c
1629            );
1630        }
1631
1632        // delta = (1/2)^4 = 0.0625; allow generous threshold
1633        let violation_rate = violations as f64 / n as f64;
1634        assert!(
1635            violation_rate <= 0.10,
1636            "violation rate {:.3} exceeds delta threshold",
1637            violation_rate,
1638        );
1639    }
1640
1641    #[test]
1642    fn unit_cms_halving_ages_counts() {
1643        let mut cms = CountMinSketch::new(64, 2, 100);
1644
1645        let h = hash_text("test");
1646        for _ in 0..10 {
1647            cms.increment(h);
1648        }
1649        assert_eq!(cms.estimate(h), 10);
1650
1651        // Trigger reset by reaching reset_interval
1652        for _ in 10..100 {
1653            cms.increment(hash_text("noise"));
1654        }
1655
1656        let est = cms.estimate(h);
1657        assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1658    }
1659
1660    #[test]
1661    fn unit_cms_monotone() {
1662        let mut cms = CountMinSketch::with_defaults();
1663        let h = hash_text("key");
1664
1665        let mut prev_est = 0u8;
1666        for _ in 0..CMS_MAX_COUNT {
1667            cms.increment(h);
1668            let est = cms.estimate(h);
1669            assert!(est >= prev_est, "estimate should be monotone");
1670            prev_est = est;
1671        }
1672    }
1673
1674    // --- Doorkeeper ---
1675
1676    #[test]
1677    fn unit_doorkeeper_first_access_returns_false() {
1678        let mut dk = Doorkeeper::with_defaults();
1679        assert!(!dk.check_and_set(hash_text("new")));
1680    }
1681
1682    #[test]
1683    fn unit_doorkeeper_second_access_returns_true() {
1684        let mut dk = Doorkeeper::with_defaults();
1685        let h = hash_text("key");
1686        dk.check_and_set(h);
1687        assert!(dk.check_and_set(h));
1688    }
1689
1690    #[test]
1691    fn unit_doorkeeper_contains() {
1692        let mut dk = Doorkeeper::with_defaults();
1693        let h = hash_text("key");
1694        assert!(!dk.contains(h));
1695        dk.check_and_set(h);
1696        assert!(dk.contains(h));
1697    }
1698
1699    #[test]
1700    fn unit_doorkeeper_clear_resets() {
1701        let mut dk = Doorkeeper::with_defaults();
1702        let h = hash_text("key");
1703        dk.check_and_set(h);
1704        dk.clear();
1705        assert!(!dk.contains(h));
1706        assert!(!dk.check_and_set(h));
1707    }
1708
1709    #[test]
1710    fn unit_doorkeeper_false_positive_rate() {
1711        let mut dk = Doorkeeper::new(2048);
1712        let n = 100u64;
1713
1714        for i in 0..n {
1715            dk.check_and_set(i * 0x9E37_79B9 + 1);
1716        }
1717
1718        let mut false_positives = 0u32;
1719        for i in 0..1000 {
1720            let h = (i + 100_000) * 0x6A09_E667 + 7;
1721            if dk.contains(h) {
1722                false_positives += 1;
1723            }
1724        }
1725
1726        // k=1, m=2048, n=100: FP rate ~ 1 - e^{-100/2048} ~ 0.048
1727        let fp_rate = false_positives as f64 / 1000.0;
1728        assert!(
1729            fp_rate < 0.15,
1730            "FP rate {:.3} too high (expected < 0.15)",
1731            fp_rate,
1732        );
1733    }
1734
1735    // --- Admission Rule ---
1736
1737    #[test]
1738    fn unit_admission_rule() {
1739        assert!(tinylfu_admit(5, 3)); // candidate > victim -> admit
1740        assert!(!tinylfu_admit(3, 5)); // candidate < victim -> reject
1741        assert!(!tinylfu_admit(3, 3)); // tie -> reject (keep victim)
1742    }
1743
1744    #[test]
1745    fn unit_admission_rule_extremes() {
1746        assert!(tinylfu_admit(1, 0));
1747        assert!(!tinylfu_admit(0, 0));
1748        assert!(!tinylfu_admit(0, 1));
1749        assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1750        assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1751    }
1752
1753    // --- Fingerprint Guard ---
1754
1755    #[test]
1756    fn unit_fingerprint_guard() {
1757        let fp1 = fingerprint_hash("hello");
1758        let fp2 = fingerprint_hash("world");
1759        let fp3 = fingerprint_hash("hello");
1760
1761        assert_ne!(
1762            fp1, fp2,
1763            "Different strings should have different fingerprints"
1764        );
1765        assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1766    }
1767
1768    #[test]
1769    fn unit_fingerprint_guard_collision_rate() {
1770        let mut fps = std::collections::HashSet::new();
1771        let n = 10_000;
1772
1773        for i in 0..n {
1774            let s = format!("string_{}", i);
1775            fps.insert(fingerprint_hash(&s));
1776        }
1777
1778        let collisions = n - fps.len();
1779        assert!(
1780            collisions == 0,
1781            "Expected 0 collisions in 10k items, got {}",
1782            collisions,
1783        );
1784    }
1785
1786    #[test]
1787    fn unit_fingerprint_independent_of_primary_hash() {
1788        let text = "test_string";
1789        let primary = hash_text(text);
1790        let secondary = fingerprint_hash(text);
1791
1792        assert_ne!(
1793            primary, secondary,
1794            "Fingerprint and primary hash should differ"
1795        );
1796    }
1797
1798    // --- Integration: Doorkeeper + CMS pipeline ---
1799
1800    #[test]
1801    fn unit_doorkeeper_cms_pipeline() {
1802        let mut dk = Doorkeeper::with_defaults();
1803        let mut cms = CountMinSketch::with_defaults();
1804        let h = hash_text("item");
1805
1806        // First access: doorkeeper records
1807        assert!(!dk.check_and_set(h));
1808        assert_eq!(cms.estimate(h), 0);
1809
1810        // Second access: doorkeeper confirms, CMS incremented
1811        assert!(dk.check_and_set(h));
1812        cms.increment(h);
1813        assert_eq!(cms.estimate(h), 1);
1814
1815        // Third access
1816        assert!(dk.check_and_set(h));
1817        cms.increment(h);
1818        assert_eq!(cms.estimate(h), 2);
1819    }
1820
1821    #[test]
1822    fn unit_doorkeeper_filters_one_hit_wonders() {
1823        let mut dk = Doorkeeper::with_defaults();
1824        let mut cms = CountMinSketch::with_defaults();
1825
1826        // 100 one-hit items
1827        for i in 0u64..100 {
1828            let h = i * 0x9E37_79B9 + 1;
1829            let seen = dk.check_and_set(h);
1830            if seen {
1831                cms.increment(h);
1832            }
1833        }
1834
1835        assert_eq!(cms.total_increments(), 0);
1836
1837        // Access one again -> passes doorkeeper
1838        let h = 1; // i=0 from the loop above
1839        assert!(dk.check_and_set(h));
1840        cms.increment(h);
1841        assert_eq!(cms.total_increments(), 1);
1842    }
1843}
1844
1845// ---------------------------------------------------------------------------
1846// TinyLFU Implementation Tests (bd-4kq0.6.2)
1847// ---------------------------------------------------------------------------
1848
1849#[cfg(test)]
1850mod tinylfu_impl_tests {
1851    use super::*;
1852
1853    #[test]
1854    fn basic_get_or_compute() {
1855        let mut cache = TinyLfuWidthCache::new(100);
1856        let w = cache.get_or_compute("hello");
1857        assert_eq!(w, 5);
1858        assert_eq!(cache.len(), 1);
1859
1860        let w2 = cache.get_or_compute("hello");
1861        assert_eq!(w2, 5);
1862        let stats = cache.stats();
1863        assert_eq!(stats.misses, 1);
1864        assert_eq!(stats.hits, 1);
1865    }
1866
1867    #[test]
1868    fn window_to_main_promotion() {
1869        // With capacity=100, window=1, main=99.
1870        // Fill window, then force eviction into main via new inserts.
1871        let mut cache = TinyLfuWidthCache::new(100);
1872
1873        // Access "frequent" many times to build CMS frequency.
1874        for _ in 0..10 {
1875            cache.get_or_compute("frequent");
1876        }
1877
1878        // Insert enough items to fill window and force eviction.
1879        for i in 0..5 {
1880            cache.get_or_compute(&format!("item_{}", i));
1881        }
1882
1883        // "frequent" should have been promoted to main cache via admission.
1884        assert!(cache.contains("frequent"));
1885        assert!(cache.main_len() > 0 || cache.window_len() > 0);
1886    }
1887
1888    #[test]
1889    fn unit_window_promotion() {
1890        // Frequent items should end up in main cache.
1891        let mut cache = TinyLfuWidthCache::new(50);
1892
1893        // Access "hot" repeatedly to build frequency.
1894        for _ in 0..20 {
1895            cache.get_or_compute("hot");
1896        }
1897
1898        // Now push enough items through window to force "hot" out of window.
1899        for i in 0..10 {
1900            cache.get_or_compute(&format!("filler_{}", i));
1901        }
1902
1903        // "hot" should still be accessible (promoted to main via admission).
1904        assert!(cache.contains("hot"), "Frequent item should be retained");
1905    }
1906
1907    #[test]
1908    fn fingerprint_guard_detects_collision() {
1909        let mut cache = TinyLfuWidthCache::new(100);
1910
1911        // Compute "hello" with custom function.
1912        let w = cache.get_or_compute_with("hello", |_| 42);
1913        assert_eq!(w, 42);
1914
1915        // Verify it's cached.
1916        assert!(cache.contains("hello"));
1917    }
1918
1919    #[test]
1920    fn admission_rejects_infrequent() {
1921        // Fill main cache with frequently-accessed items.
1922        // Then try to insert a cold item — it should be rejected.
1923        let mut cache = TinyLfuWidthCache::new(10); // window=1, main=9
1924
1925        // Fill main with items accessed multiple times.
1926        for i in 0..9 {
1927            let s = format!("hot_{}", i);
1928            for _ in 0..5 {
1929                cache.get_or_compute(&s);
1930            }
1931        }
1932
1933        // Now insert cold items. They should go through window but not
1934        // necessarily get into main.
1935        for i in 0..20 {
1936            cache.get_or_compute(&format!("cold_{}", i));
1937        }
1938
1939        // Hot items should mostly survive (they have high frequency).
1940        let hot_survivors: usize = (0..9)
1941            .filter(|i| cache.contains(&format!("hot_{}", i)))
1942            .count();
1943        assert!(
1944            hot_survivors >= 5,
1945            "Expected most hot items to survive, got {}/9",
1946            hot_survivors
1947        );
1948    }
1949
1950    #[test]
1951    fn clear_empties_everything() {
1952        let mut cache = TinyLfuWidthCache::new(100);
1953        cache.get_or_compute("a");
1954        cache.get_or_compute("b");
1955        cache.clear();
1956        assert!(cache.is_empty());
1957        assert_eq!(cache.len(), 0);
1958    }
1959
1960    #[test]
1961    fn stats_reflect_usage() {
1962        let mut cache = TinyLfuWidthCache::new(100);
1963        cache.get_or_compute("a");
1964        cache.get_or_compute("a");
1965        cache.get_or_compute("b");
1966
1967        let stats = cache.stats();
1968        assert_eq!(stats.misses, 2);
1969        assert_eq!(stats.hits, 1);
1970        assert_eq!(stats.size, 2);
1971    }
1972
1973    #[test]
1974    fn capacity_is_respected() {
1975        let mut cache = TinyLfuWidthCache::new(20);
1976
1977        for i in 0..100 {
1978            cache.get_or_compute(&format!("item_{}", i));
1979        }
1980
1981        assert!(
1982            cache.len() <= 20,
1983            "Cache size {} exceeds capacity 20",
1984            cache.len()
1985        );
1986    }
1987
1988    #[test]
1989    fn reset_stats_works() {
1990        let mut cache = TinyLfuWidthCache::new(100);
1991        cache.get_or_compute("x");
1992        cache.get_or_compute("x");
1993        cache.reset_stats();
1994        let stats = cache.stats();
1995        assert_eq!(stats.hits, 0);
1996        assert_eq!(stats.misses, 0);
1997    }
1998
1999    #[test]
2000    fn perf_cache_hit_rate() {
2001        // Simulate a Zipfian-like workload: some items accessed frequently,
2002        // many accessed rarely. TinyLFU should achieve decent hit rate.
2003        let mut cache = TinyLfuWidthCache::new(50);
2004
2005        // 10 hot items accessed 20 times each.
2006        for _ in 0..20 {
2007            for i in 0..10 {
2008                cache.get_or_compute(&format!("hot_{}", i));
2009            }
2010        }
2011
2012        // 100 cold items accessed once each.
2013        for i in 0..100 {
2014            cache.get_or_compute(&format!("cold_{}", i));
2015        }
2016
2017        // Re-access hot items — these should mostly be hits.
2018        cache.reset_stats();
2019        for i in 0..10 {
2020            cache.get_or_compute(&format!("hot_{}", i));
2021        }
2022
2023        let stats = cache.stats();
2024        // Hot items should have high hit rate after being frequently accessed.
2025        assert!(
2026            stats.hits >= 5,
2027            "Expected at least 5/10 hot items to hit, got {}",
2028            stats.hits
2029        );
2030    }
2031
2032    #[test]
2033    fn unicode_strings_work() {
2034        let mut cache = TinyLfuWidthCache::new(100);
2035        assert_eq!(cache.get_or_compute("日本語"), 6);
2036        assert_eq!(cache.get_or_compute("café"), 4);
2037        assert_eq!(cache.get_or_compute("日本語"), 6); // hit
2038        assert_eq!(cache.stats().hits, 1);
2039    }
2040
2041    #[test]
2042    fn empty_string() {
2043        let mut cache = TinyLfuWidthCache::new(100);
2044        assert_eq!(cache.get_or_compute(""), 0);
2045    }
2046
2047    #[test]
2048    fn minimum_capacity() {
2049        let cache = TinyLfuWidthCache::new(0);
2050        assert!(cache.capacity() >= 2);
2051    }
2052}
2053
2054// ---------------------------------------------------------------------------
2055// bd-4kq0.6.3: WidthCache Tests + Perf Gates
2056// ---------------------------------------------------------------------------
2057
2058/// Deterministic LCG for test reproducibility (no external rand dependency).
2059#[cfg(test)]
2060struct Lcg(u64);
2061
2062#[cfg(test)]
2063impl Lcg {
2064    fn new(seed: u64) -> Self {
2065        Self(seed)
2066    }
2067    fn next_u64(&mut self) -> u64 {
2068        self.0 = self
2069            .0
2070            .wrapping_mul(6_364_136_223_846_793_005)
2071            .wrapping_add(1);
2072        self.0
2073    }
2074    fn next_usize(&mut self, max: usize) -> usize {
2075        (self.next_u64() % (max as u64)) as usize
2076    }
2077}
2078
2079// ---------------------------------------------------------------------------
2080// 1. Property: cache equivalence (cached width == computed width)
2081// ---------------------------------------------------------------------------
2082
2083#[cfg(test)]
2084mod property_cache_equivalence {
2085    use super::*;
2086    use proptest::prelude::*;
2087
2088    proptest! {
2089        #[test]
2090        fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
2091            let mut cache = TinyLfuWidthCache::new(200);
2092            let cached = cache.get_or_compute(&s);
2093            let direct = crate::display_width(&s);
2094            prop_assert_eq!(cached, direct,
2095                "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
2096        }
2097
2098        #[test]
2099        fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
2100            let mut cache = TinyLfuWidthCache::new(200);
2101            let first = cache.get_or_compute(&s);
2102            let second = cache.get_or_compute(&s);
2103            prop_assert_eq!(first, second,
2104                "First access returned {} but second returned {} for {:?}", first, second, s);
2105        }
2106
2107        #[test]
2108        fn tinylfu_never_exceeds_capacity(
2109            strings in prop::collection::vec("[a-z]{1,5}", 10..200),
2110            capacity in 10usize..50
2111        ) {
2112            let mut cache = TinyLfuWidthCache::new(capacity);
2113            for s in &strings {
2114                cache.get_or_compute(s);
2115                prop_assert!(cache.len() <= capacity,
2116                    "Cache size {} exceeded capacity {}", cache.len(), capacity);
2117            }
2118        }
2119
2120        #[test]
2121        fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
2122            let mut cache = TinyLfuWidthCache::new(100);
2123            let custom_fn = |text: &str| text.len(); // byte length as custom metric
2124            let cached = cache.get_or_compute_with(&s, custom_fn);
2125            prop_assert_eq!(cached, s.len(),
2126                "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
2127        }
2128    }
2129
2130    #[test]
2131    fn deterministic_seed_equivalence() {
2132        // Same workload with same seed → same results every time.
2133        let mut rng = super::Lcg::new(0xDEAD_BEEF);
2134
2135        let mut cache1 = TinyLfuWidthCache::new(50);
2136        let mut results1 = Vec::new();
2137        for _ in 0..500 {
2138            let idx = rng.next_usize(100);
2139            let s = format!("key_{}", idx);
2140            results1.push(cache1.get_or_compute(&s));
2141        }
2142
2143        let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
2144        let mut cache2 = TinyLfuWidthCache::new(50);
2145        let mut results2 = Vec::new();
2146        for _ in 0..500 {
2147            let idx = rng2.next_usize(100);
2148            let s = format!("key_{}", idx);
2149            results2.push(cache2.get_or_compute(&s));
2150        }
2151
2152        assert_eq!(
2153            results1, results2,
2154            "Deterministic seed must produce identical results"
2155        );
2156    }
2157
2158    #[test]
2159    fn both_caches_agree_on_widths() {
2160        // WidthCache (plain LRU) and TinyLfuWidthCache must return the same
2161        // widths for any input.
2162        let mut lru = WidthCache::new(200);
2163        let mut tlfu = TinyLfuWidthCache::new(200);
2164
2165        let inputs = [
2166            "",
2167            "hello",
2168            "日本語テスト",
2169            "café résumé",
2170            "a\tb",
2171            "🎉🎊🎈",
2172            "mixed日本eng",
2173            "    spaces    ",
2174            "AAAAAAAAAAAAAAAAAAAAAAAAA",
2175            "x",
2176        ];
2177
2178        for &s in &inputs {
2179            let w1 = lru.get_or_compute(s);
2180            let w2 = tlfu.get_or_compute(s);
2181            assert_eq!(
2182                w1, w2,
2183                "Width mismatch for {:?}: LRU={}, TinyLFU={}",
2184                s, w1, w2
2185            );
2186        }
2187    }
2188}
2189
2190// ---------------------------------------------------------------------------
2191// 2. E2E cache replay: deterministic workload, log hit rate + latency (JSONL)
2192// ---------------------------------------------------------------------------
2193
2194#[cfg(test)]
2195mod e2e_cache_replay {
2196    use super::*;
2197    use std::time::Instant;
2198
2199    /// A single replay record, serialisable to JSONL.
2200    #[derive(Debug)]
2201    struct ReplayRecord {
2202        step: usize,
2203        key: String,
2204        width: usize,
2205        hit: bool,
2206        latency_ns: u128,
2207    }
2208
2209    impl ReplayRecord {
2210        fn to_jsonl(&self) -> String {
2211            format!(
2212                r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
2213                self.step, self.key, self.width, self.hit, self.latency_ns,
2214            )
2215        }
2216    }
2217
2218    fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
2219        // Approximate Zipfian: lower indices are much more frequent.
2220        (0..n)
2221            .map(|_| {
2222                let raw = rng.next_usize(universe * universe);
2223                let idx = (raw as f64).sqrt() as usize % universe;
2224                format!("item_{}", idx)
2225            })
2226            .collect()
2227    }
2228
2229    #[test]
2230    fn replay_zipfian_tinylfu() {
2231        let mut rng = super::Lcg::new(0x1234_5678);
2232        let workload = zipfian_workload(&mut rng, 2000, 200);
2233
2234        let mut cache = TinyLfuWidthCache::new(50);
2235        let mut records = Vec::with_capacity(workload.len());
2236
2237        for (i, key) in workload.iter().enumerate() {
2238            let stats_before = cache.stats();
2239            let t0 = Instant::now();
2240            let width = cache.get_or_compute(key);
2241            let elapsed = t0.elapsed().as_nanos();
2242            let stats_after = cache.stats();
2243            let hit = stats_after.hits > stats_before.hits;
2244
2245            records.push(ReplayRecord {
2246                step: i,
2247                key: key.clone(),
2248                width,
2249                hit,
2250                latency_ns: elapsed,
2251            });
2252        }
2253
2254        // Validate JSONL serialisation is parseable.
2255        for r in &records[..5] {
2256            let line = r.to_jsonl();
2257            assert!(
2258                line.starts_with('{') && line.ends_with('}'),
2259                "Bad JSONL: {}",
2260                line
2261            );
2262        }
2263
2264        // Compute aggregate stats.
2265        let total = records.len();
2266        let hits = records.iter().filter(|r| r.hit).count();
2267        let hit_rate = hits as f64 / total as f64;
2268
2269        // Zipfian workload with 50-entry cache over 200-item universe
2270        // should get a meaningful hit rate (> 10%).
2271        assert!(
2272            hit_rate > 0.10,
2273            "Zipfian hit rate too low: {:.2}% ({}/{})",
2274            hit_rate * 100.0,
2275            hits,
2276            total
2277        );
2278    }
2279
2280    #[test]
2281    fn replay_zipfian_lru_comparison() {
2282        let mut rng = super::Lcg::new(0x1234_5678);
2283        let workload = zipfian_workload(&mut rng, 2000, 200);
2284
2285        let mut tlfu = TinyLfuWidthCache::new(50);
2286        let mut lru = WidthCache::new(50);
2287
2288        for key in &workload {
2289            tlfu.get_or_compute(key);
2290            lru.get_or_compute(key);
2291        }
2292
2293        let tlfu_stats = tlfu.stats();
2294        let lru_stats = lru.stats();
2295
2296        // Both must have correct total operations.
2297        assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2298        assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2299
2300        // TinyLFU should be at least competitive with plain LRU on Zipfian.
2301        // (In practice TinyLFU often wins, but we just check both work.)
2302        assert!(
2303            tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2304            "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2305            tlfu_stats.hit_rate() * 100.0,
2306            lru_stats.hit_rate() * 100.0,
2307        );
2308    }
2309
2310    #[test]
2311    fn replay_deterministic_reproduction() {
2312        // Two identical replays must produce identical hit/miss sequences.
2313        let run = |seed: u64| -> Vec<bool> {
2314            let mut rng = super::Lcg::new(seed);
2315            let workload = zipfian_workload(&mut rng, 500, 100);
2316            let mut cache = TinyLfuWidthCache::new(30);
2317            let mut hits = Vec::with_capacity(500);
2318            for key in &workload {
2319                let before = cache.stats().hits;
2320                cache.get_or_compute(key);
2321                hits.push(cache.stats().hits > before);
2322            }
2323            hits
2324        };
2325
2326        let run1 = run(0xABCD_EF01);
2327        let run2 = run(0xABCD_EF01);
2328        assert_eq!(run1, run2, "Deterministic replay diverged");
2329    }
2330
2331    #[test]
2332    fn replay_uniform_workload() {
2333        // Uniform access pattern: every key equally likely. Hit rate should be
2334        // roughly cache_size / universe_size for large N.
2335        let mut rng = super::Lcg::new(0x9999);
2336        let universe = 100;
2337        let cache_size = 25;
2338        let n = 5000;
2339
2340        let mut cache = TinyLfuWidthCache::new(cache_size);
2341
2342        // Warm up: access each key once.
2343        for i in 0..universe {
2344            cache.get_or_compute(&format!("u_{}", i));
2345        }
2346
2347        cache.reset_stats();
2348        for _ in 0..n {
2349            let idx = rng.next_usize(universe);
2350            cache.get_or_compute(&format!("u_{}", idx));
2351        }
2352
2353        let stats = cache.stats();
2354        let hit_rate = stats.hit_rate();
2355        // Theoretical: ~25/100 = 25%. Allow range 10%–60%.
2356        assert!(
2357            hit_rate > 0.10 && hit_rate < 0.60,
2358            "Uniform hit rate {:.2}% outside expected range",
2359            hit_rate * 100.0,
2360        );
2361    }
2362}
2363
2364// ---------------------------------------------------------------------------
2365// 3. Perf gates: cache operations < 1us p95
2366// ---------------------------------------------------------------------------
2367
2368#[cfg(test)]
2369mod perf_cache_overhead {
2370    use super::*;
2371    use std::time::Instant;
2372
2373    /// Collect latencies for N operations, return sorted Vec<u128> in nanoseconds.
2374    fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2375        let mut latencies = Vec::with_capacity(n);
2376        for i in 0..n {
2377            let t0 = Instant::now();
2378            op(i);
2379            latencies.push(t0.elapsed().as_nanos());
2380        }
2381        latencies.sort_unstable();
2382        latencies
2383    }
2384
2385    fn p95(sorted: &[u128]) -> u128 {
2386        let len = sorted.len();
2387        let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2388        sorted[idx]
2389    }
2390
2391    fn p99(sorted: &[u128]) -> u128 {
2392        let len = sorted.len();
2393        let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2394        sorted[idx]
2395    }
2396
2397    fn median(sorted: &[u128]) -> u128 {
2398        sorted[sorted.len() / 2]
2399    }
2400
2401    #[allow(unexpected_cfgs)]
2402    fn perf_budget_ns(base_ns: u128) -> u128 {
2403        if cfg!(coverage) || cfg!(coverage_nightly) {
2404            base_ns.saturating_mul(10)
2405        } else {
2406            base_ns
2407        }
2408    }
2409
2410    #[test]
2411    fn perf_lru_hit_latency() {
2412        let mut cache = WidthCache::new(1000);
2413        // Warm up.
2414        for i in 0..100 {
2415            cache.get_or_compute(&format!("warm_{}", i));
2416        }
2417
2418        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2419        let latencies = measure_latencies(10_000, |i| {
2420            let _ = cache.get_or_compute(&keys[i % 100]);
2421        });
2422
2423        let p95_ns = p95(&latencies);
2424        // Budget: < 1us (1000ns) p95 for cache hits.
2425        // Use generous 5us to account for CI variability (10x under coverage).
2426        let budget_ns = perf_budget_ns(5_000);
2427        assert!(
2428            p95_ns < budget_ns,
2429            "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2430            p95_ns,
2431            budget_ns,
2432            median(&latencies),
2433            p99(&latencies),
2434        );
2435    }
2436
2437    #[test]
2438    fn perf_tinylfu_hit_latency() {
2439        let mut cache = TinyLfuWidthCache::new(1000);
2440        // Warm up: access each key multiple times to ensure promotion to main.
2441        for _ in 0..5 {
2442            for i in 0..100 {
2443                cache.get_or_compute(&format!("warm_{}", i));
2444            }
2445        }
2446
2447        let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2448        let latencies = measure_latencies(10_000, |i| {
2449            let _ = cache.get_or_compute(&keys[i % 100]);
2450        });
2451
2452        let p95_ns = p95(&latencies);
2453        // Budget: < 1us p95 for hits (5us CI-safe threshold).
2454        let budget_ns = perf_budget_ns(5_000);
2455        assert!(
2456            p95_ns < budget_ns,
2457            "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2458            p95_ns,
2459            budget_ns,
2460            median(&latencies),
2461            p99(&latencies),
2462        );
2463    }
2464
2465    #[test]
2466    fn perf_tinylfu_miss_latency() {
2467        let mut cache = TinyLfuWidthCache::new(100);
2468        let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2469
2470        let latencies = measure_latencies(10_000, |i| {
2471            let _ = cache.get_or_compute(&keys[i]);
2472        });
2473
2474        let p95_ns = p95(&latencies);
2475        // Misses include unicode width computation. Budget: < 5us p95.
2476        // Use 20us CI-safe threshold (10x under coverage; computation dominates).
2477        let budget_ns = perf_budget_ns(20_000);
2478        assert!(
2479            p95_ns < budget_ns,
2480            "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2481            p95_ns,
2482            budget_ns,
2483            median(&latencies),
2484            p99(&latencies),
2485        );
2486    }
2487
2488    #[test]
2489    fn perf_cms_increment_latency() {
2490        let mut cms = CountMinSketch::with_defaults();
2491        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2492
2493        let latencies = measure_latencies(10_000, |i| {
2494            cms.increment(hashes[i]);
2495        });
2496
2497        let p95_ns = p95(&latencies);
2498        // CMS increment should be very fast: < 500ns p95.
2499        let budget_ns = perf_budget_ns(2_000);
2500        assert!(
2501            p95_ns < budget_ns,
2502            "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2503            p95_ns,
2504            budget_ns,
2505            median(&latencies),
2506        );
2507    }
2508
2509    #[test]
2510    fn perf_doorkeeper_latency() {
2511        let mut dk = Doorkeeper::with_defaults();
2512        let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2513
2514        let latencies = measure_latencies(10_000, |i| {
2515            let _ = dk.check_and_set(hashes[i]);
2516        });
2517
2518        let p95_ns = p95(&latencies);
2519        // Doorkeeper bit ops: < 200ns p95.
2520        let budget_ns = perf_budget_ns(1_000);
2521        assert!(
2522            p95_ns < budget_ns,
2523            "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2524            p95_ns,
2525            budget_ns,
2526            median(&latencies),
2527        );
2528    }
2529
2530    #[test]
2531    fn perf_fingerprint_hash_latency() {
2532        let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2533
2534        let latencies = measure_latencies(10_000, |i| {
2535            let _ = fingerprint_hash(&keys[i]);
2536        });
2537
2538        let p95_ns = p95(&latencies);
2539        // FNV hash: < 200ns p95.
2540        let budget_ns = perf_budget_ns(1_000);
2541        assert!(
2542            p95_ns < budget_ns,
2543            "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2544            p95_ns,
2545            budget_ns,
2546            median(&latencies),
2547        );
2548    }
2549}