1#![forbid(unsafe_code)]
2
3use lru::LruCache;
30use rustc_hash::FxHasher;
31use std::hash::{Hash, Hasher};
32use std::num::NonZeroUsize;
33
34pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39pub struct CacheStats {
40 pub hits: u64,
42 pub misses: u64,
44 pub size: usize,
46 pub capacity: usize,
48}
49
50impl CacheStats {
51 #[must_use]
53 pub fn hit_rate(&self) -> f64 {
54 let total = self.hits + self.misses;
55 if total == 0 {
56 0.0
57 } else {
58 self.hits as f64 / total as f64
59 }
60 }
61}
62
63#[derive(Debug)]
85pub struct WidthCache {
86 cache: LruCache<u64, usize>,
87 hits: u64,
88 misses: u64,
89}
90
91impl WidthCache {
92 #[must_use]
96 pub fn new(capacity: usize) -> Self {
97 let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
98 Self {
99 cache: LruCache::new(capacity),
100 hits: 0,
101 misses: 0,
102 }
103 }
104
105 #[must_use]
107 pub fn with_default_capacity() -> Self {
108 Self::new(DEFAULT_CACHE_CAPACITY)
109 }
110
111 #[inline]
116 pub fn get_or_compute(&mut self, text: &str) -> usize {
117 self.get_or_compute_with(text, crate::display_width)
118 }
119
120 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
125 where
126 F: FnOnce(&str) -> usize,
127 {
128 let hash = hash_text(text);
129
130 if let Some(&width) = self.cache.get(&hash) {
131 self.hits += 1;
132 return width;
133 }
134
135 self.misses += 1;
136 let width = compute(text);
137 self.cache.put(hash, width);
138 width
139 }
140
141 #[must_use]
143 pub fn contains(&self, text: &str) -> bool {
144 let hash = hash_text(text);
145 self.cache.contains(&hash)
146 }
147
148 #[must_use]
153 pub fn get(&mut self, text: &str) -> Option<usize> {
154 let hash = hash_text(text);
155 self.cache.get(&hash).copied()
156 }
157
158 #[must_use]
160 pub fn peek(&self, text: &str) -> Option<usize> {
161 let hash = hash_text(text);
162 self.cache.peek(&hash).copied()
163 }
164
165 pub fn preload(&mut self, text: &str) {
169 let hash = hash_text(text);
170 if !self.cache.contains(&hash) {
171 let width = crate::display_width(text);
172 self.cache.put(hash, width);
173 }
174 }
175
176 pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
178 for text in texts {
179 self.preload(text);
180 }
181 }
182
183 pub fn clear(&mut self) {
185 self.cache.clear();
186 }
187
188 pub fn reset_stats(&mut self) {
190 self.hits = 0;
191 self.misses = 0;
192 }
193
194 #[must_use]
196 pub fn stats(&self) -> CacheStats {
197 CacheStats {
198 hits: self.hits,
199 misses: self.misses,
200 size: self.cache.len(),
201 capacity: self.cache.cap().get(),
202 }
203 }
204
205 #[must_use]
207 pub fn len(&self) -> usize {
208 self.cache.len()
209 }
210
211 #[must_use]
213 pub fn is_empty(&self) -> bool {
214 self.cache.is_empty()
215 }
216
217 #[must_use]
219 pub fn capacity(&self) -> usize {
220 self.cache.cap().get()
221 }
222
223 pub fn resize(&mut self, new_capacity: usize) {
228 let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
229 self.cache.resize(new_capacity);
230 }
231}
232
233impl Default for WidthCache {
234 fn default() -> Self {
235 Self::with_default_capacity()
236 }
237}
238
239#[inline]
241fn hash_text(text: &str) -> u64 {
242 let mut hasher = FxHasher::default();
243 text.hash(&mut hasher);
244 hasher.finish()
245}
246
247#[cfg(feature = "thread_local_cache")]
252thread_local! {
253 static THREAD_CACHE: std::cell::RefCell<WidthCache> =
254 std::cell::RefCell::new(WidthCache::with_default_capacity());
255}
256
257#[cfg(feature = "thread_local_cache")]
259pub fn cached_width(text: &str) -> usize {
260 THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
261}
262
263#[cfg(feature = "thread_local_cache")]
265pub fn clear_thread_cache() {
266 THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
267}
268
269#[cfg(test)]
270mod tests {
271 use super::*;
272
273 #[test]
274 fn new_cache_is_empty() {
275 let cache = WidthCache::new(100);
276 assert!(cache.is_empty());
277 assert_eq!(cache.len(), 0);
278 assert_eq!(cache.capacity(), 100);
279 }
280
281 #[test]
282 fn default_capacity() {
283 let cache = WidthCache::with_default_capacity();
284 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
285 }
286
287 #[test]
288 fn get_or_compute_caches_value() {
289 let mut cache = WidthCache::new(100);
290
291 let width1 = cache.get_or_compute("hello");
292 assert_eq!(width1, 5);
293 assert_eq!(cache.len(), 1);
294
295 let width2 = cache.get_or_compute("hello");
296 assert_eq!(width2, 5);
297 assert_eq!(cache.len(), 1); let stats = cache.stats();
300 assert_eq!(stats.hits, 1);
301 assert_eq!(stats.misses, 1);
302 }
303
304 #[test]
305 fn get_or_compute_different_strings() {
306 let mut cache = WidthCache::new(100);
307
308 cache.get_or_compute("hello");
309 cache.get_or_compute("world");
310 cache.get_or_compute("foo");
311
312 assert_eq!(cache.len(), 3);
313 let stats = cache.stats();
314 assert_eq!(stats.misses, 3);
315 assert_eq!(stats.hits, 0);
316 }
317
318 #[test]
319 fn get_or_compute_cjk() {
320 let mut cache = WidthCache::new(100);
321
322 let width = cache.get_or_compute("你好");
323 assert_eq!(width, 4); }
325
326 #[test]
327 fn contains() {
328 let mut cache = WidthCache::new(100);
329
330 assert!(!cache.contains("hello"));
331 cache.get_or_compute("hello");
332 assert!(cache.contains("hello"));
333 }
334
335 #[test]
336 fn get_returns_none_for_missing() {
337 let mut cache = WidthCache::new(100);
338 assert!(cache.get("missing").is_none());
339 }
340
341 #[test]
342 fn get_returns_cached_value() {
343 let mut cache = WidthCache::new(100);
344 cache.get_or_compute("hello");
345
346 let width = cache.get("hello");
347 assert_eq!(width, Some(5));
348 }
349
350 #[test]
351 fn peek_does_not_update_lru() {
352 let mut cache = WidthCache::new(2);
353
354 cache.get_or_compute("a");
355 cache.get_or_compute("b");
356
357 let _ = cache.peek("a");
359
360 cache.get_or_compute("c");
362
363 assert!(!cache.contains("a"));
364 assert!(cache.contains("b"));
365 assert!(cache.contains("c"));
366 }
367
368 #[test]
369 fn lru_eviction() {
370 let mut cache = WidthCache::new(2);
371
372 cache.get_or_compute("a");
373 cache.get_or_compute("b");
374 cache.get_or_compute("c"); assert!(!cache.contains("a"));
377 assert!(cache.contains("b"));
378 assert!(cache.contains("c"));
379 }
380
381 #[test]
382 fn lru_refresh_on_access() {
383 let mut cache = WidthCache::new(2);
384
385 cache.get_or_compute("a");
386 cache.get_or_compute("b");
387 cache.get_or_compute("a"); cache.get_or_compute("c"); assert!(cache.contains("a"));
391 assert!(!cache.contains("b"));
392 assert!(cache.contains("c"));
393 }
394
395 #[test]
396 fn preload() {
397 let mut cache = WidthCache::new(100);
398
399 cache.preload("hello");
400 assert!(cache.contains("hello"));
401 assert_eq!(cache.peek("hello"), Some(5));
402
403 let stats = cache.stats();
404 assert_eq!(stats.misses, 0); assert_eq!(stats.hits, 0);
406 }
407
408 #[test]
409 fn preload_many() {
410 let mut cache = WidthCache::new(100);
411
412 cache.preload_many(["hello", "world", "foo"]);
413 assert_eq!(cache.len(), 3);
414 }
415
416 #[test]
417 fn clear() {
418 let mut cache = WidthCache::new(100);
419 cache.get_or_compute("hello");
420 cache.get_or_compute("world");
421
422 cache.clear();
423 assert!(cache.is_empty());
424 assert!(!cache.contains("hello"));
425 }
426
427 #[test]
428 fn reset_stats() {
429 let mut cache = WidthCache::new(100);
430 cache.get_or_compute("hello");
431 cache.get_or_compute("hello");
432
433 let stats = cache.stats();
434 assert_eq!(stats.hits, 1);
435 assert_eq!(stats.misses, 1);
436
437 cache.reset_stats();
438 let stats = cache.stats();
439 assert_eq!(stats.hits, 0);
440 assert_eq!(stats.misses, 0);
441 }
442
443 #[test]
444 fn hit_rate() {
445 let stats = CacheStats {
446 hits: 75,
447 misses: 25,
448 size: 100,
449 capacity: 1000,
450 };
451 assert!((stats.hit_rate() - 0.75).abs() < 0.001);
452 }
453
454 #[test]
455 fn hit_rate_no_requests() {
456 let stats = CacheStats::default();
457 assert_eq!(stats.hit_rate(), 0.0);
458 }
459
460 #[test]
461 fn resize_smaller() {
462 let mut cache = WidthCache::new(100);
463 for i in 0..50 {
464 cache.get_or_compute(&format!("text{i}"));
465 }
466 assert_eq!(cache.len(), 50);
467
468 cache.resize(10);
469 assert!(cache.len() <= 10);
470 assert_eq!(cache.capacity(), 10);
471 }
472
473 #[test]
474 fn resize_larger() {
475 let mut cache = WidthCache::new(10);
476 cache.resize(100);
477 assert_eq!(cache.capacity(), 100);
478 }
479
480 #[test]
481 fn custom_compute_function() {
482 let mut cache = WidthCache::new(100);
483
484 let width = cache.get_or_compute_with("hello", |_| 42);
486 assert_eq!(width, 42);
487
488 assert_eq!(cache.peek("hello"), Some(42));
490 }
491
492 #[test]
493 fn empty_string() {
494 let mut cache = WidthCache::new(100);
495 let width = cache.get_or_compute("");
496 assert_eq!(width, 0);
497 }
498
499 #[test]
500 fn hash_collision_handling() {
501 let mut cache = WidthCache::new(1000);
504
505 for i in 0..500 {
506 cache.get_or_compute(&format!("string{i}"));
507 }
508
509 assert_eq!(cache.len(), 500);
510 }
511
512 #[test]
513 fn unicode_strings() {
514 let mut cache = WidthCache::new(100);
515
516 assert_eq!(cache.get_or_compute("café"), 4);
518 assert_eq!(cache.get_or_compute("日本語"), 6);
519 assert_eq!(cache.get_or_compute("🎉"), 2); assert_eq!(cache.len(), 3);
522 }
523
524 #[test]
525 fn combining_characters() {
526 let mut cache = WidthCache::new(100);
527
528 let width = cache.get_or_compute("e\u{0301}");
530 assert_eq!(width, 1);
532 }
533
534 #[test]
539 fn default_cache() {
540 let cache = WidthCache::default();
541 assert!(cache.is_empty());
542 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
543 }
544
545 #[test]
546 fn cache_stats_debug() {
547 let stats = CacheStats {
548 hits: 10,
549 misses: 5,
550 size: 15,
551 capacity: 100,
552 };
553 let debug = format!("{:?}", stats);
554 assert!(debug.contains("CacheStats"));
555 assert!(debug.contains("10")); }
557
558 #[test]
559 fn cache_stats_default() {
560 let stats = CacheStats::default();
561 assert_eq!(stats.hits, 0);
562 assert_eq!(stats.misses, 0);
563 assert_eq!(stats.size, 0);
564 assert_eq!(stats.capacity, 0);
565 }
566
567 #[test]
568 fn cache_stats_equality() {
569 let stats1 = CacheStats {
570 hits: 10,
571 misses: 5,
572 size: 15,
573 capacity: 100,
574 };
575 let stats2 = stats1; assert_eq!(stats1, stats2);
577 }
578
579 #[test]
580 fn clear_after_preload() {
581 let mut cache = WidthCache::new(100);
582 cache.preload_many(["hello", "world", "test"]);
583 assert_eq!(cache.len(), 3);
584
585 cache.clear();
586 assert!(cache.is_empty());
587 assert!(!cache.contains("hello"));
588 }
589
590 #[test]
591 fn preload_existing_is_noop() {
592 let mut cache = WidthCache::new(100);
593 cache.get_or_compute("hello"); let len_before = cache.len();
595
596 cache.preload("hello"); assert_eq!(cache.len(), len_before);
598 }
599
600 #[test]
601 fn minimum_capacity_is_one() {
602 let cache = WidthCache::new(0);
603 assert_eq!(cache.capacity(), 1);
604 }
605
606 #[test]
607 fn width_cache_debug() {
608 let cache = WidthCache::new(10);
609 let debug = format!("{:?}", cache);
610 assert!(debug.contains("WidthCache"));
611 }
612
613 #[test]
614 fn emoji_zwj_sequence() {
615 let mut cache = WidthCache::new(100);
616 let width = cache.get_or_compute("👨👩👧");
618 assert!(width >= 1);
620 }
621
622 #[test]
623 fn emoji_with_skin_tone() {
624 let mut cache = WidthCache::new(100);
625 let width = cache.get_or_compute("👍🏻");
626 assert!(width >= 1);
627 }
628
629 #[test]
630 fn flag_emoji() {
631 let mut cache = WidthCache::new(100);
632 let width = cache.get_or_compute("🇺🇸");
634 assert!(width >= 1);
635 }
636
637 #[test]
638 fn mixed_width_strings() {
639 let mut cache = WidthCache::new(100);
640 let width = cache.get_or_compute("Hello你好World");
642 assert_eq!(width, 14); }
644
645 #[test]
646 fn stats_size_reflects_cache_len() {
647 let mut cache = WidthCache::new(100);
648 cache.get_or_compute("a");
649 cache.get_or_compute("b");
650 cache.get_or_compute("c");
651
652 let stats = cache.stats();
653 assert_eq!(stats.size, cache.len());
654 assert_eq!(stats.size, 3);
655 }
656
657 #[test]
658 fn stats_capacity_matches() {
659 let cache = WidthCache::new(42);
660 let stats = cache.stats();
661 assert_eq!(stats.capacity, 42);
662 }
663
664 #[test]
665 fn resize_to_zero_becomes_one() {
666 let mut cache = WidthCache::new(100);
667 cache.resize(0);
668 assert_eq!(cache.capacity(), 1);
669 }
670
671 #[test]
672 fn get_updates_lru_order() {
673 let mut cache = WidthCache::new(2);
674
675 cache.get_or_compute("a");
676 cache.get_or_compute("b");
677
678 let _ = cache.get("a");
680
681 cache.get_or_compute("c");
683
684 assert!(cache.contains("a"));
685 assert!(!cache.contains("b"));
686 assert!(cache.contains("c"));
687 }
688
689 #[test]
690 fn contains_does_not_modify_stats() {
691 let mut cache = WidthCache::new(100);
692 cache.get_or_compute("hello");
693
694 let stats_before = cache.stats();
695 let _ = cache.contains("hello");
696 let _ = cache.contains("missing");
697 let stats_after = cache.stats();
698
699 assert_eq!(stats_before.hits, stats_after.hits);
700 assert_eq!(stats_before.misses, stats_after.misses);
701 }
702
703 #[test]
704 fn peek_returns_none_for_missing() {
705 let cache = WidthCache::new(100);
706 assert!(cache.peek("missing").is_none());
707 }
708
709 #[test]
710 fn custom_compute_called_once() {
711 let mut cache = WidthCache::new(100);
712 let mut call_count = 0;
713
714 cache.get_or_compute_with("test", |_| {
715 call_count += 1;
716 10
717 });
718
719 cache.get_or_compute_with("test", |_| {
720 call_count += 1;
721 20 });
723
724 assert_eq!(call_count, 1);
725 assert_eq!(cache.peek("test"), Some(10));
726 }
727
728 #[test]
729 fn whitespace_strings() {
730 let mut cache = WidthCache::new(100);
731 assert_eq!(cache.get_or_compute(" "), 3); assert_eq!(cache.get_or_compute("\t"), 1); assert_eq!(cache.get_or_compute("\n"), 1); }
735}
736
737#[derive(Debug, Clone)]
791pub struct CountMinSketch {
792 counters: Vec<Vec<u8>>,
794 depth: usize,
796 width: usize,
798 total_increments: u64,
800 reset_interval: u64,
802}
803
804const CMS_MAX_COUNT: u8 = 15;
806
807const CMS_DEFAULT_WIDTH: usize = 1024;
809
810const CMS_DEFAULT_DEPTH: usize = 4;
812
813const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
815
816impl CountMinSketch {
817 pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
819 let width = width.max(1);
820 let depth = depth.max(1);
821 Self {
822 counters: vec![vec![0u8; width]; depth],
823 depth,
824 width,
825 total_increments: 0,
826 reset_interval: reset_interval.max(1),
827 }
828 }
829
830 pub fn with_defaults() -> Self {
832 Self::new(
833 CMS_DEFAULT_WIDTH,
834 CMS_DEFAULT_DEPTH,
835 CMS_DEFAULT_RESET_INTERVAL,
836 )
837 }
838
839 pub fn increment(&mut self, hash: u64) {
841 for row in 0..self.depth {
842 let idx = self.index(hash, row);
843 self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
844 }
845 self.total_increments += 1;
846
847 if self.total_increments >= self.reset_interval {
848 self.halve();
849 }
850 }
851
852 pub fn estimate(&self, hash: u64) -> u8 {
854 let mut min = u8::MAX;
855 for row in 0..self.depth {
856 let idx = self.index(hash, row);
857 min = min.min(self.counters[row][idx]);
858 }
859 min
860 }
861
862 pub fn total_increments(&self) -> u64 {
864 self.total_increments
865 }
866
867 fn halve(&mut self) {
869 for row in &mut self.counters {
870 for c in row.iter_mut() {
871 *c /= 2;
872 }
873 }
874 self.total_increments = 0;
875 }
876
877 pub fn clear(&mut self) {
879 for row in &mut self.counters {
880 row.fill(0);
881 }
882 self.total_increments = 0;
883 }
884
885 #[inline]
887 fn index(&self, hash: u64, row: usize) -> usize {
888 let mixed = hash
890 .wrapping_mul(0x517c_c1b7_2722_0a95)
891 .wrapping_add(row as u64);
892 let mixed = mixed ^ (mixed >> 32);
893 (mixed as usize) % self.width
894 }
895}
896
897#[derive(Debug, Clone)]
903pub struct Doorkeeper {
904 bits: Vec<u64>,
905 num_bits: usize,
906}
907
908const DOORKEEPER_DEFAULT_BITS: usize = 2048;
910
911impl Doorkeeper {
912 pub fn new(num_bits: usize) -> Self {
914 let num_bits = num_bits.max(64);
915 let num_words = num_bits.div_ceil(64);
916 Self {
917 bits: vec![0u64; num_words],
918 num_bits,
919 }
920 }
921
922 pub fn with_defaults() -> Self {
924 Self::new(DOORKEEPER_DEFAULT_BITS)
925 }
926
927 pub fn check_and_set(&mut self, hash: u64) -> bool {
929 let idx = (hash as usize) % self.num_bits;
930 let word = idx / 64;
931 let bit = idx % 64;
932 let was_set = (self.bits[word] >> bit) & 1 == 1;
933 self.bits[word] |= 1 << bit;
934 was_set
935 }
936
937 pub fn contains(&self, hash: u64) -> bool {
939 let idx = (hash as usize) % self.num_bits;
940 let word = idx / 64;
941 let bit = idx % 64;
942 (self.bits[word] >> bit) & 1 == 1
943 }
944
945 pub fn clear(&mut self) {
947 self.bits.fill(0);
948 }
949}
950
951#[inline]
956pub fn fingerprint_hash(text: &str) -> u64 {
957 let mut h: u64 = 0xcbf2_9ce4_8422_2325; for &b in text.as_bytes() {
960 h ^= b as u64;
961 h = h.wrapping_mul(0x0100_0000_01b3); }
963 h
964}
965
966#[inline]
973pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
974 candidate_freq > victim_freq
975}
976
977#[derive(Debug, Clone)]
983struct TinyLfuEntry {
984 width: usize,
985 fingerprint: u64,
986}
987
988#[derive(Debug)]
1011pub struct TinyLfuWidthCache {
1012 window: LruCache<u64, TinyLfuEntry>,
1014 main: LruCache<u64, TinyLfuEntry>,
1016 sketch: CountMinSketch,
1018 doorkeeper: Doorkeeper,
1020 total_capacity: usize,
1022 hits: u64,
1024 misses: u64,
1025}
1026
1027impl TinyLfuWidthCache {
1028 pub fn new(total_capacity: usize) -> Self {
1032 let total_capacity = total_capacity.max(2);
1033 let window_cap = (total_capacity / 100).max(1);
1034 let main_cap = total_capacity - window_cap;
1035
1036 Self {
1037 window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1038 main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1039 sketch: CountMinSketch::with_defaults(),
1040 doorkeeper: Doorkeeper::with_defaults(),
1041 total_capacity,
1042 hits: 0,
1043 misses: 0,
1044 }
1045 }
1046
1047 pub fn get_or_compute(&mut self, text: &str) -> usize {
1049 self.get_or_compute_with(text, crate::display_width)
1050 }
1051
1052 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1054 where
1055 F: FnOnce(&str) -> usize,
1056 {
1057 let hash = hash_text(text);
1058 let fp = fingerprint_hash(text);
1059
1060 let seen = self.doorkeeper.check_and_set(hash);
1062 if seen {
1063 self.sketch.increment(hash);
1064 }
1065
1066 if let Some(entry) = self.main.get(&hash) {
1068 if entry.fingerprint == fp {
1069 self.hits += 1;
1070 return entry.width;
1071 }
1072 self.main.pop(&hash);
1074 }
1075
1076 if let Some(entry) = self.window.get(&hash) {
1078 if entry.fingerprint == fp {
1079 self.hits += 1;
1080 return entry.width;
1081 }
1082 self.window.pop(&hash);
1084 }
1085
1086 self.misses += 1;
1088 let width = compute(text);
1089 let new_entry = TinyLfuEntry {
1090 width,
1091 fingerprint: fp,
1092 };
1093
1094 if self.window.len() >= self.window.cap().get() {
1097 if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1099 self.try_admit_to_main(evicted_hash, evicted_entry);
1100 }
1101 }
1102 self.window.put(hash, new_entry);
1103
1104 width
1105 }
1106
1107 fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1109 let candidate_freq = self.sketch.estimate(candidate_hash);
1110
1111 if self.main.len() < self.main.cap().get() {
1112 self.main.put(candidate_hash, candidate_entry);
1114 return;
1115 }
1116
1117 if let Some((&victim_hash, _)) = self.main.peek_lru() {
1119 let victim_freq = self.sketch.estimate(victim_hash);
1120 if tinylfu_admit(candidate_freq, victim_freq) {
1121 self.main.pop_lru();
1122 self.main.put(candidate_hash, candidate_entry);
1123 }
1124 }
1126 }
1127
1128 pub fn contains(&self, text: &str) -> bool {
1130 let hash = hash_text(text);
1131 let fp = fingerprint_hash(text);
1132 if let Some(e) = self.main.peek(&hash)
1133 && e.fingerprint == fp
1134 {
1135 return true;
1136 }
1137 if let Some(e) = self.window.peek(&hash)
1138 && e.fingerprint == fp
1139 {
1140 return true;
1141 }
1142 false
1143 }
1144
1145 pub fn stats(&self) -> CacheStats {
1147 CacheStats {
1148 hits: self.hits,
1149 misses: self.misses,
1150 size: self.window.len() + self.main.len(),
1151 capacity: self.total_capacity,
1152 }
1153 }
1154
1155 pub fn clear(&mut self) {
1157 self.window.clear();
1158 self.main.clear();
1159 self.sketch.clear();
1160 self.doorkeeper.clear();
1161 }
1162
1163 pub fn reset_stats(&mut self) {
1165 self.hits = 0;
1166 self.misses = 0;
1167 }
1168
1169 pub fn len(&self) -> usize {
1171 self.window.len() + self.main.len()
1172 }
1173
1174 pub fn is_empty(&self) -> bool {
1176 self.window.is_empty() && self.main.is_empty()
1177 }
1178
1179 pub fn capacity(&self) -> usize {
1181 self.total_capacity
1182 }
1183
1184 pub fn main_len(&self) -> usize {
1186 self.main.len()
1187 }
1188
1189 pub fn window_len(&self) -> usize {
1191 self.window.len()
1192 }
1193}
1194
1195#[cfg(test)]
1196mod proptests {
1197 use super::*;
1198 use proptest::prelude::*;
1199
1200 proptest! {
1201 #[test]
1202 fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1203 let mut cache = WidthCache::new(100);
1204 let cached = cache.get_or_compute(&s);
1205 let direct = crate::display_width(&s);
1206 prop_assert_eq!(cached, direct);
1207 }
1208
1209 #[test]
1210 fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1211 let mut cache = WidthCache::new(100);
1212
1213 cache.get_or_compute(&s);
1214 let stats_before = cache.stats();
1215
1216 cache.get_or_compute(&s);
1217 let stats_after = cache.stats();
1218
1219 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1220 prop_assert_eq!(stats_after.misses, stats_before.misses);
1221 }
1222
1223 #[test]
1224 fn lru_never_exceeds_capacity(
1225 strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1226 capacity in 5usize..20
1227 ) {
1228 let mut cache = WidthCache::new(capacity);
1229
1230 for s in &strings {
1231 cache.get_or_compute(s);
1232 prop_assert!(cache.len() <= capacity);
1233 }
1234 }
1235
1236 #[test]
1237 fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1238 let mut cache = WidthCache::new(100);
1239
1240 cache.preload(&s);
1241 let stats_before = cache.stats();
1242
1243 cache.get_or_compute(&s);
1244 let stats_after = cache.stats();
1245
1246 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1248 }
1249 }
1250}
1251
1252#[cfg(test)]
1257mod tinylfu_tests {
1258 use super::*;
1259
1260 #[test]
1263 fn unit_cms_single_key_count() {
1264 let mut cms = CountMinSketch::with_defaults();
1265 let h = hash_text("hello");
1266
1267 for _ in 0..5 {
1268 cms.increment(h);
1269 }
1270 assert_eq!(cms.estimate(h), 5);
1271 }
1272
1273 #[test]
1274 fn unit_cms_unseen_key_is_zero() {
1275 let cms = CountMinSketch::with_defaults();
1276 assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1277 }
1278
1279 #[test]
1280 fn unit_cms_saturates_at_max() {
1281 let mut cms = CountMinSketch::with_defaults();
1282 let h = hash_text("hot");
1283
1284 for _ in 0..100 {
1285 cms.increment(h);
1286 }
1287 assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1288 }
1289
1290 #[test]
1291 fn unit_cms_bounds() {
1292 let mut cms = CountMinSketch::new(1024, 4, u64::MAX); let n: u64 = 1000;
1296
1297 for i in 0..n {
1299 cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1300 }
1301
1302 let target = 0xDEAD_BEEF_u64;
1304 for _ in 0..5 {
1305 cms.increment(target);
1306 }
1307
1308 let est = cms.estimate(target);
1309 let epsilon = std::f64::consts::E / 1024.0;
1310 let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1311
1312 assert!(
1313 (est as f64) <= upper_bound,
1314 "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1315 est,
1316 upper_bound,
1317 epsilon,
1318 n + 5,
1319 );
1320 assert!(est >= 5, "estimate {} should be >= true count 5", est);
1321 }
1322
1323 #[test]
1324 fn unit_cms_bounds_mass_test() {
1325 let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1326 let n = 2000u64;
1327
1328 let mut true_counts = vec![0u8; n as usize];
1329 for i in 0..n {
1330 let count = (i % 10 + 1) as u8;
1331 true_counts[i as usize] = count;
1332 for _ in 0..count {
1333 cms.increment(i);
1334 }
1335 }
1336
1337 let total = cms.total_increments();
1338 let epsilon = std::f64::consts::E / 1024.0;
1339 let mut violations = 0u32;
1340
1341 for i in 0..n {
1342 let est = cms.estimate(i);
1343 let true_c = true_counts[i as usize];
1344 let upper = true_c as f64 + epsilon * total as f64;
1345 if est as f64 > upper + 0.5 {
1346 violations += 1;
1347 }
1348 assert!(
1349 est >= true_c,
1350 "key {}: estimate {} < true count {}",
1351 i,
1352 est,
1353 true_c
1354 );
1355 }
1356
1357 let violation_rate = violations as f64 / n as f64;
1359 assert!(
1360 violation_rate <= 0.10,
1361 "violation rate {:.3} exceeds delta threshold",
1362 violation_rate,
1363 );
1364 }
1365
1366 #[test]
1367 fn unit_cms_halving_ages_counts() {
1368 let mut cms = CountMinSketch::new(64, 2, 100);
1369
1370 let h = hash_text("test");
1371 for _ in 0..10 {
1372 cms.increment(h);
1373 }
1374 assert_eq!(cms.estimate(h), 10);
1375
1376 for _ in 10..100 {
1378 cms.increment(hash_text("noise"));
1379 }
1380
1381 let est = cms.estimate(h);
1382 assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1383 }
1384
1385 #[test]
1386 fn unit_cms_monotone() {
1387 let mut cms = CountMinSketch::with_defaults();
1388 let h = hash_text("key");
1389
1390 let mut prev_est = 0u8;
1391 for _ in 0..CMS_MAX_COUNT {
1392 cms.increment(h);
1393 let est = cms.estimate(h);
1394 assert!(est >= prev_est, "estimate should be monotone");
1395 prev_est = est;
1396 }
1397 }
1398
1399 #[test]
1402 fn unit_doorkeeper_first_access_returns_false() {
1403 let mut dk = Doorkeeper::with_defaults();
1404 assert!(!dk.check_and_set(hash_text("new")));
1405 }
1406
1407 #[test]
1408 fn unit_doorkeeper_second_access_returns_true() {
1409 let mut dk = Doorkeeper::with_defaults();
1410 let h = hash_text("key");
1411 dk.check_and_set(h);
1412 assert!(dk.check_and_set(h));
1413 }
1414
1415 #[test]
1416 fn unit_doorkeeper_contains() {
1417 let mut dk = Doorkeeper::with_defaults();
1418 let h = hash_text("key");
1419 assert!(!dk.contains(h));
1420 dk.check_and_set(h);
1421 assert!(dk.contains(h));
1422 }
1423
1424 #[test]
1425 fn unit_doorkeeper_clear_resets() {
1426 let mut dk = Doorkeeper::with_defaults();
1427 let h = hash_text("key");
1428 dk.check_and_set(h);
1429 dk.clear();
1430 assert!(!dk.contains(h));
1431 assert!(!dk.check_and_set(h));
1432 }
1433
1434 #[test]
1435 fn unit_doorkeeper_false_positive_rate() {
1436 let mut dk = Doorkeeper::new(2048);
1437 let n = 100u64;
1438
1439 for i in 0..n {
1440 dk.check_and_set(i * 0x9E37_79B9 + 1);
1441 }
1442
1443 let mut false_positives = 0u32;
1444 for i in 0..1000 {
1445 let h = (i + 100_000) * 0x6A09_E667 + 7;
1446 if dk.contains(h) {
1447 false_positives += 1;
1448 }
1449 }
1450
1451 let fp_rate = false_positives as f64 / 1000.0;
1453 assert!(
1454 fp_rate < 0.15,
1455 "FP rate {:.3} too high (expected < 0.15)",
1456 fp_rate,
1457 );
1458 }
1459
1460 #[test]
1463 fn unit_admission_rule() {
1464 assert!(tinylfu_admit(5, 3)); assert!(!tinylfu_admit(3, 5)); assert!(!tinylfu_admit(3, 3)); }
1468
1469 #[test]
1470 fn unit_admission_rule_extremes() {
1471 assert!(tinylfu_admit(1, 0));
1472 assert!(!tinylfu_admit(0, 0));
1473 assert!(!tinylfu_admit(0, 1));
1474 assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1475 assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1476 }
1477
1478 #[test]
1481 fn unit_fingerprint_guard() {
1482 let fp1 = fingerprint_hash("hello");
1483 let fp2 = fingerprint_hash("world");
1484 let fp3 = fingerprint_hash("hello");
1485
1486 assert_ne!(
1487 fp1, fp2,
1488 "Different strings should have different fingerprints"
1489 );
1490 assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1491 }
1492
1493 #[test]
1494 fn unit_fingerprint_guard_collision_rate() {
1495 let mut fps = std::collections::HashSet::new();
1496 let n = 10_000;
1497
1498 for i in 0..n {
1499 let s = format!("string_{}", i);
1500 fps.insert(fingerprint_hash(&s));
1501 }
1502
1503 let collisions = n - fps.len();
1504 assert!(
1505 collisions == 0,
1506 "Expected 0 collisions in 10k items, got {}",
1507 collisions,
1508 );
1509 }
1510
1511 #[test]
1512 fn unit_fingerprint_independent_of_primary_hash() {
1513 let text = "test_string";
1514 let primary = hash_text(text);
1515 let secondary = fingerprint_hash(text);
1516
1517 assert_ne!(
1518 primary, secondary,
1519 "Fingerprint and primary hash should differ"
1520 );
1521 }
1522
1523 #[test]
1526 fn unit_doorkeeper_cms_pipeline() {
1527 let mut dk = Doorkeeper::with_defaults();
1528 let mut cms = CountMinSketch::with_defaults();
1529 let h = hash_text("item");
1530
1531 assert!(!dk.check_and_set(h));
1533 assert_eq!(cms.estimate(h), 0);
1534
1535 assert!(dk.check_and_set(h));
1537 cms.increment(h);
1538 assert_eq!(cms.estimate(h), 1);
1539
1540 assert!(dk.check_and_set(h));
1542 cms.increment(h);
1543 assert_eq!(cms.estimate(h), 2);
1544 }
1545
1546 #[test]
1547 fn unit_doorkeeper_filters_one_hit_wonders() {
1548 let mut dk = Doorkeeper::with_defaults();
1549 let mut cms = CountMinSketch::with_defaults();
1550
1551 for i in 0u64..100 {
1553 let h = i * 0x9E37_79B9 + 1;
1554 let seen = dk.check_and_set(h);
1555 if seen {
1556 cms.increment(h);
1557 }
1558 }
1559
1560 assert_eq!(cms.total_increments(), 0);
1561
1562 let h = 1; assert!(dk.check_and_set(h));
1565 cms.increment(h);
1566 assert_eq!(cms.total_increments(), 1);
1567 }
1568}
1569
1570#[cfg(test)]
1575mod tinylfu_impl_tests {
1576 use super::*;
1577
1578 #[test]
1579 fn basic_get_or_compute() {
1580 let mut cache = TinyLfuWidthCache::new(100);
1581 let w = cache.get_or_compute("hello");
1582 assert_eq!(w, 5);
1583 assert_eq!(cache.len(), 1);
1584
1585 let w2 = cache.get_or_compute("hello");
1586 assert_eq!(w2, 5);
1587 let stats = cache.stats();
1588 assert_eq!(stats.misses, 1);
1589 assert_eq!(stats.hits, 1);
1590 }
1591
1592 #[test]
1593 fn window_to_main_promotion() {
1594 let mut cache = TinyLfuWidthCache::new(100);
1597
1598 for _ in 0..10 {
1600 cache.get_or_compute("frequent");
1601 }
1602
1603 for i in 0..5 {
1605 cache.get_or_compute(&format!("item_{}", i));
1606 }
1607
1608 assert!(cache.contains("frequent"));
1610 assert!(cache.main_len() > 0 || cache.window_len() > 0);
1611 }
1612
1613 #[test]
1614 fn unit_window_promotion() {
1615 let mut cache = TinyLfuWidthCache::new(50);
1617
1618 for _ in 0..20 {
1620 cache.get_or_compute("hot");
1621 }
1622
1623 for i in 0..10 {
1625 cache.get_or_compute(&format!("filler_{}", i));
1626 }
1627
1628 assert!(cache.contains("hot"), "Frequent item should be retained");
1630 }
1631
1632 #[test]
1633 fn fingerprint_guard_detects_collision() {
1634 let mut cache = TinyLfuWidthCache::new(100);
1635
1636 let w = cache.get_or_compute_with("hello", |_| 42);
1638 assert_eq!(w, 42);
1639
1640 assert!(cache.contains("hello"));
1642 }
1643
1644 #[test]
1645 fn admission_rejects_infrequent() {
1646 let mut cache = TinyLfuWidthCache::new(10); for i in 0..9 {
1652 let s = format!("hot_{}", i);
1653 for _ in 0..5 {
1654 cache.get_or_compute(&s);
1655 }
1656 }
1657
1658 for i in 0..20 {
1661 cache.get_or_compute(&format!("cold_{}", i));
1662 }
1663
1664 let hot_survivors: usize = (0..9)
1666 .filter(|i| cache.contains(&format!("hot_{}", i)))
1667 .count();
1668 assert!(
1669 hot_survivors >= 5,
1670 "Expected most hot items to survive, got {}/9",
1671 hot_survivors
1672 );
1673 }
1674
1675 #[test]
1676 fn clear_empties_everything() {
1677 let mut cache = TinyLfuWidthCache::new(100);
1678 cache.get_or_compute("a");
1679 cache.get_or_compute("b");
1680 cache.clear();
1681 assert!(cache.is_empty());
1682 assert_eq!(cache.len(), 0);
1683 }
1684
1685 #[test]
1686 fn stats_reflect_usage() {
1687 let mut cache = TinyLfuWidthCache::new(100);
1688 cache.get_or_compute("a");
1689 cache.get_or_compute("a");
1690 cache.get_or_compute("b");
1691
1692 let stats = cache.stats();
1693 assert_eq!(stats.misses, 2);
1694 assert_eq!(stats.hits, 1);
1695 assert_eq!(stats.size, 2);
1696 }
1697
1698 #[test]
1699 fn capacity_is_respected() {
1700 let mut cache = TinyLfuWidthCache::new(20);
1701
1702 for i in 0..100 {
1703 cache.get_or_compute(&format!("item_{}", i));
1704 }
1705
1706 assert!(
1707 cache.len() <= 20,
1708 "Cache size {} exceeds capacity 20",
1709 cache.len()
1710 );
1711 }
1712
1713 #[test]
1714 fn reset_stats_works() {
1715 let mut cache = TinyLfuWidthCache::new(100);
1716 cache.get_or_compute("x");
1717 cache.get_or_compute("x");
1718 cache.reset_stats();
1719 let stats = cache.stats();
1720 assert_eq!(stats.hits, 0);
1721 assert_eq!(stats.misses, 0);
1722 }
1723
1724 #[test]
1725 fn perf_cache_hit_rate() {
1726 let mut cache = TinyLfuWidthCache::new(50);
1729
1730 for _ in 0..20 {
1732 for i in 0..10 {
1733 cache.get_or_compute(&format!("hot_{}", i));
1734 }
1735 }
1736
1737 for i in 0..100 {
1739 cache.get_or_compute(&format!("cold_{}", i));
1740 }
1741
1742 cache.reset_stats();
1744 for i in 0..10 {
1745 cache.get_or_compute(&format!("hot_{}", i));
1746 }
1747
1748 let stats = cache.stats();
1749 assert!(
1751 stats.hits >= 5,
1752 "Expected at least 5/10 hot items to hit, got {}",
1753 stats.hits
1754 );
1755 }
1756
1757 #[test]
1758 fn unicode_strings_work() {
1759 let mut cache = TinyLfuWidthCache::new(100);
1760 assert_eq!(cache.get_or_compute("日本語"), 6);
1761 assert_eq!(cache.get_or_compute("café"), 4);
1762 assert_eq!(cache.get_or_compute("日本語"), 6); assert_eq!(cache.stats().hits, 1);
1764 }
1765
1766 #[test]
1767 fn empty_string() {
1768 let mut cache = TinyLfuWidthCache::new(100);
1769 assert_eq!(cache.get_or_compute(""), 0);
1770 }
1771
1772 #[test]
1773 fn minimum_capacity() {
1774 let cache = TinyLfuWidthCache::new(0);
1775 assert!(cache.capacity() >= 2);
1776 }
1777}
1778
1779#[cfg(test)]
1785struct Lcg(u64);
1786
1787#[cfg(test)]
1788impl Lcg {
1789 fn new(seed: u64) -> Self {
1790 Self(seed)
1791 }
1792 fn next_u64(&mut self) -> u64 {
1793 self.0 = self
1794 .0
1795 .wrapping_mul(6_364_136_223_846_793_005)
1796 .wrapping_add(1);
1797 self.0
1798 }
1799 fn next_usize(&mut self, max: usize) -> usize {
1800 (self.next_u64() % (max as u64)) as usize
1801 }
1802}
1803
1804#[cfg(test)]
1809mod property_cache_equivalence {
1810 use super::*;
1811 use proptest::prelude::*;
1812
1813 proptest! {
1814 #[test]
1815 fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
1816 let mut cache = TinyLfuWidthCache::new(200);
1817 let cached = cache.get_or_compute(&s);
1818 let direct = crate::display_width(&s);
1819 prop_assert_eq!(cached, direct,
1820 "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
1821 }
1822
1823 #[test]
1824 fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
1825 let mut cache = TinyLfuWidthCache::new(200);
1826 let first = cache.get_or_compute(&s);
1827 let second = cache.get_or_compute(&s);
1828 prop_assert_eq!(first, second,
1829 "First access returned {} but second returned {} for {:?}", first, second, s);
1830 }
1831
1832 #[test]
1833 fn tinylfu_never_exceeds_capacity(
1834 strings in prop::collection::vec("[a-z]{1,5}", 10..200),
1835 capacity in 10usize..50
1836 ) {
1837 let mut cache = TinyLfuWidthCache::new(capacity);
1838 for s in &strings {
1839 cache.get_or_compute(s);
1840 prop_assert!(cache.len() <= capacity,
1841 "Cache size {} exceeded capacity {}", cache.len(), capacity);
1842 }
1843 }
1844
1845 #[test]
1846 fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
1847 let mut cache = TinyLfuWidthCache::new(100);
1848 let custom_fn = |text: &str| text.len(); let cached = cache.get_or_compute_with(&s, custom_fn);
1850 prop_assert_eq!(cached, s.len(),
1851 "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
1852 }
1853 }
1854
1855 #[test]
1856 fn deterministic_seed_equivalence() {
1857 let mut rng = super::Lcg::new(0xDEAD_BEEF);
1859
1860 let mut cache1 = TinyLfuWidthCache::new(50);
1861 let mut results1 = Vec::new();
1862 for _ in 0..500 {
1863 let idx = rng.next_usize(100);
1864 let s = format!("key_{}", idx);
1865 results1.push(cache1.get_or_compute(&s));
1866 }
1867
1868 let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
1869 let mut cache2 = TinyLfuWidthCache::new(50);
1870 let mut results2 = Vec::new();
1871 for _ in 0..500 {
1872 let idx = rng2.next_usize(100);
1873 let s = format!("key_{}", idx);
1874 results2.push(cache2.get_or_compute(&s));
1875 }
1876
1877 assert_eq!(
1878 results1, results2,
1879 "Deterministic seed must produce identical results"
1880 );
1881 }
1882
1883 #[test]
1884 fn both_caches_agree_on_widths() {
1885 let mut lru = WidthCache::new(200);
1888 let mut tlfu = TinyLfuWidthCache::new(200);
1889
1890 let inputs = [
1891 "",
1892 "hello",
1893 "日本語テスト",
1894 "café résumé",
1895 "a\tb",
1896 "🎉🎊🎈",
1897 "mixed日本eng",
1898 " spaces ",
1899 "AAAAAAAAAAAAAAAAAAAAAAAAA",
1900 "x",
1901 ];
1902
1903 for &s in &inputs {
1904 let w1 = lru.get_or_compute(s);
1905 let w2 = tlfu.get_or_compute(s);
1906 assert_eq!(
1907 w1, w2,
1908 "Width mismatch for {:?}: LRU={}, TinyLFU={}",
1909 s, w1, w2
1910 );
1911 }
1912 }
1913}
1914
1915#[cfg(test)]
1920mod e2e_cache_replay {
1921 use super::*;
1922 use std::time::Instant;
1923
1924 #[derive(Debug)]
1926 struct ReplayRecord {
1927 step: usize,
1928 key: String,
1929 width: usize,
1930 hit: bool,
1931 latency_ns: u128,
1932 }
1933
1934 impl ReplayRecord {
1935 fn to_jsonl(&self) -> String {
1936 format!(
1937 r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
1938 self.step, self.key, self.width, self.hit, self.latency_ns,
1939 )
1940 }
1941 }
1942
1943 fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
1944 (0..n)
1946 .map(|_| {
1947 let raw = rng.next_usize(universe * universe);
1948 let idx = (raw as f64).sqrt() as usize % universe;
1949 format!("item_{}", idx)
1950 })
1951 .collect()
1952 }
1953
1954 #[test]
1955 fn replay_zipfian_tinylfu() {
1956 let mut rng = super::Lcg::new(0x1234_5678);
1957 let workload = zipfian_workload(&mut rng, 2000, 200);
1958
1959 let mut cache = TinyLfuWidthCache::new(50);
1960 let mut records = Vec::with_capacity(workload.len());
1961
1962 for (i, key) in workload.iter().enumerate() {
1963 let stats_before = cache.stats();
1964 let t0 = Instant::now();
1965 let width = cache.get_or_compute(key);
1966 let elapsed = t0.elapsed().as_nanos();
1967 let stats_after = cache.stats();
1968 let hit = stats_after.hits > stats_before.hits;
1969
1970 records.push(ReplayRecord {
1971 step: i,
1972 key: key.clone(),
1973 width,
1974 hit,
1975 latency_ns: elapsed,
1976 });
1977 }
1978
1979 for r in &records[..5] {
1981 let line = r.to_jsonl();
1982 assert!(
1983 line.starts_with('{') && line.ends_with('}'),
1984 "Bad JSONL: {}",
1985 line
1986 );
1987 }
1988
1989 let total = records.len();
1991 let hits = records.iter().filter(|r| r.hit).count();
1992 let hit_rate = hits as f64 / total as f64;
1993
1994 assert!(
1997 hit_rate > 0.10,
1998 "Zipfian hit rate too low: {:.2}% ({}/{})",
1999 hit_rate * 100.0,
2000 hits,
2001 total
2002 );
2003 }
2004
2005 #[test]
2006 fn replay_zipfian_lru_comparison() {
2007 let mut rng = super::Lcg::new(0x1234_5678);
2008 let workload = zipfian_workload(&mut rng, 2000, 200);
2009
2010 let mut tlfu = TinyLfuWidthCache::new(50);
2011 let mut lru = WidthCache::new(50);
2012
2013 for key in &workload {
2014 tlfu.get_or_compute(key);
2015 lru.get_or_compute(key);
2016 }
2017
2018 let tlfu_stats = tlfu.stats();
2019 let lru_stats = lru.stats();
2020
2021 assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2023 assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2024
2025 assert!(
2028 tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2029 "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2030 tlfu_stats.hit_rate() * 100.0,
2031 lru_stats.hit_rate() * 100.0,
2032 );
2033 }
2034
2035 #[test]
2036 fn replay_deterministic_reproduction() {
2037 let run = |seed: u64| -> Vec<bool> {
2039 let mut rng = super::Lcg::new(seed);
2040 let workload = zipfian_workload(&mut rng, 500, 100);
2041 let mut cache = TinyLfuWidthCache::new(30);
2042 let mut hits = Vec::with_capacity(500);
2043 for key in &workload {
2044 let before = cache.stats().hits;
2045 cache.get_or_compute(key);
2046 hits.push(cache.stats().hits > before);
2047 }
2048 hits
2049 };
2050
2051 let run1 = run(0xABCD_EF01);
2052 let run2 = run(0xABCD_EF01);
2053 assert_eq!(run1, run2, "Deterministic replay diverged");
2054 }
2055
2056 #[test]
2057 fn replay_uniform_workload() {
2058 let mut rng = super::Lcg::new(0x9999);
2061 let universe = 100;
2062 let cache_size = 25;
2063 let n = 5000;
2064
2065 let mut cache = TinyLfuWidthCache::new(cache_size);
2066
2067 for i in 0..universe {
2069 cache.get_or_compute(&format!("u_{}", i));
2070 }
2071
2072 cache.reset_stats();
2073 for _ in 0..n {
2074 let idx = rng.next_usize(universe);
2075 cache.get_or_compute(&format!("u_{}", idx));
2076 }
2077
2078 let stats = cache.stats();
2079 let hit_rate = stats.hit_rate();
2080 assert!(
2082 hit_rate > 0.10 && hit_rate < 0.60,
2083 "Uniform hit rate {:.2}% outside expected range",
2084 hit_rate * 100.0,
2085 );
2086 }
2087}
2088
2089#[cfg(test)]
2094mod perf_cache_overhead {
2095 use super::*;
2096 use std::time::Instant;
2097
2098 fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2100 let mut latencies = Vec::with_capacity(n);
2101 for i in 0..n {
2102 let t0 = Instant::now();
2103 op(i);
2104 latencies.push(t0.elapsed().as_nanos());
2105 }
2106 latencies.sort_unstable();
2107 latencies
2108 }
2109
2110 fn p95(sorted: &[u128]) -> u128 {
2111 let len = sorted.len();
2112 let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2113 sorted[idx]
2114 }
2115
2116 fn p99(sorted: &[u128]) -> u128 {
2117 let len = sorted.len();
2118 let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2119 sorted[idx]
2120 }
2121
2122 fn median(sorted: &[u128]) -> u128 {
2123 sorted[sorted.len() / 2]
2124 }
2125
2126 #[allow(unexpected_cfgs)]
2127 fn perf_budget_ns(base_ns: u128) -> u128 {
2128 if cfg!(coverage) || cfg!(coverage_nightly) {
2129 base_ns.saturating_mul(10)
2130 } else {
2131 base_ns
2132 }
2133 }
2134
2135 #[test]
2136 fn perf_lru_hit_latency() {
2137 let mut cache = WidthCache::new(1000);
2138 for i in 0..100 {
2140 cache.get_or_compute(&format!("warm_{}", i));
2141 }
2142
2143 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2144 let latencies = measure_latencies(10_000, |i| {
2145 let _ = cache.get_or_compute(&keys[i % 100]);
2146 });
2147
2148 let p95_ns = p95(&latencies);
2149 let budget_ns = perf_budget_ns(5_000);
2152 assert!(
2153 p95_ns < budget_ns,
2154 "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2155 p95_ns,
2156 budget_ns,
2157 median(&latencies),
2158 p99(&latencies),
2159 );
2160 }
2161
2162 #[test]
2163 fn perf_tinylfu_hit_latency() {
2164 let mut cache = TinyLfuWidthCache::new(1000);
2165 for _ in 0..5 {
2167 for i in 0..100 {
2168 cache.get_or_compute(&format!("warm_{}", i));
2169 }
2170 }
2171
2172 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2173 let latencies = measure_latencies(10_000, |i| {
2174 let _ = cache.get_or_compute(&keys[i % 100]);
2175 });
2176
2177 let p95_ns = p95(&latencies);
2178 let budget_ns = perf_budget_ns(5_000);
2180 assert!(
2181 p95_ns < budget_ns,
2182 "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2183 p95_ns,
2184 budget_ns,
2185 median(&latencies),
2186 p99(&latencies),
2187 );
2188 }
2189
2190 #[test]
2191 fn perf_tinylfu_miss_latency() {
2192 let mut cache = TinyLfuWidthCache::new(100);
2193 let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2194
2195 let latencies = measure_latencies(10_000, |i| {
2196 let _ = cache.get_or_compute(&keys[i]);
2197 });
2198
2199 let p95_ns = p95(&latencies);
2200 let budget_ns = perf_budget_ns(20_000);
2203 assert!(
2204 p95_ns < budget_ns,
2205 "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2206 p95_ns,
2207 budget_ns,
2208 median(&latencies),
2209 p99(&latencies),
2210 );
2211 }
2212
2213 #[test]
2214 fn perf_cms_increment_latency() {
2215 let mut cms = CountMinSketch::with_defaults();
2216 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2217
2218 let latencies = measure_latencies(10_000, |i| {
2219 cms.increment(hashes[i]);
2220 });
2221
2222 let p95_ns = p95(&latencies);
2223 let budget_ns = perf_budget_ns(2_000);
2225 assert!(
2226 p95_ns < budget_ns,
2227 "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2228 p95_ns,
2229 budget_ns,
2230 median(&latencies),
2231 );
2232 }
2233
2234 #[test]
2235 fn perf_doorkeeper_latency() {
2236 let mut dk = Doorkeeper::with_defaults();
2237 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2238
2239 let latencies = measure_latencies(10_000, |i| {
2240 let _ = dk.check_and_set(hashes[i]);
2241 });
2242
2243 let p95_ns = p95(&latencies);
2244 let budget_ns = perf_budget_ns(1_000);
2246 assert!(
2247 p95_ns < budget_ns,
2248 "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2249 p95_ns,
2250 budget_ns,
2251 median(&latencies),
2252 );
2253 }
2254
2255 #[test]
2256 fn perf_fingerprint_hash_latency() {
2257 let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2258
2259 let latencies = measure_latencies(10_000, |i| {
2260 let _ = fingerprint_hash(&keys[i]);
2261 });
2262
2263 let p95_ns = p95(&latencies);
2264 let budget_ns = perf_budget_ns(1_000);
2266 assert!(
2267 p95_ns < budget_ns,
2268 "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2269 p95_ns,
2270 budget_ns,
2271 median(&latencies),
2272 );
2273 }
2274}