1#![forbid(unsafe_code)]
2
3use lru::LruCache;
30use rustc_hash::FxHasher;
31use std::hash::{Hash, Hasher};
32use std::num::NonZeroUsize;
33
34pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39pub struct CacheStats {
40 pub hits: u64,
42 pub misses: u64,
44 pub size: usize,
46 pub capacity: usize,
48}
49
50impl CacheStats {
51 #[must_use]
53 pub fn hit_rate(&self) -> f64 {
54 let total = self.hits + self.misses;
55 if total == 0 {
56 0.0
57 } else {
58 self.hits as f64 / total as f64
59 }
60 }
61}
62
63#[derive(Debug)]
85pub struct WidthCache {
86 cache: LruCache<u64, usize>,
87 hits: u64,
88 misses: u64,
89}
90
91impl WidthCache {
92 #[must_use]
96 pub fn new(capacity: usize) -> Self {
97 let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
98 Self {
99 cache: LruCache::new(capacity),
100 hits: 0,
101 misses: 0,
102 }
103 }
104
105 #[must_use]
107 pub fn with_default_capacity() -> Self {
108 Self::new(DEFAULT_CACHE_CAPACITY)
109 }
110
111 #[inline]
116 pub fn get_or_compute(&mut self, text: &str) -> usize {
117 self.get_or_compute_with(text, crate::display_width)
118 }
119
120 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
125 where
126 F: FnOnce(&str) -> usize,
127 {
128 let hash = hash_text(text);
129
130 if let Some(&width) = self.cache.get(&hash) {
131 self.hits += 1;
132 return width;
133 }
134
135 self.misses += 1;
136 let width = compute(text);
137 self.cache.put(hash, width);
138 width
139 }
140
141 #[must_use]
143 pub fn contains(&self, text: &str) -> bool {
144 let hash = hash_text(text);
145 self.cache.contains(&hash)
146 }
147
148 #[must_use]
153 pub fn get(&mut self, text: &str) -> Option<usize> {
154 let hash = hash_text(text);
155 self.cache.get(&hash).copied()
156 }
157
158 #[must_use]
160 pub fn peek(&self, text: &str) -> Option<usize> {
161 let hash = hash_text(text);
162 self.cache.peek(&hash).copied()
163 }
164
165 pub fn preload(&mut self, text: &str) {
169 let hash = hash_text(text);
170 if !self.cache.contains(&hash) {
171 let width = crate::display_width(text);
172 self.cache.put(hash, width);
173 }
174 }
175
176 pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
178 for text in texts {
179 self.preload(text);
180 }
181 }
182
183 pub fn clear(&mut self) {
185 self.cache.clear();
186 }
187
188 pub fn reset_stats(&mut self) {
190 self.hits = 0;
191 self.misses = 0;
192 }
193
194 #[inline]
196 #[must_use]
197 pub fn stats(&self) -> CacheStats {
198 CacheStats {
199 hits: self.hits,
200 misses: self.misses,
201 size: self.cache.len(),
202 capacity: self.cache.cap().get(),
203 }
204 }
205
206 #[inline]
208 #[must_use]
209 pub fn len(&self) -> usize {
210 self.cache.len()
211 }
212
213 #[inline]
215 #[must_use]
216 pub fn is_empty(&self) -> bool {
217 self.cache.is_empty()
218 }
219
220 #[inline]
222 #[must_use]
223 pub fn capacity(&self) -> usize {
224 self.cache.cap().get()
225 }
226
227 pub fn resize(&mut self, new_capacity: usize) {
232 let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
233 self.cache.resize(new_capacity);
234 }
235}
236
237impl Default for WidthCache {
238 fn default() -> Self {
239 Self::with_default_capacity()
240 }
241}
242
243#[inline]
245fn hash_text(text: &str) -> u64 {
246 let mut hasher = FxHasher::default();
247 text.hash(&mut hasher);
248 hasher.finish()
249}
250
251#[cfg(feature = "thread_local_cache")]
256thread_local! {
257 static THREAD_CACHE: std::cell::RefCell<WidthCache> =
258 std::cell::RefCell::new(WidthCache::with_default_capacity());
259}
260
261#[cfg(feature = "thread_local_cache")]
263pub fn cached_width(text: &str) -> usize {
264 THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
265}
266
267#[cfg(feature = "thread_local_cache")]
269pub fn clear_thread_cache() {
270 THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
271}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276
277 #[test]
278 fn new_cache_is_empty() {
279 let cache = WidthCache::new(100);
280 assert!(cache.is_empty());
281 assert_eq!(cache.len(), 0);
282 assert_eq!(cache.capacity(), 100);
283 }
284
285 #[test]
286 fn default_capacity() {
287 let cache = WidthCache::with_default_capacity();
288 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
289 }
290
291 #[test]
292 fn get_or_compute_caches_value() {
293 let mut cache = WidthCache::new(100);
294
295 let width1 = cache.get_or_compute("hello");
296 assert_eq!(width1, 5);
297 assert_eq!(cache.len(), 1);
298
299 let width2 = cache.get_or_compute("hello");
300 assert_eq!(width2, 5);
301 assert_eq!(cache.len(), 1); let stats = cache.stats();
304 assert_eq!(stats.hits, 1);
305 assert_eq!(stats.misses, 1);
306 }
307
308 #[test]
309 fn get_or_compute_different_strings() {
310 let mut cache = WidthCache::new(100);
311
312 cache.get_or_compute("hello");
313 cache.get_or_compute("world");
314 cache.get_or_compute("foo");
315
316 assert_eq!(cache.len(), 3);
317 let stats = cache.stats();
318 assert_eq!(stats.misses, 3);
319 assert_eq!(stats.hits, 0);
320 }
321
322 #[test]
323 fn get_or_compute_cjk() {
324 let mut cache = WidthCache::new(100);
325
326 let width = cache.get_or_compute("你好");
327 assert_eq!(width, 4); }
329
330 #[test]
331 fn contains() {
332 let mut cache = WidthCache::new(100);
333
334 assert!(!cache.contains("hello"));
335 cache.get_or_compute("hello");
336 assert!(cache.contains("hello"));
337 }
338
339 #[test]
340 fn get_returns_none_for_missing() {
341 let mut cache = WidthCache::new(100);
342 assert!(cache.get("missing").is_none());
343 }
344
345 #[test]
346 fn get_returns_cached_value() {
347 let mut cache = WidthCache::new(100);
348 cache.get_or_compute("hello");
349
350 let width = cache.get("hello");
351 assert_eq!(width, Some(5));
352 }
353
354 #[test]
355 fn peek_does_not_update_lru() {
356 let mut cache = WidthCache::new(2);
357
358 cache.get_or_compute("a");
359 cache.get_or_compute("b");
360
361 let _ = cache.peek("a");
363
364 cache.get_or_compute("c");
366
367 assert!(!cache.contains("a"));
368 assert!(cache.contains("b"));
369 assert!(cache.contains("c"));
370 }
371
372 #[test]
373 fn lru_eviction() {
374 let mut cache = WidthCache::new(2);
375
376 cache.get_or_compute("a");
377 cache.get_or_compute("b");
378 cache.get_or_compute("c"); assert!(!cache.contains("a"));
381 assert!(cache.contains("b"));
382 assert!(cache.contains("c"));
383 }
384
385 #[test]
386 fn lru_refresh_on_access() {
387 let mut cache = WidthCache::new(2);
388
389 cache.get_or_compute("a");
390 cache.get_or_compute("b");
391 cache.get_or_compute("a"); cache.get_or_compute("c"); assert!(cache.contains("a"));
395 assert!(!cache.contains("b"));
396 assert!(cache.contains("c"));
397 }
398
399 #[test]
400 fn preload() {
401 let mut cache = WidthCache::new(100);
402
403 cache.preload("hello");
404 assert!(cache.contains("hello"));
405 assert_eq!(cache.peek("hello"), Some(5));
406
407 let stats = cache.stats();
408 assert_eq!(stats.misses, 0); assert_eq!(stats.hits, 0);
410 }
411
412 #[test]
413 fn preload_many() {
414 let mut cache = WidthCache::new(100);
415
416 cache.preload_many(["hello", "world", "foo"]);
417 assert_eq!(cache.len(), 3);
418 }
419
420 #[test]
421 fn clear() {
422 let mut cache = WidthCache::new(100);
423 cache.get_or_compute("hello");
424 cache.get_or_compute("world");
425
426 cache.clear();
427 assert!(cache.is_empty());
428 assert!(!cache.contains("hello"));
429 }
430
431 #[test]
432 fn reset_stats() {
433 let mut cache = WidthCache::new(100);
434 cache.get_or_compute("hello");
435 cache.get_or_compute("hello");
436
437 let stats = cache.stats();
438 assert_eq!(stats.hits, 1);
439 assert_eq!(stats.misses, 1);
440
441 cache.reset_stats();
442 let stats = cache.stats();
443 assert_eq!(stats.hits, 0);
444 assert_eq!(stats.misses, 0);
445 }
446
447 #[test]
448 fn hit_rate() {
449 let stats = CacheStats {
450 hits: 75,
451 misses: 25,
452 size: 100,
453 capacity: 1000,
454 };
455 assert!((stats.hit_rate() - 0.75).abs() < 0.001);
456 }
457
458 #[test]
459 fn hit_rate_no_requests() {
460 let stats = CacheStats::default();
461 assert_eq!(stats.hit_rate(), 0.0);
462 }
463
464 #[test]
465 fn resize_smaller() {
466 let mut cache = WidthCache::new(100);
467 for i in 0..50 {
468 cache.get_or_compute(&format!("text{i}"));
469 }
470 assert_eq!(cache.len(), 50);
471
472 cache.resize(10);
473 assert!(cache.len() <= 10);
474 assert_eq!(cache.capacity(), 10);
475 }
476
477 #[test]
478 fn resize_larger() {
479 let mut cache = WidthCache::new(10);
480 cache.resize(100);
481 assert_eq!(cache.capacity(), 100);
482 }
483
484 #[test]
485 fn custom_compute_function() {
486 let mut cache = WidthCache::new(100);
487
488 let width = cache.get_or_compute_with("hello", |_| 42);
490 assert_eq!(width, 42);
491
492 assert_eq!(cache.peek("hello"), Some(42));
494 }
495
496 #[test]
497 fn empty_string() {
498 let mut cache = WidthCache::new(100);
499 let width = cache.get_or_compute("");
500 assert_eq!(width, 0);
501 }
502
503 #[test]
504 fn hash_collision_handling() {
505 let mut cache = WidthCache::new(1000);
508
509 for i in 0..500 {
510 cache.get_or_compute(&format!("string{i}"));
511 }
512
513 assert_eq!(cache.len(), 500);
514 }
515
516 #[test]
517 fn unicode_strings() {
518 let mut cache = WidthCache::new(100);
519
520 assert_eq!(cache.get_or_compute("café"), 4);
522 assert_eq!(cache.get_or_compute("日本語"), 6);
523 assert_eq!(cache.get_or_compute("🎉"), 2); assert_eq!(cache.len(), 3);
526 }
527
528 #[test]
529 fn combining_characters() {
530 let mut cache = WidthCache::new(100);
531
532 let width = cache.get_or_compute("e\u{0301}");
534 assert_eq!(width, 1);
536 }
537
538 #[test]
543 fn default_cache() {
544 let cache = WidthCache::default();
545 assert!(cache.is_empty());
546 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
547 }
548
549 #[test]
550 fn cache_stats_debug() {
551 let stats = CacheStats {
552 hits: 10,
553 misses: 5,
554 size: 15,
555 capacity: 100,
556 };
557 let debug = format!("{:?}", stats);
558 assert!(debug.contains("CacheStats"));
559 assert!(debug.contains("10")); }
561
562 #[test]
563 fn cache_stats_default() {
564 let stats = CacheStats::default();
565 assert_eq!(stats.hits, 0);
566 assert_eq!(stats.misses, 0);
567 assert_eq!(stats.size, 0);
568 assert_eq!(stats.capacity, 0);
569 }
570
571 #[test]
572 fn cache_stats_equality() {
573 let stats1 = CacheStats {
574 hits: 10,
575 misses: 5,
576 size: 15,
577 capacity: 100,
578 };
579 let stats2 = stats1; assert_eq!(stats1, stats2);
581 }
582
583 #[test]
584 fn clear_after_preload() {
585 let mut cache = WidthCache::new(100);
586 cache.preload_many(["hello", "world", "test"]);
587 assert_eq!(cache.len(), 3);
588
589 cache.clear();
590 assert!(cache.is_empty());
591 assert!(!cache.contains("hello"));
592 }
593
594 #[test]
595 fn preload_existing_is_noop() {
596 let mut cache = WidthCache::new(100);
597 cache.get_or_compute("hello"); let len_before = cache.len();
599
600 cache.preload("hello"); assert_eq!(cache.len(), len_before);
602 }
603
604 #[test]
605 fn minimum_capacity_is_one() {
606 let cache = WidthCache::new(0);
607 assert_eq!(cache.capacity(), 1);
608 }
609
610 #[test]
611 fn width_cache_debug() {
612 let cache = WidthCache::new(10);
613 let debug = format!("{:?}", cache);
614 assert!(debug.contains("WidthCache"));
615 }
616
617 #[test]
618 fn emoji_zwj_sequence() {
619 let mut cache = WidthCache::new(100);
620 let width = cache.get_or_compute("👨👩👧");
622 assert!(width >= 1);
624 }
625
626 #[test]
627 fn emoji_with_skin_tone() {
628 let mut cache = WidthCache::new(100);
629 let width = cache.get_or_compute("👍🏻");
630 assert!(width >= 1);
631 }
632
633 #[test]
634 fn flag_emoji() {
635 let mut cache = WidthCache::new(100);
636 let width = cache.get_or_compute("🇺🇸");
638 assert!(width >= 1);
639 }
640
641 #[test]
642 fn mixed_width_strings() {
643 let mut cache = WidthCache::new(100);
644 let width = cache.get_or_compute("Hello你好World");
646 assert_eq!(width, 14); }
648
649 #[test]
650 fn stats_size_reflects_cache_len() {
651 let mut cache = WidthCache::new(100);
652 cache.get_or_compute("a");
653 cache.get_or_compute("b");
654 cache.get_or_compute("c");
655
656 let stats = cache.stats();
657 assert_eq!(stats.size, cache.len());
658 assert_eq!(stats.size, 3);
659 }
660
661 #[test]
662 fn stats_capacity_matches() {
663 let cache = WidthCache::new(42);
664 let stats = cache.stats();
665 assert_eq!(stats.capacity, 42);
666 }
667
668 #[test]
669 fn resize_to_zero_becomes_one() {
670 let mut cache = WidthCache::new(100);
671 cache.resize(0);
672 assert_eq!(cache.capacity(), 1);
673 }
674
675 #[test]
676 fn get_updates_lru_order() {
677 let mut cache = WidthCache::new(2);
678
679 cache.get_or_compute("a");
680 cache.get_or_compute("b");
681
682 let _ = cache.get("a");
684
685 cache.get_or_compute("c");
687
688 assert!(cache.contains("a"));
689 assert!(!cache.contains("b"));
690 assert!(cache.contains("c"));
691 }
692
693 #[test]
694 fn contains_does_not_modify_stats() {
695 let mut cache = WidthCache::new(100);
696 cache.get_or_compute("hello");
697
698 let stats_before = cache.stats();
699 let _ = cache.contains("hello");
700 let _ = cache.contains("missing");
701 let stats_after = cache.stats();
702
703 assert_eq!(stats_before.hits, stats_after.hits);
704 assert_eq!(stats_before.misses, stats_after.misses);
705 }
706
707 #[test]
708 fn peek_returns_none_for_missing() {
709 let cache = WidthCache::new(100);
710 assert!(cache.peek("missing").is_none());
711 }
712
713 #[test]
714 fn custom_compute_called_once() {
715 let mut cache = WidthCache::new(100);
716 let mut call_count = 0;
717
718 cache.get_or_compute_with("test", |_| {
719 call_count += 1;
720 10
721 });
722
723 cache.get_or_compute_with("test", |_| {
724 call_count += 1;
725 20 });
727
728 assert_eq!(call_count, 1);
729 assert_eq!(cache.peek("test"), Some(10));
730 }
731
732 #[test]
733 fn whitespace_strings() {
734 let mut cache = WidthCache::new(100);
735 assert_eq!(cache.get_or_compute(" "), 3); assert_eq!(cache.get_or_compute("\t"), 1); assert_eq!(cache.get_or_compute("\n"), 1); }
739}
740
741#[derive(Debug, Clone)]
795pub struct CountMinSketch {
796 counters: Vec<Vec<u8>>,
798 depth: usize,
800 width: usize,
802 total_increments: u64,
804 reset_interval: u64,
806}
807
808const CMS_MAX_COUNT: u8 = 15;
810
811const CMS_DEFAULT_WIDTH: usize = 1024;
813
814const CMS_DEFAULT_DEPTH: usize = 4;
816
817const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
819
820impl CountMinSketch {
821 pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
823 let width = width.max(1);
824 let depth = depth.max(1);
825 Self {
826 counters: vec![vec![0u8; width]; depth],
827 depth,
828 width,
829 total_increments: 0,
830 reset_interval: reset_interval.max(1),
831 }
832 }
833
834 pub fn with_defaults() -> Self {
836 Self::new(
837 CMS_DEFAULT_WIDTH,
838 CMS_DEFAULT_DEPTH,
839 CMS_DEFAULT_RESET_INTERVAL,
840 )
841 }
842
843 pub fn increment(&mut self, hash: u64) {
845 for row in 0..self.depth {
846 let idx = self.index(hash, row);
847 self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
848 }
849 self.total_increments += 1;
850
851 if self.total_increments >= self.reset_interval {
852 self.halve();
853 }
854 }
855
856 pub fn estimate(&self, hash: u64) -> u8 {
858 let mut min = u8::MAX;
859 for row in 0..self.depth {
860 let idx = self.index(hash, row);
861 min = min.min(self.counters[row][idx]);
862 }
863 min
864 }
865
866 pub fn total_increments(&self) -> u64 {
868 self.total_increments
869 }
870
871 fn halve(&mut self) {
873 for row in &mut self.counters {
874 for c in row.iter_mut() {
875 *c /= 2;
876 }
877 }
878 self.total_increments = 0;
879 }
880
881 pub fn clear(&mut self) {
883 for row in &mut self.counters {
884 row.fill(0);
885 }
886 self.total_increments = 0;
887 }
888
889 #[inline]
891 fn index(&self, hash: u64, row: usize) -> usize {
892 let mixed = hash
894 .wrapping_mul(0x517c_c1b7_2722_0a95)
895 .wrapping_add(row as u64);
896 let mixed = mixed ^ (mixed >> 32);
897 (mixed as usize) % self.width
898 }
899}
900
901#[derive(Debug, Clone)]
907pub struct Doorkeeper {
908 bits: Vec<u64>,
909 num_bits: usize,
910}
911
912const DOORKEEPER_DEFAULT_BITS: usize = 2048;
914
915impl Doorkeeper {
916 pub fn new(num_bits: usize) -> Self {
918 let num_bits = num_bits.max(64);
919 let num_words = num_bits.div_ceil(64);
920 Self {
921 bits: vec![0u64; num_words],
922 num_bits,
923 }
924 }
925
926 pub fn with_defaults() -> Self {
928 Self::new(DOORKEEPER_DEFAULT_BITS)
929 }
930
931 pub fn check_and_set(&mut self, hash: u64) -> bool {
933 let idx = (hash as usize) % self.num_bits;
934 let word = idx / 64;
935 let bit = idx % 64;
936 let was_set = (self.bits[word] >> bit) & 1 == 1;
937 self.bits[word] |= 1 << bit;
938 was_set
939 }
940
941 pub fn contains(&self, hash: u64) -> bool {
943 let idx = (hash as usize) % self.num_bits;
944 let word = idx / 64;
945 let bit = idx % 64;
946 (self.bits[word] >> bit) & 1 == 1
947 }
948
949 pub fn clear(&mut self) {
951 self.bits.fill(0);
952 }
953}
954
955#[inline]
960pub fn fingerprint_hash(text: &str) -> u64 {
961 let mut h: u64 = 0xcbf2_9ce4_8422_2325; for &b in text.as_bytes() {
964 h ^= b as u64;
965 h = h.wrapping_mul(0x0100_0000_01b3); }
967 h
968}
969
970#[inline]
977pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
978 candidate_freq > victim_freq
979}
980
981#[derive(Debug, Clone)]
987struct TinyLfuEntry {
988 width: usize,
989 fingerprint: u64,
990}
991
992#[derive(Debug)]
1015pub struct TinyLfuWidthCache {
1016 window: LruCache<u64, TinyLfuEntry>,
1018 main: LruCache<u64, TinyLfuEntry>,
1020 sketch: CountMinSketch,
1022 doorkeeper: Doorkeeper,
1024 total_capacity: usize,
1026 hits: u64,
1028 misses: u64,
1029}
1030
1031impl TinyLfuWidthCache {
1032 pub fn new(total_capacity: usize) -> Self {
1036 let total_capacity = total_capacity.max(2);
1037 let window_cap = (total_capacity / 100).max(1);
1038 let main_cap = total_capacity - window_cap;
1039
1040 Self {
1041 window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1042 main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1043 sketch: CountMinSketch::with_defaults(),
1044 doorkeeper: Doorkeeper::with_defaults(),
1045 total_capacity,
1046 hits: 0,
1047 misses: 0,
1048 }
1049 }
1050
1051 pub fn get_or_compute(&mut self, text: &str) -> usize {
1053 self.get_or_compute_with(text, crate::display_width)
1054 }
1055
1056 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1058 where
1059 F: FnOnce(&str) -> usize,
1060 {
1061 let hash = hash_text(text);
1062 let fp = fingerprint_hash(text);
1063
1064 let seen = self.doorkeeper.check_and_set(hash);
1066 if seen {
1067 self.sketch.increment(hash);
1068 }
1069
1070 if let Some(entry) = self.main.get(&hash) {
1072 if entry.fingerprint == fp {
1073 self.hits += 1;
1074 return entry.width;
1075 }
1076 self.main.pop(&hash);
1078 }
1079
1080 if let Some(entry) = self.window.get(&hash) {
1082 if entry.fingerprint == fp {
1083 self.hits += 1;
1084 return entry.width;
1085 }
1086 self.window.pop(&hash);
1088 }
1089
1090 self.misses += 1;
1092 let width = compute(text);
1093 let new_entry = TinyLfuEntry {
1094 width,
1095 fingerprint: fp,
1096 };
1097
1098 if self.window.len() >= self.window.cap().get() {
1101 if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1103 self.try_admit_to_main(evicted_hash, evicted_entry);
1104 }
1105 }
1106 self.window.put(hash, new_entry);
1107
1108 width
1109 }
1110
1111 fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1113 let candidate_freq = self.sketch.estimate(candidate_hash);
1114
1115 if self.main.len() < self.main.cap().get() {
1116 self.main.put(candidate_hash, candidate_entry);
1118 return;
1119 }
1120
1121 if let Some((&victim_hash, _)) = self.main.peek_lru() {
1123 let victim_freq = self.sketch.estimate(victim_hash);
1124 if tinylfu_admit(candidate_freq, victim_freq) {
1125 self.main.pop_lru();
1126 self.main.put(candidate_hash, candidate_entry);
1127 }
1128 }
1130 }
1131
1132 pub fn contains(&self, text: &str) -> bool {
1134 let hash = hash_text(text);
1135 let fp = fingerprint_hash(text);
1136 if let Some(e) = self.main.peek(&hash)
1137 && e.fingerprint == fp
1138 {
1139 return true;
1140 }
1141 if let Some(e) = self.window.peek(&hash)
1142 && e.fingerprint == fp
1143 {
1144 return true;
1145 }
1146 false
1147 }
1148
1149 pub fn stats(&self) -> CacheStats {
1151 CacheStats {
1152 hits: self.hits,
1153 misses: self.misses,
1154 size: self.window.len() + self.main.len(),
1155 capacity: self.total_capacity,
1156 }
1157 }
1158
1159 pub fn clear(&mut self) {
1161 self.window.clear();
1162 self.main.clear();
1163 self.sketch.clear();
1164 self.doorkeeper.clear();
1165 }
1166
1167 pub fn reset_stats(&mut self) {
1169 self.hits = 0;
1170 self.misses = 0;
1171 }
1172
1173 pub fn len(&self) -> usize {
1175 self.window.len() + self.main.len()
1176 }
1177
1178 pub fn is_empty(&self) -> bool {
1180 self.window.is_empty() && self.main.is_empty()
1181 }
1182
1183 pub fn capacity(&self) -> usize {
1185 self.total_capacity
1186 }
1187
1188 pub fn main_len(&self) -> usize {
1190 self.main.len()
1191 }
1192
1193 pub fn window_len(&self) -> usize {
1195 self.window.len()
1196 }
1197}
1198
1199#[derive(Debug)]
1211pub struct S3FifoWidthCache {
1212 cache: ftui_core::s3_fifo::S3Fifo<u64, S3FifoEntry>,
1213 hits: u64,
1214 misses: u64,
1215 total_capacity: usize,
1216}
1217
1218#[derive(Debug, Clone, Copy)]
1220struct S3FifoEntry {
1221 width: usize,
1222 fingerprint: u64,
1223}
1224
1225impl S3FifoWidthCache {
1226 pub fn new(capacity: usize) -> Self {
1228 let capacity = capacity.max(2);
1229 Self {
1230 cache: ftui_core::s3_fifo::S3Fifo::new(capacity),
1231 hits: 0,
1232 misses: 0,
1233 total_capacity: capacity,
1234 }
1235 }
1236
1237 #[must_use]
1239 pub fn with_default_capacity() -> Self {
1240 Self::new(DEFAULT_CACHE_CAPACITY)
1241 }
1242
1243 pub fn get_or_compute(&mut self, text: &str) -> usize {
1245 self.get_or_compute_with(text, crate::display_width)
1246 }
1247
1248 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1250 where
1251 F: FnOnce(&str) -> usize,
1252 {
1253 let hash = hash_text(text);
1254 let fp = fingerprint_hash(text);
1255
1256 if let Some(entry) = self.cache.get(&hash) {
1257 if entry.fingerprint == fp {
1258 self.hits += 1;
1259 return entry.width;
1260 }
1261 self.cache.remove(&hash);
1263 }
1264
1265 self.misses += 1;
1267 let width = compute(text);
1268 self.cache.insert(
1269 hash,
1270 S3FifoEntry {
1271 width,
1272 fingerprint: fp,
1273 },
1274 );
1275 width
1276 }
1277
1278 pub fn contains(&self, text: &str) -> bool {
1280 let hash = hash_text(text);
1281 self.cache.contains_key(&hash)
1282 }
1283
1284 pub fn stats(&self) -> CacheStats {
1286 CacheStats {
1287 hits: self.hits,
1288 misses: self.misses,
1289 size: self.cache.len(),
1290 capacity: self.total_capacity,
1291 }
1292 }
1293
1294 pub fn clear(&mut self) {
1296 self.cache.clear();
1297 self.hits = 0;
1298 self.misses = 0;
1299 }
1300
1301 pub fn reset_stats(&mut self) {
1303 self.hits = 0;
1304 self.misses = 0;
1305 }
1306
1307 pub fn len(&self) -> usize {
1309 self.cache.len()
1310 }
1311
1312 pub fn is_empty(&self) -> bool {
1314 self.cache.is_empty()
1315 }
1316
1317 pub fn capacity(&self) -> usize {
1319 self.total_capacity
1320 }
1321}
1322
1323impl Default for S3FifoWidthCache {
1324 fn default() -> Self {
1325 Self::with_default_capacity()
1326 }
1327}
1328
1329#[cfg(test)]
1330mod s3_fifo_width_tests {
1331 use super::*;
1332
1333 #[test]
1334 fn s3fifo_new_cache_is_empty() {
1335 let cache = S3FifoWidthCache::new(100);
1336 assert!(cache.is_empty());
1337 assert_eq!(cache.len(), 0);
1338 }
1339
1340 #[test]
1341 fn s3fifo_get_or_compute_caches_value() {
1342 let mut cache = S3FifoWidthCache::new(100);
1343 let w1 = cache.get_or_compute("hello");
1344 assert_eq!(w1, 5);
1345 assert_eq!(cache.len(), 1);
1346
1347 let w2 = cache.get_or_compute("hello");
1348 assert_eq!(w2, 5);
1349 assert_eq!(cache.len(), 1);
1350
1351 let stats = cache.stats();
1352 assert_eq!(stats.hits, 1);
1353 assert_eq!(stats.misses, 1);
1354 }
1355
1356 #[test]
1357 fn s3fifo_different_strings() {
1358 let mut cache = S3FifoWidthCache::new(100);
1359 cache.get_or_compute("hello");
1360 cache.get_or_compute("world");
1361 cache.get_or_compute("foo");
1362 assert_eq!(cache.len(), 3);
1363 }
1364
1365 #[test]
1366 fn s3fifo_cjk_width() {
1367 let mut cache = S3FifoWidthCache::new(100);
1368 let w = cache.get_or_compute("你好");
1369 assert_eq!(w, 4);
1370 }
1371
1372 #[test]
1373 fn s3fifo_contains() {
1374 let mut cache = S3FifoWidthCache::new(100);
1375 assert!(!cache.contains("hello"));
1376 cache.get_or_compute("hello");
1377 assert!(cache.contains("hello"));
1378 }
1379
1380 #[test]
1381 fn s3fifo_clear_resets() {
1382 let mut cache = S3FifoWidthCache::new(100);
1383 cache.get_or_compute("hello");
1384 cache.get_or_compute("world");
1385 cache.clear();
1386 assert!(cache.is_empty());
1387 assert!(!cache.contains("hello"));
1388 let stats = cache.stats();
1389 assert_eq!(stats.hits, 0);
1390 assert_eq!(stats.misses, 0);
1391 }
1392
1393 #[test]
1394 fn s3fifo_produces_same_widths_as_lru() {
1395 let mut lru = WidthCache::new(100);
1396 let mut s3 = S3FifoWidthCache::new(100);
1397
1398 let texts = [
1399 "hello",
1400 "你好世界",
1401 "abc",
1402 "🎉🎉",
1403 "",
1404 " ",
1405 "a\tb",
1406 "mixed中english文",
1407 ];
1408
1409 for text in &texts {
1410 let lru_w = lru.get_or_compute(text);
1411 let s3_w = s3.get_or_compute(text);
1412 assert_eq!(lru_w, s3_w, "width mismatch for {:?}", text);
1413 }
1414 }
1415
1416 #[test]
1417 fn s3fifo_scan_resistance_preserves_hot_set() {
1418 let mut cache = S3FifoWidthCache::new(50);
1419
1420 let hot: Vec<String> = (0..20).map(|i| format!("hot_{i}")).collect();
1422 for text in &hot {
1423 cache.get_or_compute(text);
1424 cache.get_or_compute(text); }
1426
1427 for i in 0..200 {
1429 cache.get_or_compute(&format!("scan_{i}"));
1430 }
1431
1432 let mut survivors = 0;
1434 for text in &hot {
1435 if cache.contains(text) {
1436 survivors += 1;
1437 }
1438 }
1439 assert!(
1440 survivors > 5,
1441 "scan resistance: only {survivors}/20 hot items survived"
1442 );
1443 }
1444
1445 #[test]
1446 fn s3fifo_default_capacity() {
1447 let cache = S3FifoWidthCache::with_default_capacity();
1448 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
1449 }
1450
1451 #[test]
1452 fn s3fifo_reset_stats() {
1453 let mut cache = S3FifoWidthCache::new(100);
1454 cache.get_or_compute("a");
1455 cache.get_or_compute("a");
1456 cache.reset_stats();
1457 let stats = cache.stats();
1458 assert_eq!(stats.hits, 0);
1459 assert_eq!(stats.misses, 0);
1460 }
1461}
1462
1463#[cfg(test)]
1464mod proptests {
1465 use super::*;
1466 use proptest::prelude::*;
1467
1468 proptest! {
1469 #[test]
1470 fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1471 let mut cache = WidthCache::new(100);
1472 let cached = cache.get_or_compute(&s);
1473 let direct = crate::display_width(&s);
1474 prop_assert_eq!(cached, direct);
1475 }
1476
1477 #[test]
1478 fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1479 let mut cache = WidthCache::new(100);
1480
1481 cache.get_or_compute(&s);
1482 let stats_before = cache.stats();
1483
1484 cache.get_or_compute(&s);
1485 let stats_after = cache.stats();
1486
1487 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1488 prop_assert_eq!(stats_after.misses, stats_before.misses);
1489 }
1490
1491 #[test]
1492 fn lru_never_exceeds_capacity(
1493 strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1494 capacity in 5usize..20
1495 ) {
1496 let mut cache = WidthCache::new(capacity);
1497
1498 for s in &strings {
1499 cache.get_or_compute(s);
1500 prop_assert!(cache.len() <= capacity);
1501 }
1502 }
1503
1504 #[test]
1505 fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1506 let mut cache = WidthCache::new(100);
1507
1508 cache.preload(&s);
1509 let stats_before = cache.stats();
1510
1511 cache.get_or_compute(&s);
1512 let stats_after = cache.stats();
1513
1514 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1516 }
1517 }
1518}
1519
1520#[cfg(test)]
1525mod tinylfu_tests {
1526 use super::*;
1527
1528 #[test]
1531 fn unit_cms_single_key_count() {
1532 let mut cms = CountMinSketch::with_defaults();
1533 let h = hash_text("hello");
1534
1535 for _ in 0..5 {
1536 cms.increment(h);
1537 }
1538 assert_eq!(cms.estimate(h), 5);
1539 }
1540
1541 #[test]
1542 fn unit_cms_unseen_key_is_zero() {
1543 let cms = CountMinSketch::with_defaults();
1544 assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1545 }
1546
1547 #[test]
1548 fn unit_cms_saturates_at_max() {
1549 let mut cms = CountMinSketch::with_defaults();
1550 let h = hash_text("hot");
1551
1552 for _ in 0..100 {
1553 cms.increment(h);
1554 }
1555 assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1556 }
1557
1558 #[test]
1559 fn unit_cms_bounds() {
1560 let mut cms = CountMinSketch::new(1024, 4, u64::MAX); let n: u64 = 1000;
1564
1565 for i in 0..n {
1567 cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1568 }
1569
1570 let target = 0xDEAD_BEEF_u64;
1572 for _ in 0..5 {
1573 cms.increment(target);
1574 }
1575
1576 let est = cms.estimate(target);
1577 let epsilon = std::f64::consts::E / 1024.0;
1578 let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1579
1580 assert!(
1581 (est as f64) <= upper_bound,
1582 "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1583 est,
1584 upper_bound,
1585 epsilon,
1586 n + 5,
1587 );
1588 assert!(est >= 5, "estimate {} should be >= true count 5", est);
1589 }
1590
1591 #[test]
1592 fn unit_cms_bounds_mass_test() {
1593 let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1594 let n = 2000u64;
1595
1596 let mut true_counts = vec![0u8; n as usize];
1597 for i in 0..n {
1598 let count = (i % 10 + 1) as u8;
1599 true_counts[i as usize] = count;
1600 for _ in 0..count {
1601 cms.increment(i);
1602 }
1603 }
1604
1605 let total = cms.total_increments();
1606 let epsilon = std::f64::consts::E / 1024.0;
1607 let mut violations = 0u32;
1608
1609 for i in 0..n {
1610 let est = cms.estimate(i);
1611 let true_c = true_counts[i as usize];
1612 let upper = true_c as f64 + epsilon * total as f64;
1613 if est as f64 > upper + 0.5 {
1614 violations += 1;
1615 }
1616 assert!(
1617 est >= true_c,
1618 "key {}: estimate {} < true count {}",
1619 i,
1620 est,
1621 true_c
1622 );
1623 }
1624
1625 let violation_rate = violations as f64 / n as f64;
1627 assert!(
1628 violation_rate <= 0.10,
1629 "violation rate {:.3} exceeds delta threshold",
1630 violation_rate,
1631 );
1632 }
1633
1634 #[test]
1635 fn unit_cms_halving_ages_counts() {
1636 let mut cms = CountMinSketch::new(64, 2, 100);
1637
1638 let h = hash_text("test");
1639 for _ in 0..10 {
1640 cms.increment(h);
1641 }
1642 assert_eq!(cms.estimate(h), 10);
1643
1644 for _ in 10..100 {
1646 cms.increment(hash_text("noise"));
1647 }
1648
1649 let est = cms.estimate(h);
1650 assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1651 }
1652
1653 #[test]
1654 fn unit_cms_monotone() {
1655 let mut cms = CountMinSketch::with_defaults();
1656 let h = hash_text("key");
1657
1658 let mut prev_est = 0u8;
1659 for _ in 0..CMS_MAX_COUNT {
1660 cms.increment(h);
1661 let est = cms.estimate(h);
1662 assert!(est >= prev_est, "estimate should be monotone");
1663 prev_est = est;
1664 }
1665 }
1666
1667 #[test]
1670 fn unit_doorkeeper_first_access_returns_false() {
1671 let mut dk = Doorkeeper::with_defaults();
1672 assert!(!dk.check_and_set(hash_text("new")));
1673 }
1674
1675 #[test]
1676 fn unit_doorkeeper_second_access_returns_true() {
1677 let mut dk = Doorkeeper::with_defaults();
1678 let h = hash_text("key");
1679 dk.check_and_set(h);
1680 assert!(dk.check_and_set(h));
1681 }
1682
1683 #[test]
1684 fn unit_doorkeeper_contains() {
1685 let mut dk = Doorkeeper::with_defaults();
1686 let h = hash_text("key");
1687 assert!(!dk.contains(h));
1688 dk.check_and_set(h);
1689 assert!(dk.contains(h));
1690 }
1691
1692 #[test]
1693 fn unit_doorkeeper_clear_resets() {
1694 let mut dk = Doorkeeper::with_defaults();
1695 let h = hash_text("key");
1696 dk.check_and_set(h);
1697 dk.clear();
1698 assert!(!dk.contains(h));
1699 assert!(!dk.check_and_set(h));
1700 }
1701
1702 #[test]
1703 fn unit_doorkeeper_false_positive_rate() {
1704 let mut dk = Doorkeeper::new(2048);
1705 let n = 100u64;
1706
1707 for i in 0..n {
1708 dk.check_and_set(i * 0x9E37_79B9 + 1);
1709 }
1710
1711 let mut false_positives = 0u32;
1712 for i in 0..1000 {
1713 let h = (i + 100_000) * 0x6A09_E667 + 7;
1714 if dk.contains(h) {
1715 false_positives += 1;
1716 }
1717 }
1718
1719 let fp_rate = false_positives as f64 / 1000.0;
1721 assert!(
1722 fp_rate < 0.15,
1723 "FP rate {:.3} too high (expected < 0.15)",
1724 fp_rate,
1725 );
1726 }
1727
1728 #[test]
1731 fn unit_admission_rule() {
1732 assert!(tinylfu_admit(5, 3)); assert!(!tinylfu_admit(3, 5)); assert!(!tinylfu_admit(3, 3)); }
1736
1737 #[test]
1738 fn unit_admission_rule_extremes() {
1739 assert!(tinylfu_admit(1, 0));
1740 assert!(!tinylfu_admit(0, 0));
1741 assert!(!tinylfu_admit(0, 1));
1742 assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1743 assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1744 }
1745
1746 #[test]
1749 fn unit_fingerprint_guard() {
1750 let fp1 = fingerprint_hash("hello");
1751 let fp2 = fingerprint_hash("world");
1752 let fp3 = fingerprint_hash("hello");
1753
1754 assert_ne!(
1755 fp1, fp2,
1756 "Different strings should have different fingerprints"
1757 );
1758 assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1759 }
1760
1761 #[test]
1762 fn unit_fingerprint_guard_collision_rate() {
1763 let mut fps = std::collections::HashSet::new();
1764 let n = 10_000;
1765
1766 for i in 0..n {
1767 let s = format!("string_{}", i);
1768 fps.insert(fingerprint_hash(&s));
1769 }
1770
1771 let collisions = n - fps.len();
1772 assert!(
1773 collisions == 0,
1774 "Expected 0 collisions in 10k items, got {}",
1775 collisions,
1776 );
1777 }
1778
1779 #[test]
1780 fn unit_fingerprint_independent_of_primary_hash() {
1781 let text = "test_string";
1782 let primary = hash_text(text);
1783 let secondary = fingerprint_hash(text);
1784
1785 assert_ne!(
1786 primary, secondary,
1787 "Fingerprint and primary hash should differ"
1788 );
1789 }
1790
1791 #[test]
1794 fn unit_doorkeeper_cms_pipeline() {
1795 let mut dk = Doorkeeper::with_defaults();
1796 let mut cms = CountMinSketch::with_defaults();
1797 let h = hash_text("item");
1798
1799 assert!(!dk.check_and_set(h));
1801 assert_eq!(cms.estimate(h), 0);
1802
1803 assert!(dk.check_and_set(h));
1805 cms.increment(h);
1806 assert_eq!(cms.estimate(h), 1);
1807
1808 assert!(dk.check_and_set(h));
1810 cms.increment(h);
1811 assert_eq!(cms.estimate(h), 2);
1812 }
1813
1814 #[test]
1815 fn unit_doorkeeper_filters_one_hit_wonders() {
1816 let mut dk = Doorkeeper::with_defaults();
1817 let mut cms = CountMinSketch::with_defaults();
1818
1819 for i in 0u64..100 {
1821 let h = i * 0x9E37_79B9 + 1;
1822 let seen = dk.check_and_set(h);
1823 if seen {
1824 cms.increment(h);
1825 }
1826 }
1827
1828 assert_eq!(cms.total_increments(), 0);
1829
1830 let h = 1; assert!(dk.check_and_set(h));
1833 cms.increment(h);
1834 assert_eq!(cms.total_increments(), 1);
1835 }
1836}
1837
1838#[cfg(test)]
1843mod tinylfu_impl_tests {
1844 use super::*;
1845
1846 #[test]
1847 fn basic_get_or_compute() {
1848 let mut cache = TinyLfuWidthCache::new(100);
1849 let w = cache.get_or_compute("hello");
1850 assert_eq!(w, 5);
1851 assert_eq!(cache.len(), 1);
1852
1853 let w2 = cache.get_or_compute("hello");
1854 assert_eq!(w2, 5);
1855 let stats = cache.stats();
1856 assert_eq!(stats.misses, 1);
1857 assert_eq!(stats.hits, 1);
1858 }
1859
1860 #[test]
1861 fn window_to_main_promotion() {
1862 let mut cache = TinyLfuWidthCache::new(100);
1865
1866 for _ in 0..10 {
1868 cache.get_or_compute("frequent");
1869 }
1870
1871 for i in 0..5 {
1873 cache.get_or_compute(&format!("item_{}", i));
1874 }
1875
1876 assert!(cache.contains("frequent"));
1878 assert!(cache.main_len() > 0 || cache.window_len() > 0);
1879 }
1880
1881 #[test]
1882 fn unit_window_promotion() {
1883 let mut cache = TinyLfuWidthCache::new(50);
1885
1886 for _ in 0..20 {
1888 cache.get_or_compute("hot");
1889 }
1890
1891 for i in 0..10 {
1893 cache.get_or_compute(&format!("filler_{}", i));
1894 }
1895
1896 assert!(cache.contains("hot"), "Frequent item should be retained");
1898 }
1899
1900 #[test]
1901 fn fingerprint_guard_detects_collision() {
1902 let mut cache = TinyLfuWidthCache::new(100);
1903
1904 let w = cache.get_or_compute_with("hello", |_| 42);
1906 assert_eq!(w, 42);
1907
1908 assert!(cache.contains("hello"));
1910 }
1911
1912 #[test]
1913 fn admission_rejects_infrequent() {
1914 let mut cache = TinyLfuWidthCache::new(10); for i in 0..9 {
1920 let s = format!("hot_{}", i);
1921 for _ in 0..5 {
1922 cache.get_or_compute(&s);
1923 }
1924 }
1925
1926 for i in 0..20 {
1929 cache.get_or_compute(&format!("cold_{}", i));
1930 }
1931
1932 let hot_survivors: usize = (0..9)
1934 .filter(|i| cache.contains(&format!("hot_{}", i)))
1935 .count();
1936 assert!(
1937 hot_survivors >= 5,
1938 "Expected most hot items to survive, got {}/9",
1939 hot_survivors
1940 );
1941 }
1942
1943 #[test]
1944 fn clear_empties_everything() {
1945 let mut cache = TinyLfuWidthCache::new(100);
1946 cache.get_or_compute("a");
1947 cache.get_or_compute("b");
1948 cache.clear();
1949 assert!(cache.is_empty());
1950 assert_eq!(cache.len(), 0);
1951 }
1952
1953 #[test]
1954 fn stats_reflect_usage() {
1955 let mut cache = TinyLfuWidthCache::new(100);
1956 cache.get_or_compute("a");
1957 cache.get_or_compute("a");
1958 cache.get_or_compute("b");
1959
1960 let stats = cache.stats();
1961 assert_eq!(stats.misses, 2);
1962 assert_eq!(stats.hits, 1);
1963 assert_eq!(stats.size, 2);
1964 }
1965
1966 #[test]
1967 fn capacity_is_respected() {
1968 let mut cache = TinyLfuWidthCache::new(20);
1969
1970 for i in 0..100 {
1971 cache.get_or_compute(&format!("item_{}", i));
1972 }
1973
1974 assert!(
1975 cache.len() <= 20,
1976 "Cache size {} exceeds capacity 20",
1977 cache.len()
1978 );
1979 }
1980
1981 #[test]
1982 fn reset_stats_works() {
1983 let mut cache = TinyLfuWidthCache::new(100);
1984 cache.get_or_compute("x");
1985 cache.get_or_compute("x");
1986 cache.reset_stats();
1987 let stats = cache.stats();
1988 assert_eq!(stats.hits, 0);
1989 assert_eq!(stats.misses, 0);
1990 }
1991
1992 #[test]
1993 fn perf_cache_hit_rate() {
1994 let mut cache = TinyLfuWidthCache::new(50);
1997
1998 for _ in 0..20 {
2000 for i in 0..10 {
2001 cache.get_or_compute(&format!("hot_{}", i));
2002 }
2003 }
2004
2005 for i in 0..100 {
2007 cache.get_or_compute(&format!("cold_{}", i));
2008 }
2009
2010 cache.reset_stats();
2012 for i in 0..10 {
2013 cache.get_or_compute(&format!("hot_{}", i));
2014 }
2015
2016 let stats = cache.stats();
2017 assert!(
2019 stats.hits >= 5,
2020 "Expected at least 5/10 hot items to hit, got {}",
2021 stats.hits
2022 );
2023 }
2024
2025 #[test]
2026 fn unicode_strings_work() {
2027 let mut cache = TinyLfuWidthCache::new(100);
2028 assert_eq!(cache.get_or_compute("日本語"), 6);
2029 assert_eq!(cache.get_or_compute("café"), 4);
2030 assert_eq!(cache.get_or_compute("日本語"), 6); assert_eq!(cache.stats().hits, 1);
2032 }
2033
2034 #[test]
2035 fn empty_string() {
2036 let mut cache = TinyLfuWidthCache::new(100);
2037 assert_eq!(cache.get_or_compute(""), 0);
2038 }
2039
2040 #[test]
2041 fn minimum_capacity() {
2042 let cache = TinyLfuWidthCache::new(0);
2043 assert!(cache.capacity() >= 2);
2044 }
2045}
2046
2047#[cfg(test)]
2053struct Lcg(u64);
2054
2055#[cfg(test)]
2056impl Lcg {
2057 fn new(seed: u64) -> Self {
2058 Self(seed)
2059 }
2060 fn next_u64(&mut self) -> u64 {
2061 self.0 = self
2062 .0
2063 .wrapping_mul(6_364_136_223_846_793_005)
2064 .wrapping_add(1);
2065 self.0
2066 }
2067 fn next_usize(&mut self, max: usize) -> usize {
2068 (self.next_u64() % (max as u64)) as usize
2069 }
2070}
2071
2072#[cfg(test)]
2077mod property_cache_equivalence {
2078 use super::*;
2079 use proptest::prelude::*;
2080
2081 proptest! {
2082 #[test]
2083 fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
2084 let mut cache = TinyLfuWidthCache::new(200);
2085 let cached = cache.get_or_compute(&s);
2086 let direct = crate::display_width(&s);
2087 prop_assert_eq!(cached, direct,
2088 "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
2089 }
2090
2091 #[test]
2092 fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
2093 let mut cache = TinyLfuWidthCache::new(200);
2094 let first = cache.get_or_compute(&s);
2095 let second = cache.get_or_compute(&s);
2096 prop_assert_eq!(first, second,
2097 "First access returned {} but second returned {} for {:?}", first, second, s);
2098 }
2099
2100 #[test]
2101 fn tinylfu_never_exceeds_capacity(
2102 strings in prop::collection::vec("[a-z]{1,5}", 10..200),
2103 capacity in 10usize..50
2104 ) {
2105 let mut cache = TinyLfuWidthCache::new(capacity);
2106 for s in &strings {
2107 cache.get_or_compute(s);
2108 prop_assert!(cache.len() <= capacity,
2109 "Cache size {} exceeded capacity {}", cache.len(), capacity);
2110 }
2111 }
2112
2113 #[test]
2114 fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
2115 let mut cache = TinyLfuWidthCache::new(100);
2116 let custom_fn = |text: &str| text.len(); let cached = cache.get_or_compute_with(&s, custom_fn);
2118 prop_assert_eq!(cached, s.len(),
2119 "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
2120 }
2121 }
2122
2123 #[test]
2124 fn deterministic_seed_equivalence() {
2125 let mut rng = super::Lcg::new(0xDEAD_BEEF);
2127
2128 let mut cache1 = TinyLfuWidthCache::new(50);
2129 let mut results1 = Vec::new();
2130 for _ in 0..500 {
2131 let idx = rng.next_usize(100);
2132 let s = format!("key_{}", idx);
2133 results1.push(cache1.get_or_compute(&s));
2134 }
2135
2136 let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
2137 let mut cache2 = TinyLfuWidthCache::new(50);
2138 let mut results2 = Vec::new();
2139 for _ in 0..500 {
2140 let idx = rng2.next_usize(100);
2141 let s = format!("key_{}", idx);
2142 results2.push(cache2.get_or_compute(&s));
2143 }
2144
2145 assert_eq!(
2146 results1, results2,
2147 "Deterministic seed must produce identical results"
2148 );
2149 }
2150
2151 #[test]
2152 fn both_caches_agree_on_widths() {
2153 let mut lru = WidthCache::new(200);
2156 let mut tlfu = TinyLfuWidthCache::new(200);
2157
2158 let inputs = [
2159 "",
2160 "hello",
2161 "日本語テスト",
2162 "café résumé",
2163 "a\tb",
2164 "🎉🎊🎈",
2165 "mixed日本eng",
2166 " spaces ",
2167 "AAAAAAAAAAAAAAAAAAAAAAAAA",
2168 "x",
2169 ];
2170
2171 for &s in &inputs {
2172 let w1 = lru.get_or_compute(s);
2173 let w2 = tlfu.get_or_compute(s);
2174 assert_eq!(
2175 w1, w2,
2176 "Width mismatch for {:?}: LRU={}, TinyLFU={}",
2177 s, w1, w2
2178 );
2179 }
2180 }
2181}
2182
2183#[cfg(test)]
2188mod e2e_cache_replay {
2189 use super::*;
2190 use std::time::Instant;
2191
2192 #[derive(Debug)]
2194 struct ReplayRecord {
2195 step: usize,
2196 key: String,
2197 width: usize,
2198 hit: bool,
2199 latency_ns: u128,
2200 }
2201
2202 impl ReplayRecord {
2203 fn to_jsonl(&self) -> String {
2204 format!(
2205 r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
2206 self.step, self.key, self.width, self.hit, self.latency_ns,
2207 )
2208 }
2209 }
2210
2211 fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
2212 (0..n)
2214 .map(|_| {
2215 let raw = rng.next_usize(universe * universe);
2216 let idx = (raw as f64).sqrt() as usize % universe;
2217 format!("item_{}", idx)
2218 })
2219 .collect()
2220 }
2221
2222 #[test]
2223 fn replay_zipfian_tinylfu() {
2224 let mut rng = super::Lcg::new(0x1234_5678);
2225 let workload = zipfian_workload(&mut rng, 2000, 200);
2226
2227 let mut cache = TinyLfuWidthCache::new(50);
2228 let mut records = Vec::with_capacity(workload.len());
2229
2230 for (i, key) in workload.iter().enumerate() {
2231 let stats_before = cache.stats();
2232 let t0 = Instant::now();
2233 let width = cache.get_or_compute(key);
2234 let elapsed = t0.elapsed().as_nanos();
2235 let stats_after = cache.stats();
2236 let hit = stats_after.hits > stats_before.hits;
2237
2238 records.push(ReplayRecord {
2239 step: i,
2240 key: key.clone(),
2241 width,
2242 hit,
2243 latency_ns: elapsed,
2244 });
2245 }
2246
2247 for r in &records[..5] {
2249 let line = r.to_jsonl();
2250 assert!(
2251 line.starts_with('{') && line.ends_with('}'),
2252 "Bad JSONL: {}",
2253 line
2254 );
2255 }
2256
2257 let total = records.len();
2259 let hits = records.iter().filter(|r| r.hit).count();
2260 let hit_rate = hits as f64 / total as f64;
2261
2262 assert!(
2265 hit_rate > 0.10,
2266 "Zipfian hit rate too low: {:.2}% ({}/{})",
2267 hit_rate * 100.0,
2268 hits,
2269 total
2270 );
2271 }
2272
2273 #[test]
2274 fn replay_zipfian_lru_comparison() {
2275 let mut rng = super::Lcg::new(0x1234_5678);
2276 let workload = zipfian_workload(&mut rng, 2000, 200);
2277
2278 let mut tlfu = TinyLfuWidthCache::new(50);
2279 let mut lru = WidthCache::new(50);
2280
2281 for key in &workload {
2282 tlfu.get_or_compute(key);
2283 lru.get_or_compute(key);
2284 }
2285
2286 let tlfu_stats = tlfu.stats();
2287 let lru_stats = lru.stats();
2288
2289 assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2291 assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2292
2293 assert!(
2296 tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2297 "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2298 tlfu_stats.hit_rate() * 100.0,
2299 lru_stats.hit_rate() * 100.0,
2300 );
2301 }
2302
2303 #[test]
2304 fn replay_deterministic_reproduction() {
2305 let run = |seed: u64| -> Vec<bool> {
2307 let mut rng = super::Lcg::new(seed);
2308 let workload = zipfian_workload(&mut rng, 500, 100);
2309 let mut cache = TinyLfuWidthCache::new(30);
2310 let mut hits = Vec::with_capacity(500);
2311 for key in &workload {
2312 let before = cache.stats().hits;
2313 cache.get_or_compute(key);
2314 hits.push(cache.stats().hits > before);
2315 }
2316 hits
2317 };
2318
2319 let run1 = run(0xABCD_EF01);
2320 let run2 = run(0xABCD_EF01);
2321 assert_eq!(run1, run2, "Deterministic replay diverged");
2322 }
2323
2324 #[test]
2325 fn replay_uniform_workload() {
2326 let mut rng = super::Lcg::new(0x9999);
2329 let universe = 100;
2330 let cache_size = 25;
2331 let n = 5000;
2332
2333 let mut cache = TinyLfuWidthCache::new(cache_size);
2334
2335 for i in 0..universe {
2337 cache.get_or_compute(&format!("u_{}", i));
2338 }
2339
2340 cache.reset_stats();
2341 for _ in 0..n {
2342 let idx = rng.next_usize(universe);
2343 cache.get_or_compute(&format!("u_{}", idx));
2344 }
2345
2346 let stats = cache.stats();
2347 let hit_rate = stats.hit_rate();
2348 assert!(
2350 hit_rate > 0.10 && hit_rate < 0.60,
2351 "Uniform hit rate {:.2}% outside expected range",
2352 hit_rate * 100.0,
2353 );
2354 }
2355}
2356
2357#[cfg(test)]
2362mod perf_cache_overhead {
2363 use super::*;
2364 use std::time::Instant;
2365
2366 fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2368 let mut latencies = Vec::with_capacity(n);
2369 for i in 0..n {
2370 let t0 = Instant::now();
2371 op(i);
2372 latencies.push(t0.elapsed().as_nanos());
2373 }
2374 latencies.sort_unstable();
2375 latencies
2376 }
2377
2378 fn p95(sorted: &[u128]) -> u128 {
2379 let len = sorted.len();
2380 let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2381 sorted[idx]
2382 }
2383
2384 fn p99(sorted: &[u128]) -> u128 {
2385 let len = sorted.len();
2386 let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2387 sorted[idx]
2388 }
2389
2390 fn median(sorted: &[u128]) -> u128 {
2391 sorted[sorted.len() / 2]
2392 }
2393
2394 #[allow(unexpected_cfgs)]
2395 fn perf_budget_ns(base_ns: u128) -> u128 {
2396 if cfg!(coverage) || cfg!(coverage_nightly) {
2397 base_ns.saturating_mul(10)
2398 } else {
2399 base_ns
2400 }
2401 }
2402
2403 #[test]
2404 fn perf_lru_hit_latency() {
2405 let mut cache = WidthCache::new(1000);
2406 for i in 0..100 {
2408 cache.get_or_compute(&format!("warm_{}", i));
2409 }
2410
2411 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2412 let latencies = measure_latencies(10_000, |i| {
2413 let _ = cache.get_or_compute(&keys[i % 100]);
2414 });
2415
2416 let p95_ns = p95(&latencies);
2417 let budget_ns = perf_budget_ns(5_000);
2420 assert!(
2421 p95_ns < budget_ns,
2422 "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2423 p95_ns,
2424 budget_ns,
2425 median(&latencies),
2426 p99(&latencies),
2427 );
2428 }
2429
2430 #[test]
2431 fn perf_tinylfu_hit_latency() {
2432 let mut cache = TinyLfuWidthCache::new(1000);
2433 for _ in 0..5 {
2435 for i in 0..100 {
2436 cache.get_or_compute(&format!("warm_{}", i));
2437 }
2438 }
2439
2440 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2441 let latencies = measure_latencies(10_000, |i| {
2442 let _ = cache.get_or_compute(&keys[i % 100]);
2443 });
2444
2445 let p95_ns = p95(&latencies);
2446 let budget_ns = perf_budget_ns(5_000);
2448 assert!(
2449 p95_ns < budget_ns,
2450 "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2451 p95_ns,
2452 budget_ns,
2453 median(&latencies),
2454 p99(&latencies),
2455 );
2456 }
2457
2458 #[test]
2459 fn perf_tinylfu_miss_latency() {
2460 let mut cache = TinyLfuWidthCache::new(100);
2461 let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2462
2463 let latencies = measure_latencies(10_000, |i| {
2464 let _ = cache.get_or_compute(&keys[i]);
2465 });
2466
2467 let p95_ns = p95(&latencies);
2468 let budget_ns = perf_budget_ns(20_000);
2471 assert!(
2472 p95_ns < budget_ns,
2473 "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2474 p95_ns,
2475 budget_ns,
2476 median(&latencies),
2477 p99(&latencies),
2478 );
2479 }
2480
2481 #[test]
2482 fn perf_cms_increment_latency() {
2483 let mut cms = CountMinSketch::with_defaults();
2484 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2485
2486 let latencies = measure_latencies(10_000, |i| {
2487 cms.increment(hashes[i]);
2488 });
2489
2490 let p95_ns = p95(&latencies);
2491 let budget_ns = perf_budget_ns(2_000);
2493 assert!(
2494 p95_ns < budget_ns,
2495 "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2496 p95_ns,
2497 budget_ns,
2498 median(&latencies),
2499 );
2500 }
2501
2502 #[test]
2503 fn perf_doorkeeper_latency() {
2504 let mut dk = Doorkeeper::with_defaults();
2505 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2506
2507 let latencies = measure_latencies(10_000, |i| {
2508 let _ = dk.check_and_set(hashes[i]);
2509 });
2510
2511 let p95_ns = p95(&latencies);
2512 let budget_ns = perf_budget_ns(1_000);
2514 assert!(
2515 p95_ns < budget_ns,
2516 "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2517 p95_ns,
2518 budget_ns,
2519 median(&latencies),
2520 );
2521 }
2522
2523 #[test]
2524 fn perf_fingerprint_hash_latency() {
2525 let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2526
2527 let latencies = measure_latencies(10_000, |i| {
2528 let _ = fingerprint_hash(&keys[i]);
2529 });
2530
2531 let p95_ns = p95(&latencies);
2532 let budget_ns = perf_budget_ns(1_000);
2534 assert!(
2535 p95_ns < budget_ns,
2536 "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2537 p95_ns,
2538 budget_ns,
2539 median(&latencies),
2540 );
2541 }
2542}