1#![forbid(unsafe_code)]
2
3use lru::LruCache;
37use rustc_hash::FxHasher;
38use std::hash::{Hash, Hasher};
39use std::num::NonZeroUsize;
40
41pub const DEFAULT_CACHE_CAPACITY: usize = 4096;
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
46pub struct CacheStats {
47 pub hits: u64,
49 pub misses: u64,
51 pub size: usize,
53 pub capacity: usize,
55}
56
57impl CacheStats {
58 #[must_use]
60 pub fn hit_rate(&self) -> f64 {
61 let total = self.hits + self.misses;
62 if total == 0 {
63 0.0
64 } else {
65 self.hits as f64 / total as f64
66 }
67 }
68}
69
70#[derive(Debug)]
92pub struct WidthCache {
93 cache: LruCache<u64, usize>,
94 hits: u64,
95 misses: u64,
96}
97
98impl WidthCache {
99 #[must_use]
103 pub fn new(capacity: usize) -> Self {
104 let capacity = NonZeroUsize::new(capacity.max(1)).expect("capacity must be > 0");
105 Self {
106 cache: LruCache::new(capacity),
107 hits: 0,
108 misses: 0,
109 }
110 }
111
112 #[must_use]
114 pub fn with_default_capacity() -> Self {
115 Self::new(DEFAULT_CACHE_CAPACITY)
116 }
117
118 #[inline]
123 pub fn get_or_compute(&mut self, text: &str) -> usize {
124 self.get_or_compute_with(text, crate::display_width)
125 }
126
127 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
132 where
133 F: FnOnce(&str) -> usize,
134 {
135 let hash = hash_text(text);
136
137 if let Some(&width) = self.cache.get(&hash) {
138 self.hits += 1;
139 return width;
140 }
141
142 self.misses += 1;
143 let width = compute(text);
144 self.cache.put(hash, width);
145 width
146 }
147
148 #[must_use]
150 pub fn contains(&self, text: &str) -> bool {
151 let hash = hash_text(text);
152 self.cache.contains(&hash)
153 }
154
155 #[must_use]
160 pub fn get(&mut self, text: &str) -> Option<usize> {
161 let hash = hash_text(text);
162 self.cache.get(&hash).copied()
163 }
164
165 #[must_use]
167 pub fn peek(&self, text: &str) -> Option<usize> {
168 let hash = hash_text(text);
169 self.cache.peek(&hash).copied()
170 }
171
172 pub fn preload(&mut self, text: &str) {
176 let hash = hash_text(text);
177 if !self.cache.contains(&hash) {
178 let width = crate::display_width(text);
179 self.cache.put(hash, width);
180 }
181 }
182
183 pub fn preload_many<'a>(&mut self, texts: impl IntoIterator<Item = &'a str>) {
185 for text in texts {
186 self.preload(text);
187 }
188 }
189
190 pub fn clear(&mut self) {
192 self.cache.clear();
193 }
194
195 pub fn reset_stats(&mut self) {
197 self.hits = 0;
198 self.misses = 0;
199 }
200
201 #[inline]
203 #[must_use]
204 pub fn stats(&self) -> CacheStats {
205 CacheStats {
206 hits: self.hits,
207 misses: self.misses,
208 size: self.cache.len(),
209 capacity: self.cache.cap().get(),
210 }
211 }
212
213 #[inline]
215 #[must_use]
216 pub fn len(&self) -> usize {
217 self.cache.len()
218 }
219
220 #[inline]
222 #[must_use]
223 pub fn is_empty(&self) -> bool {
224 self.cache.is_empty()
225 }
226
227 #[inline]
229 #[must_use]
230 pub fn capacity(&self) -> usize {
231 self.cache.cap().get()
232 }
233
234 pub fn resize(&mut self, new_capacity: usize) {
239 let new_capacity = NonZeroUsize::new(new_capacity.max(1)).expect("capacity must be > 0");
240 self.cache.resize(new_capacity);
241 }
242}
243
244impl Default for WidthCache {
245 fn default() -> Self {
246 Self::with_default_capacity()
247 }
248}
249
250#[inline]
252fn hash_text(text: &str) -> u64 {
253 let mut hasher = FxHasher::default();
254 text.hash(&mut hasher);
255 hasher.finish()
256}
257
258#[cfg(feature = "thread_local_cache")]
263thread_local! {
264 static THREAD_CACHE: std::cell::RefCell<WidthCache> =
265 std::cell::RefCell::new(WidthCache::with_default_capacity());
266}
267
268#[cfg(feature = "thread_local_cache")]
270pub fn cached_width(text: &str) -> usize {
271 THREAD_CACHE.with(|cache| cache.borrow_mut().get_or_compute(text))
272}
273
274#[cfg(feature = "thread_local_cache")]
276pub fn clear_thread_cache() {
277 THREAD_CACHE.with(|cache| cache.borrow_mut().clear());
278}
279
280#[cfg(test)]
281mod tests {
282 use super::*;
283
284 #[test]
285 fn new_cache_is_empty() {
286 let cache = WidthCache::new(100);
287 assert!(cache.is_empty());
288 assert_eq!(cache.len(), 0);
289 assert_eq!(cache.capacity(), 100);
290 }
291
292 #[test]
293 fn default_capacity() {
294 let cache = WidthCache::with_default_capacity();
295 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
296 }
297
298 #[test]
299 fn get_or_compute_caches_value() {
300 let mut cache = WidthCache::new(100);
301
302 let width1 = cache.get_or_compute("hello");
303 assert_eq!(width1, 5);
304 assert_eq!(cache.len(), 1);
305
306 let width2 = cache.get_or_compute("hello");
307 assert_eq!(width2, 5);
308 assert_eq!(cache.len(), 1); let stats = cache.stats();
311 assert_eq!(stats.hits, 1);
312 assert_eq!(stats.misses, 1);
313 }
314
315 #[test]
316 fn get_or_compute_different_strings() {
317 let mut cache = WidthCache::new(100);
318
319 cache.get_or_compute("hello");
320 cache.get_or_compute("world");
321 cache.get_or_compute("foo");
322
323 assert_eq!(cache.len(), 3);
324 let stats = cache.stats();
325 assert_eq!(stats.misses, 3);
326 assert_eq!(stats.hits, 0);
327 }
328
329 #[test]
330 fn get_or_compute_cjk() {
331 let mut cache = WidthCache::new(100);
332
333 let width = cache.get_or_compute("你好");
334 assert_eq!(width, 4); }
336
337 #[test]
338 fn contains() {
339 let mut cache = WidthCache::new(100);
340
341 assert!(!cache.contains("hello"));
342 cache.get_or_compute("hello");
343 assert!(cache.contains("hello"));
344 }
345
346 #[test]
347 fn get_returns_none_for_missing() {
348 let mut cache = WidthCache::new(100);
349 assert!(cache.get("missing").is_none());
350 }
351
352 #[test]
353 fn get_returns_cached_value() {
354 let mut cache = WidthCache::new(100);
355 cache.get_or_compute("hello");
356
357 let width = cache.get("hello");
358 assert_eq!(width, Some(5));
359 }
360
361 #[test]
362 fn peek_does_not_update_lru() {
363 let mut cache = WidthCache::new(2);
364
365 cache.get_or_compute("a");
366 cache.get_or_compute("b");
367
368 let _ = cache.peek("a");
370
371 cache.get_or_compute("c");
373
374 assert!(!cache.contains("a"));
375 assert!(cache.contains("b"));
376 assert!(cache.contains("c"));
377 }
378
379 #[test]
380 fn lru_eviction() {
381 let mut cache = WidthCache::new(2);
382
383 cache.get_or_compute("a");
384 cache.get_or_compute("b");
385 cache.get_or_compute("c"); assert!(!cache.contains("a"));
388 assert!(cache.contains("b"));
389 assert!(cache.contains("c"));
390 }
391
392 #[test]
393 fn lru_refresh_on_access() {
394 let mut cache = WidthCache::new(2);
395
396 cache.get_or_compute("a");
397 cache.get_or_compute("b");
398 cache.get_or_compute("a"); cache.get_or_compute("c"); assert!(cache.contains("a"));
402 assert!(!cache.contains("b"));
403 assert!(cache.contains("c"));
404 }
405
406 #[test]
407 fn preload() {
408 let mut cache = WidthCache::new(100);
409
410 cache.preload("hello");
411 assert!(cache.contains("hello"));
412 assert_eq!(cache.peek("hello"), Some(5));
413
414 let stats = cache.stats();
415 assert_eq!(stats.misses, 0); assert_eq!(stats.hits, 0);
417 }
418
419 #[test]
420 fn preload_many() {
421 let mut cache = WidthCache::new(100);
422
423 cache.preload_many(["hello", "world", "foo"]);
424 assert_eq!(cache.len(), 3);
425 }
426
427 #[test]
428 fn clear() {
429 let mut cache = WidthCache::new(100);
430 cache.get_or_compute("hello");
431 cache.get_or_compute("world");
432
433 cache.clear();
434 assert!(cache.is_empty());
435 assert!(!cache.contains("hello"));
436 }
437
438 #[test]
439 fn reset_stats() {
440 let mut cache = WidthCache::new(100);
441 cache.get_or_compute("hello");
442 cache.get_or_compute("hello");
443
444 let stats = cache.stats();
445 assert_eq!(stats.hits, 1);
446 assert_eq!(stats.misses, 1);
447
448 cache.reset_stats();
449 let stats = cache.stats();
450 assert_eq!(stats.hits, 0);
451 assert_eq!(stats.misses, 0);
452 }
453
454 #[test]
455 fn hit_rate() {
456 let stats = CacheStats {
457 hits: 75,
458 misses: 25,
459 size: 100,
460 capacity: 1000,
461 };
462 assert!((stats.hit_rate() - 0.75).abs() < 0.001);
463 }
464
465 #[test]
466 fn hit_rate_no_requests() {
467 let stats = CacheStats::default();
468 assert_eq!(stats.hit_rate(), 0.0);
469 }
470
471 #[test]
472 fn resize_smaller() {
473 let mut cache = WidthCache::new(100);
474 for i in 0..50 {
475 cache.get_or_compute(&format!("text{i}"));
476 }
477 assert_eq!(cache.len(), 50);
478
479 cache.resize(10);
480 assert!(cache.len() <= 10);
481 assert_eq!(cache.capacity(), 10);
482 }
483
484 #[test]
485 fn resize_larger() {
486 let mut cache = WidthCache::new(10);
487 cache.resize(100);
488 assert_eq!(cache.capacity(), 100);
489 }
490
491 #[test]
492 fn custom_compute_function() {
493 let mut cache = WidthCache::new(100);
494
495 let width = cache.get_or_compute_with("hello", |_| 42);
497 assert_eq!(width, 42);
498
499 assert_eq!(cache.peek("hello"), Some(42));
501 }
502
503 #[test]
504 fn empty_string() {
505 let mut cache = WidthCache::new(100);
506 let width = cache.get_or_compute("");
507 assert_eq!(width, 0);
508 }
509
510 #[test]
511 fn hash_collision_handling() {
512 let mut cache = WidthCache::new(1000);
515
516 for i in 0..500 {
517 cache.get_or_compute(&format!("string{i}"));
518 }
519
520 assert_eq!(cache.len(), 500);
521 }
522
523 #[test]
524 fn unicode_strings() {
525 let mut cache = WidthCache::new(100);
526
527 assert_eq!(cache.get_or_compute("café"), 4);
529 assert_eq!(cache.get_or_compute("日本語"), 6);
530 assert_eq!(cache.get_or_compute("🎉"), 2); assert_eq!(cache.len(), 3);
533 }
534
535 #[test]
536 fn combining_characters() {
537 let mut cache = WidthCache::new(100);
538
539 let width = cache.get_or_compute("e\u{0301}");
541 assert_eq!(width, 1);
543 }
544
545 #[test]
550 fn default_cache() {
551 let cache = WidthCache::default();
552 assert!(cache.is_empty());
553 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
554 }
555
556 #[test]
557 fn cache_stats_debug() {
558 let stats = CacheStats {
559 hits: 10,
560 misses: 5,
561 size: 15,
562 capacity: 100,
563 };
564 let debug = format!("{:?}", stats);
565 assert!(debug.contains("CacheStats"));
566 assert!(debug.contains("10")); }
568
569 #[test]
570 fn cache_stats_default() {
571 let stats = CacheStats::default();
572 assert_eq!(stats.hits, 0);
573 assert_eq!(stats.misses, 0);
574 assert_eq!(stats.size, 0);
575 assert_eq!(stats.capacity, 0);
576 }
577
578 #[test]
579 fn cache_stats_equality() {
580 let stats1 = CacheStats {
581 hits: 10,
582 misses: 5,
583 size: 15,
584 capacity: 100,
585 };
586 let stats2 = stats1; assert_eq!(stats1, stats2);
588 }
589
590 #[test]
591 fn clear_after_preload() {
592 let mut cache = WidthCache::new(100);
593 cache.preload_many(["hello", "world", "test"]);
594 assert_eq!(cache.len(), 3);
595
596 cache.clear();
597 assert!(cache.is_empty());
598 assert!(!cache.contains("hello"));
599 }
600
601 #[test]
602 fn preload_existing_is_noop() {
603 let mut cache = WidthCache::new(100);
604 cache.get_or_compute("hello"); let len_before = cache.len();
606
607 cache.preload("hello"); assert_eq!(cache.len(), len_before);
609 }
610
611 #[test]
612 fn minimum_capacity_is_one() {
613 let cache = WidthCache::new(0);
614 assert_eq!(cache.capacity(), 1);
615 }
616
617 #[test]
618 fn width_cache_debug() {
619 let cache = WidthCache::new(10);
620 let debug = format!("{:?}", cache);
621 assert!(debug.contains("WidthCache"));
622 }
623
624 #[test]
625 fn emoji_zwj_sequence() {
626 let mut cache = WidthCache::new(100);
627 let width = cache.get_or_compute("👨👩👧");
629 assert!(width >= 1);
631 }
632
633 #[test]
634 fn emoji_with_skin_tone() {
635 let mut cache = WidthCache::new(100);
636 let width = cache.get_or_compute("👍🏻");
637 assert!(width >= 1);
638 }
639
640 #[test]
641 fn flag_emoji() {
642 let mut cache = WidthCache::new(100);
643 let width = cache.get_or_compute("🇺🇸");
645 assert!(width >= 1);
646 }
647
648 #[test]
649 fn mixed_width_strings() {
650 let mut cache = WidthCache::new(100);
651 let width = cache.get_or_compute("Hello你好World");
653 assert_eq!(width, 14); }
655
656 #[test]
657 fn stats_size_reflects_cache_len() {
658 let mut cache = WidthCache::new(100);
659 cache.get_or_compute("a");
660 cache.get_or_compute("b");
661 cache.get_or_compute("c");
662
663 let stats = cache.stats();
664 assert_eq!(stats.size, cache.len());
665 assert_eq!(stats.size, 3);
666 }
667
668 #[test]
669 fn stats_capacity_matches() {
670 let cache = WidthCache::new(42);
671 let stats = cache.stats();
672 assert_eq!(stats.capacity, 42);
673 }
674
675 #[test]
676 fn resize_to_zero_becomes_one() {
677 let mut cache = WidthCache::new(100);
678 cache.resize(0);
679 assert_eq!(cache.capacity(), 1);
680 }
681
682 #[test]
683 fn get_updates_lru_order() {
684 let mut cache = WidthCache::new(2);
685
686 cache.get_or_compute("a");
687 cache.get_or_compute("b");
688
689 let _ = cache.get("a");
691
692 cache.get_or_compute("c");
694
695 assert!(cache.contains("a"));
696 assert!(!cache.contains("b"));
697 assert!(cache.contains("c"));
698 }
699
700 #[test]
701 fn contains_does_not_modify_stats() {
702 let mut cache = WidthCache::new(100);
703 cache.get_or_compute("hello");
704
705 let stats_before = cache.stats();
706 let _ = cache.contains("hello");
707 let _ = cache.contains("missing");
708 let stats_after = cache.stats();
709
710 assert_eq!(stats_before.hits, stats_after.hits);
711 assert_eq!(stats_before.misses, stats_after.misses);
712 }
713
714 #[test]
715 fn peek_returns_none_for_missing() {
716 let cache = WidthCache::new(100);
717 assert!(cache.peek("missing").is_none());
718 }
719
720 #[test]
721 fn custom_compute_called_once() {
722 let mut cache = WidthCache::new(100);
723 let mut call_count = 0;
724
725 cache.get_or_compute_with("test", |_| {
726 call_count += 1;
727 10
728 });
729
730 cache.get_or_compute_with("test", |_| {
731 call_count += 1;
732 20 });
734
735 assert_eq!(call_count, 1);
736 assert_eq!(cache.peek("test"), Some(10));
737 }
738
739 #[test]
740 fn whitespace_strings() {
741 let mut cache = WidthCache::new(100);
742 assert_eq!(cache.get_or_compute(" "), 3); assert_eq!(cache.get_or_compute("\t"), 1); assert_eq!(cache.get_or_compute("\n"), 1); }
746}
747
748#[derive(Debug, Clone)]
802pub struct CountMinSketch {
803 counters: Vec<Vec<u8>>,
805 depth: usize,
807 width: usize,
809 total_increments: u64,
811 reset_interval: u64,
813}
814
815const CMS_MAX_COUNT: u8 = 15;
817
818const CMS_DEFAULT_WIDTH: usize = 1024;
820
821const CMS_DEFAULT_DEPTH: usize = 4;
823
824const CMS_DEFAULT_RESET_INTERVAL: u64 = 8192;
826
827impl CountMinSketch {
828 pub fn new(width: usize, depth: usize, reset_interval: u64) -> Self {
830 let width = width.max(1);
831 let depth = depth.max(1);
832 Self {
833 counters: vec![vec![0u8; width]; depth],
834 depth,
835 width,
836 total_increments: 0,
837 reset_interval: reset_interval.max(1),
838 }
839 }
840
841 pub fn with_defaults() -> Self {
843 Self::new(
844 CMS_DEFAULT_WIDTH,
845 CMS_DEFAULT_DEPTH,
846 CMS_DEFAULT_RESET_INTERVAL,
847 )
848 }
849
850 pub fn increment(&mut self, hash: u64) {
852 for row in 0..self.depth {
853 let idx = self.index(hash, row);
854 self.counters[row][idx] = self.counters[row][idx].saturating_add(1).min(CMS_MAX_COUNT);
855 }
856 self.total_increments += 1;
857
858 if self.total_increments >= self.reset_interval {
859 self.halve();
860 }
861 }
862
863 pub fn estimate(&self, hash: u64) -> u8 {
865 let mut min = u8::MAX;
866 for row in 0..self.depth {
867 let idx = self.index(hash, row);
868 min = min.min(self.counters[row][idx]);
869 }
870 min
871 }
872
873 pub fn total_increments(&self) -> u64 {
875 self.total_increments
876 }
877
878 fn halve(&mut self) {
880 for row in &mut self.counters {
881 for c in row.iter_mut() {
882 *c /= 2;
883 }
884 }
885 self.total_increments = 0;
886 }
887
888 pub fn clear(&mut self) {
890 for row in &mut self.counters {
891 row.fill(0);
892 }
893 self.total_increments = 0;
894 }
895
896 #[inline]
898 fn index(&self, hash: u64, row: usize) -> usize {
899 let mixed = hash
901 .wrapping_mul(0x517c_c1b7_2722_0a95)
902 .wrapping_add(row as u64);
903 let mixed = mixed ^ (mixed >> 32);
904 (mixed as usize) % self.width
905 }
906}
907
908#[derive(Debug, Clone)]
914pub struct Doorkeeper {
915 bits: Vec<u64>,
916 num_bits: usize,
917}
918
919const DOORKEEPER_DEFAULT_BITS: usize = 2048;
921
922impl Doorkeeper {
923 pub fn new(num_bits: usize) -> Self {
925 let num_bits = num_bits.max(64);
926 let num_words = num_bits.div_ceil(64);
927 Self {
928 bits: vec![0u64; num_words],
929 num_bits,
930 }
931 }
932
933 pub fn with_defaults() -> Self {
935 Self::new(DOORKEEPER_DEFAULT_BITS)
936 }
937
938 pub fn check_and_set(&mut self, hash: u64) -> bool {
940 let idx = (hash as usize) % self.num_bits;
941 let word = idx / 64;
942 let bit = idx % 64;
943 let was_set = (self.bits[word] >> bit) & 1 == 1;
944 self.bits[word] |= 1 << bit;
945 was_set
946 }
947
948 pub fn contains(&self, hash: u64) -> bool {
950 let idx = (hash as usize) % self.num_bits;
951 let word = idx / 64;
952 let bit = idx % 64;
953 (self.bits[word] >> bit) & 1 == 1
954 }
955
956 pub fn clear(&mut self) {
958 self.bits.fill(0);
959 }
960}
961
962#[inline]
967pub fn fingerprint_hash(text: &str) -> u64 {
968 let mut h: u64 = 0xcbf2_9ce4_8422_2325; for &b in text.as_bytes() {
971 h ^= b as u64;
972 h = h.wrapping_mul(0x0100_0000_01b3); }
974 h
975}
976
977#[inline]
984pub fn tinylfu_admit(candidate_freq: u8, victim_freq: u8) -> bool {
985 candidate_freq > victim_freq
986}
987
988#[derive(Debug, Clone)]
994struct TinyLfuEntry {
995 width: usize,
996 fingerprint: u64,
997}
998
999#[derive(Debug)]
1022pub struct TinyLfuWidthCache {
1023 window: LruCache<u64, TinyLfuEntry>,
1025 main: LruCache<u64, TinyLfuEntry>,
1027 sketch: CountMinSketch,
1029 doorkeeper: Doorkeeper,
1031 total_capacity: usize,
1033 hits: u64,
1035 misses: u64,
1036}
1037
1038impl TinyLfuWidthCache {
1039 pub fn new(total_capacity: usize) -> Self {
1043 let total_capacity = total_capacity.max(2);
1044 let window_cap = (total_capacity / 100).max(1);
1045 let main_cap = total_capacity - window_cap;
1046
1047 Self {
1048 window: LruCache::new(NonZeroUsize::new(window_cap).unwrap()),
1049 main: LruCache::new(NonZeroUsize::new(main_cap.max(1)).unwrap()),
1050 sketch: CountMinSketch::with_defaults(),
1051 doorkeeper: Doorkeeper::with_defaults(),
1052 total_capacity,
1053 hits: 0,
1054 misses: 0,
1055 }
1056 }
1057
1058 pub fn get_or_compute(&mut self, text: &str) -> usize {
1060 self.get_or_compute_with(text, crate::display_width)
1061 }
1062
1063 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1065 where
1066 F: FnOnce(&str) -> usize,
1067 {
1068 let hash = hash_text(text);
1069 let fp = fingerprint_hash(text);
1070
1071 let seen = self.doorkeeper.check_and_set(hash);
1073 if seen {
1074 self.sketch.increment(hash);
1075 }
1076
1077 if let Some(entry) = self.main.get(&hash) {
1079 if entry.fingerprint == fp {
1080 self.hits += 1;
1081 return entry.width;
1082 }
1083 self.main.pop(&hash);
1085 }
1086
1087 if let Some(entry) = self.window.get(&hash) {
1089 if entry.fingerprint == fp {
1090 self.hits += 1;
1091 return entry.width;
1092 }
1093 self.window.pop(&hash);
1095 }
1096
1097 self.misses += 1;
1099 let width = compute(text);
1100 let new_entry = TinyLfuEntry {
1101 width,
1102 fingerprint: fp,
1103 };
1104
1105 if self.window.len() >= self.window.cap().get() {
1108 if let Some((evicted_hash, evicted_entry)) = self.window.pop_lru() {
1110 self.try_admit_to_main(evicted_hash, evicted_entry);
1111 }
1112 }
1113 self.window.put(hash, new_entry);
1114
1115 width
1116 }
1117
1118 fn try_admit_to_main(&mut self, candidate_hash: u64, candidate_entry: TinyLfuEntry) {
1120 let candidate_freq = self.sketch.estimate(candidate_hash);
1121
1122 if self.main.len() < self.main.cap().get() {
1123 self.main.put(candidate_hash, candidate_entry);
1125 return;
1126 }
1127
1128 if let Some((&victim_hash, _)) = self.main.peek_lru() {
1130 let victim_freq = self.sketch.estimate(victim_hash);
1131 if tinylfu_admit(candidate_freq, victim_freq) {
1132 self.main.pop_lru();
1133 self.main.put(candidate_hash, candidate_entry);
1134 }
1135 }
1137 }
1138
1139 pub fn contains(&self, text: &str) -> bool {
1141 let hash = hash_text(text);
1142 let fp = fingerprint_hash(text);
1143 if let Some(e) = self.main.peek(&hash)
1144 && e.fingerprint == fp
1145 {
1146 return true;
1147 }
1148 if let Some(e) = self.window.peek(&hash)
1149 && e.fingerprint == fp
1150 {
1151 return true;
1152 }
1153 false
1154 }
1155
1156 pub fn stats(&self) -> CacheStats {
1158 CacheStats {
1159 hits: self.hits,
1160 misses: self.misses,
1161 size: self.window.len() + self.main.len(),
1162 capacity: self.total_capacity,
1163 }
1164 }
1165
1166 pub fn clear(&mut self) {
1168 self.window.clear();
1169 self.main.clear();
1170 self.sketch.clear();
1171 self.doorkeeper.clear();
1172 }
1173
1174 pub fn reset_stats(&mut self) {
1176 self.hits = 0;
1177 self.misses = 0;
1178 }
1179
1180 pub fn len(&self) -> usize {
1182 self.window.len() + self.main.len()
1183 }
1184
1185 pub fn is_empty(&self) -> bool {
1187 self.window.is_empty() && self.main.is_empty()
1188 }
1189
1190 pub fn capacity(&self) -> usize {
1192 self.total_capacity
1193 }
1194
1195 pub fn main_len(&self) -> usize {
1197 self.main.len()
1198 }
1199
1200 pub fn window_len(&self) -> usize {
1202 self.window.len()
1203 }
1204}
1205
1206#[derive(Debug)]
1218pub struct S3FifoWidthCache {
1219 cache: ftui_core::s3_fifo::S3Fifo<u64, S3FifoEntry>,
1220 hits: u64,
1221 misses: u64,
1222 total_capacity: usize,
1223}
1224
1225#[derive(Debug, Clone, Copy)]
1227struct S3FifoEntry {
1228 width: usize,
1229 fingerprint: u64,
1230}
1231
1232impl S3FifoWidthCache {
1233 pub fn new(capacity: usize) -> Self {
1235 let capacity = capacity.max(2);
1236 Self {
1237 cache: ftui_core::s3_fifo::S3Fifo::new(capacity),
1238 hits: 0,
1239 misses: 0,
1240 total_capacity: capacity,
1241 }
1242 }
1243
1244 #[must_use]
1246 pub fn with_default_capacity() -> Self {
1247 Self::new(DEFAULT_CACHE_CAPACITY)
1248 }
1249
1250 pub fn get_or_compute(&mut self, text: &str) -> usize {
1252 self.get_or_compute_with(text, crate::display_width)
1253 }
1254
1255 pub fn get_or_compute_with<F>(&mut self, text: &str, compute: F) -> usize
1257 where
1258 F: FnOnce(&str) -> usize,
1259 {
1260 let hash = hash_text(text);
1261 let fp = fingerprint_hash(text);
1262
1263 if let Some(entry) = self.cache.get(&hash) {
1264 if entry.fingerprint == fp {
1265 self.hits += 1;
1266 return entry.width;
1267 }
1268 self.cache.remove(&hash);
1270 }
1271
1272 self.misses += 1;
1274 let width = compute(text);
1275 self.cache.insert(
1276 hash,
1277 S3FifoEntry {
1278 width,
1279 fingerprint: fp,
1280 },
1281 );
1282 width
1283 }
1284
1285 pub fn contains(&self, text: &str) -> bool {
1287 let hash = hash_text(text);
1288 self.cache.contains_key(&hash)
1289 }
1290
1291 pub fn stats(&self) -> CacheStats {
1293 CacheStats {
1294 hits: self.hits,
1295 misses: self.misses,
1296 size: self.cache.len(),
1297 capacity: self.total_capacity,
1298 }
1299 }
1300
1301 pub fn clear(&mut self) {
1303 self.cache.clear();
1304 self.hits = 0;
1305 self.misses = 0;
1306 }
1307
1308 pub fn reset_stats(&mut self) {
1310 self.hits = 0;
1311 self.misses = 0;
1312 }
1313
1314 pub fn len(&self) -> usize {
1316 self.cache.len()
1317 }
1318
1319 pub fn is_empty(&self) -> bool {
1321 self.cache.is_empty()
1322 }
1323
1324 pub fn capacity(&self) -> usize {
1326 self.total_capacity
1327 }
1328}
1329
1330impl Default for S3FifoWidthCache {
1331 fn default() -> Self {
1332 Self::with_default_capacity()
1333 }
1334}
1335
1336#[cfg(test)]
1337mod s3_fifo_width_tests {
1338 use super::*;
1339
1340 #[test]
1341 fn s3fifo_new_cache_is_empty() {
1342 let cache = S3FifoWidthCache::new(100);
1343 assert!(cache.is_empty());
1344 assert_eq!(cache.len(), 0);
1345 }
1346
1347 #[test]
1348 fn s3fifo_get_or_compute_caches_value() {
1349 let mut cache = S3FifoWidthCache::new(100);
1350 let w1 = cache.get_or_compute("hello");
1351 assert_eq!(w1, 5);
1352 assert_eq!(cache.len(), 1);
1353
1354 let w2 = cache.get_or_compute("hello");
1355 assert_eq!(w2, 5);
1356 assert_eq!(cache.len(), 1);
1357
1358 let stats = cache.stats();
1359 assert_eq!(stats.hits, 1);
1360 assert_eq!(stats.misses, 1);
1361 }
1362
1363 #[test]
1364 fn s3fifo_different_strings() {
1365 let mut cache = S3FifoWidthCache::new(100);
1366 cache.get_or_compute("hello");
1367 cache.get_or_compute("world");
1368 cache.get_or_compute("foo");
1369 assert_eq!(cache.len(), 3);
1370 }
1371
1372 #[test]
1373 fn s3fifo_cjk_width() {
1374 let mut cache = S3FifoWidthCache::new(100);
1375 let w = cache.get_or_compute("你好");
1376 assert_eq!(w, 4);
1377 }
1378
1379 #[test]
1380 fn s3fifo_contains() {
1381 let mut cache = S3FifoWidthCache::new(100);
1382 assert!(!cache.contains("hello"));
1383 cache.get_or_compute("hello");
1384 assert!(cache.contains("hello"));
1385 }
1386
1387 #[test]
1388 fn s3fifo_clear_resets() {
1389 let mut cache = S3FifoWidthCache::new(100);
1390 cache.get_or_compute("hello");
1391 cache.get_or_compute("world");
1392 cache.clear();
1393 assert!(cache.is_empty());
1394 assert!(!cache.contains("hello"));
1395 let stats = cache.stats();
1396 assert_eq!(stats.hits, 0);
1397 assert_eq!(stats.misses, 0);
1398 }
1399
1400 #[test]
1401 fn s3fifo_produces_same_widths_as_lru() {
1402 let mut lru = WidthCache::new(100);
1403 let mut s3 = S3FifoWidthCache::new(100);
1404
1405 let texts = [
1406 "hello",
1407 "你好世界",
1408 "abc",
1409 "🎉🎉",
1410 "",
1411 " ",
1412 "a\tb",
1413 "mixed中english文",
1414 ];
1415
1416 for text in &texts {
1417 let lru_w = lru.get_or_compute(text);
1418 let s3_w = s3.get_or_compute(text);
1419 assert_eq!(lru_w, s3_w, "width mismatch for {:?}", text);
1420 }
1421 }
1422
1423 #[test]
1424 fn s3fifo_scan_resistance_preserves_hot_set() {
1425 let mut cache = S3FifoWidthCache::new(50);
1426
1427 let hot: Vec<String> = (0..20).map(|i| format!("hot_{i}")).collect();
1429 for text in &hot {
1430 cache.get_or_compute(text);
1431 cache.get_or_compute(text); }
1433
1434 for i in 0..200 {
1436 cache.get_or_compute(&format!("scan_{i}"));
1437 }
1438
1439 let mut survivors = 0;
1441 for text in &hot {
1442 if cache.contains(text) {
1443 survivors += 1;
1444 }
1445 }
1446 assert!(
1447 survivors > 5,
1448 "scan resistance: only {survivors}/20 hot items survived"
1449 );
1450 }
1451
1452 #[test]
1453 fn s3fifo_default_capacity() {
1454 let cache = S3FifoWidthCache::with_default_capacity();
1455 assert_eq!(cache.capacity(), DEFAULT_CACHE_CAPACITY);
1456 }
1457
1458 #[test]
1459 fn s3fifo_reset_stats() {
1460 let mut cache = S3FifoWidthCache::new(100);
1461 cache.get_or_compute("a");
1462 cache.get_or_compute("a");
1463 cache.reset_stats();
1464 let stats = cache.stats();
1465 assert_eq!(stats.hits, 0);
1466 assert_eq!(stats.misses, 0);
1467 }
1468}
1469
1470#[cfg(test)]
1471mod proptests {
1472 use super::*;
1473 use proptest::prelude::*;
1474
1475 proptest! {
1476 #[test]
1477 fn cached_width_matches_direct(s in "[a-zA-Z0-9 ]{1,50}") {
1478 let mut cache = WidthCache::new(100);
1479 let cached = cache.get_or_compute(&s);
1480 let direct = crate::display_width(&s);
1481 prop_assert_eq!(cached, direct);
1482 }
1483
1484 #[test]
1485 fn second_access_is_hit(s in "[a-zA-Z0-9]{1,20}") {
1486 let mut cache = WidthCache::new(100);
1487
1488 cache.get_or_compute(&s);
1489 let stats_before = cache.stats();
1490
1491 cache.get_or_compute(&s);
1492 let stats_after = cache.stats();
1493
1494 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1495 prop_assert_eq!(stats_after.misses, stats_before.misses);
1496 }
1497
1498 #[test]
1499 fn lru_never_exceeds_capacity(
1500 strings in prop::collection::vec("[a-z]{1,5}", 10..100),
1501 capacity in 5usize..20
1502 ) {
1503 let mut cache = WidthCache::new(capacity);
1504
1505 for s in &strings {
1506 cache.get_or_compute(s);
1507 prop_assert!(cache.len() <= capacity);
1508 }
1509 }
1510
1511 #[test]
1512 fn preload_then_access_is_hit(s in "[a-zA-Z]{1,20}") {
1513 let mut cache = WidthCache::new(100);
1514
1515 cache.preload(&s);
1516 let stats_before = cache.stats();
1517
1518 cache.get_or_compute(&s);
1519 let stats_after = cache.stats();
1520
1521 prop_assert_eq!(stats_after.hits, stats_before.hits + 1);
1523 }
1524 }
1525}
1526
1527#[cfg(test)]
1532mod tinylfu_tests {
1533 use super::*;
1534
1535 #[test]
1538 fn unit_cms_single_key_count() {
1539 let mut cms = CountMinSketch::with_defaults();
1540 let h = hash_text("hello");
1541
1542 for _ in 0..5 {
1543 cms.increment(h);
1544 }
1545 assert_eq!(cms.estimate(h), 5);
1546 }
1547
1548 #[test]
1549 fn unit_cms_unseen_key_is_zero() {
1550 let cms = CountMinSketch::with_defaults();
1551 assert_eq!(cms.estimate(hash_text("never_seen")), 0);
1552 }
1553
1554 #[test]
1555 fn unit_cms_saturates_at_max() {
1556 let mut cms = CountMinSketch::with_defaults();
1557 let h = hash_text("hot");
1558
1559 for _ in 0..100 {
1560 cms.increment(h);
1561 }
1562 assert_eq!(cms.estimate(h), CMS_MAX_COUNT);
1563 }
1564
1565 #[test]
1566 fn unit_cms_bounds() {
1567 let mut cms = CountMinSketch::new(1024, 4, u64::MAX); let n: u64 = 1000;
1571
1572 for i in 0..n {
1574 cms.increment(i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1));
1575 }
1576
1577 let target = 0xDEAD_BEEF_u64;
1579 for _ in 0..5 {
1580 cms.increment(target);
1581 }
1582
1583 let est = cms.estimate(target);
1584 let epsilon = std::f64::consts::E / 1024.0;
1585 let upper_bound = 5.0 + epsilon * (n + 5) as f64;
1586
1587 assert!(
1588 (est as f64) <= upper_bound,
1589 "estimate {} exceeds bound {:.1} (epsilon={:.5}, N={})",
1590 est,
1591 upper_bound,
1592 epsilon,
1593 n + 5,
1594 );
1595 assert!(est >= 5, "estimate {} should be >= true count 5", est);
1596 }
1597
1598 #[test]
1599 fn unit_cms_bounds_mass_test() {
1600 let mut cms = CountMinSketch::new(1024, 4, u64::MAX);
1601 let n = 2000u64;
1602
1603 let mut true_counts = vec![0u8; n as usize];
1604 for i in 0..n {
1605 let count = (i % 10 + 1) as u8;
1606 true_counts[i as usize] = count;
1607 for _ in 0..count {
1608 cms.increment(i);
1609 }
1610 }
1611
1612 let total = cms.total_increments();
1613 let epsilon = std::f64::consts::E / 1024.0;
1614 let mut violations = 0u32;
1615
1616 for i in 0..n {
1617 let est = cms.estimate(i);
1618 let true_c = true_counts[i as usize];
1619 let upper = true_c as f64 + epsilon * total as f64;
1620 if est as f64 > upper + 0.5 {
1621 violations += 1;
1622 }
1623 assert!(
1624 est >= true_c,
1625 "key {}: estimate {} < true count {}",
1626 i,
1627 est,
1628 true_c
1629 );
1630 }
1631
1632 let violation_rate = violations as f64 / n as f64;
1634 assert!(
1635 violation_rate <= 0.10,
1636 "violation rate {:.3} exceeds delta threshold",
1637 violation_rate,
1638 );
1639 }
1640
1641 #[test]
1642 fn unit_cms_halving_ages_counts() {
1643 let mut cms = CountMinSketch::new(64, 2, 100);
1644
1645 let h = hash_text("test");
1646 for _ in 0..10 {
1647 cms.increment(h);
1648 }
1649 assert_eq!(cms.estimate(h), 10);
1650
1651 for _ in 10..100 {
1653 cms.increment(hash_text("noise"));
1654 }
1655
1656 let est = cms.estimate(h);
1657 assert!(est <= 5, "After halving, estimate {} should be <= 5", est);
1658 }
1659
1660 #[test]
1661 fn unit_cms_monotone() {
1662 let mut cms = CountMinSketch::with_defaults();
1663 let h = hash_text("key");
1664
1665 let mut prev_est = 0u8;
1666 for _ in 0..CMS_MAX_COUNT {
1667 cms.increment(h);
1668 let est = cms.estimate(h);
1669 assert!(est >= prev_est, "estimate should be monotone");
1670 prev_est = est;
1671 }
1672 }
1673
1674 #[test]
1677 fn unit_doorkeeper_first_access_returns_false() {
1678 let mut dk = Doorkeeper::with_defaults();
1679 assert!(!dk.check_and_set(hash_text("new")));
1680 }
1681
1682 #[test]
1683 fn unit_doorkeeper_second_access_returns_true() {
1684 let mut dk = Doorkeeper::with_defaults();
1685 let h = hash_text("key");
1686 dk.check_and_set(h);
1687 assert!(dk.check_and_set(h));
1688 }
1689
1690 #[test]
1691 fn unit_doorkeeper_contains() {
1692 let mut dk = Doorkeeper::with_defaults();
1693 let h = hash_text("key");
1694 assert!(!dk.contains(h));
1695 dk.check_and_set(h);
1696 assert!(dk.contains(h));
1697 }
1698
1699 #[test]
1700 fn unit_doorkeeper_clear_resets() {
1701 let mut dk = Doorkeeper::with_defaults();
1702 let h = hash_text("key");
1703 dk.check_and_set(h);
1704 dk.clear();
1705 assert!(!dk.contains(h));
1706 assert!(!dk.check_and_set(h));
1707 }
1708
1709 #[test]
1710 fn unit_doorkeeper_false_positive_rate() {
1711 let mut dk = Doorkeeper::new(2048);
1712 let n = 100u64;
1713
1714 for i in 0..n {
1715 dk.check_and_set(i * 0x9E37_79B9 + 1);
1716 }
1717
1718 let mut false_positives = 0u32;
1719 for i in 0..1000 {
1720 let h = (i + 100_000) * 0x6A09_E667 + 7;
1721 if dk.contains(h) {
1722 false_positives += 1;
1723 }
1724 }
1725
1726 let fp_rate = false_positives as f64 / 1000.0;
1728 assert!(
1729 fp_rate < 0.15,
1730 "FP rate {:.3} too high (expected < 0.15)",
1731 fp_rate,
1732 );
1733 }
1734
1735 #[test]
1738 fn unit_admission_rule() {
1739 assert!(tinylfu_admit(5, 3)); assert!(!tinylfu_admit(3, 5)); assert!(!tinylfu_admit(3, 3)); }
1743
1744 #[test]
1745 fn unit_admission_rule_extremes() {
1746 assert!(tinylfu_admit(1, 0));
1747 assert!(!tinylfu_admit(0, 0));
1748 assert!(!tinylfu_admit(0, 1));
1749 assert!(tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT - 1));
1750 assert!(!tinylfu_admit(CMS_MAX_COUNT, CMS_MAX_COUNT));
1751 }
1752
1753 #[test]
1756 fn unit_fingerprint_guard() {
1757 let fp1 = fingerprint_hash("hello");
1758 let fp2 = fingerprint_hash("world");
1759 let fp3 = fingerprint_hash("hello");
1760
1761 assert_ne!(
1762 fp1, fp2,
1763 "Different strings should have different fingerprints"
1764 );
1765 assert_eq!(fp1, fp3, "Same string should have same fingerprint");
1766 }
1767
1768 #[test]
1769 fn unit_fingerprint_guard_collision_rate() {
1770 let mut fps = std::collections::HashSet::new();
1771 let n = 10_000;
1772
1773 for i in 0..n {
1774 let s = format!("string_{}", i);
1775 fps.insert(fingerprint_hash(&s));
1776 }
1777
1778 let collisions = n - fps.len();
1779 assert!(
1780 collisions == 0,
1781 "Expected 0 collisions in 10k items, got {}",
1782 collisions,
1783 );
1784 }
1785
1786 #[test]
1787 fn unit_fingerprint_independent_of_primary_hash() {
1788 let text = "test_string";
1789 let primary = hash_text(text);
1790 let secondary = fingerprint_hash(text);
1791
1792 assert_ne!(
1793 primary, secondary,
1794 "Fingerprint and primary hash should differ"
1795 );
1796 }
1797
1798 #[test]
1801 fn unit_doorkeeper_cms_pipeline() {
1802 let mut dk = Doorkeeper::with_defaults();
1803 let mut cms = CountMinSketch::with_defaults();
1804 let h = hash_text("item");
1805
1806 assert!(!dk.check_and_set(h));
1808 assert_eq!(cms.estimate(h), 0);
1809
1810 assert!(dk.check_and_set(h));
1812 cms.increment(h);
1813 assert_eq!(cms.estimate(h), 1);
1814
1815 assert!(dk.check_and_set(h));
1817 cms.increment(h);
1818 assert_eq!(cms.estimate(h), 2);
1819 }
1820
1821 #[test]
1822 fn unit_doorkeeper_filters_one_hit_wonders() {
1823 let mut dk = Doorkeeper::with_defaults();
1824 let mut cms = CountMinSketch::with_defaults();
1825
1826 for i in 0u64..100 {
1828 let h = i * 0x9E37_79B9 + 1;
1829 let seen = dk.check_and_set(h);
1830 if seen {
1831 cms.increment(h);
1832 }
1833 }
1834
1835 assert_eq!(cms.total_increments(), 0);
1836
1837 let h = 1; assert!(dk.check_and_set(h));
1840 cms.increment(h);
1841 assert_eq!(cms.total_increments(), 1);
1842 }
1843}
1844
1845#[cfg(test)]
1850mod tinylfu_impl_tests {
1851 use super::*;
1852
1853 #[test]
1854 fn basic_get_or_compute() {
1855 let mut cache = TinyLfuWidthCache::new(100);
1856 let w = cache.get_or_compute("hello");
1857 assert_eq!(w, 5);
1858 assert_eq!(cache.len(), 1);
1859
1860 let w2 = cache.get_or_compute("hello");
1861 assert_eq!(w2, 5);
1862 let stats = cache.stats();
1863 assert_eq!(stats.misses, 1);
1864 assert_eq!(stats.hits, 1);
1865 }
1866
1867 #[test]
1868 fn window_to_main_promotion() {
1869 let mut cache = TinyLfuWidthCache::new(100);
1872
1873 for _ in 0..10 {
1875 cache.get_or_compute("frequent");
1876 }
1877
1878 for i in 0..5 {
1880 cache.get_or_compute(&format!("item_{}", i));
1881 }
1882
1883 assert!(cache.contains("frequent"));
1885 assert!(cache.main_len() > 0 || cache.window_len() > 0);
1886 }
1887
1888 #[test]
1889 fn unit_window_promotion() {
1890 let mut cache = TinyLfuWidthCache::new(50);
1892
1893 for _ in 0..20 {
1895 cache.get_or_compute("hot");
1896 }
1897
1898 for i in 0..10 {
1900 cache.get_or_compute(&format!("filler_{}", i));
1901 }
1902
1903 assert!(cache.contains("hot"), "Frequent item should be retained");
1905 }
1906
1907 #[test]
1908 fn fingerprint_guard_detects_collision() {
1909 let mut cache = TinyLfuWidthCache::new(100);
1910
1911 let w = cache.get_or_compute_with("hello", |_| 42);
1913 assert_eq!(w, 42);
1914
1915 assert!(cache.contains("hello"));
1917 }
1918
1919 #[test]
1920 fn admission_rejects_infrequent() {
1921 let mut cache = TinyLfuWidthCache::new(10); for i in 0..9 {
1927 let s = format!("hot_{}", i);
1928 for _ in 0..5 {
1929 cache.get_or_compute(&s);
1930 }
1931 }
1932
1933 for i in 0..20 {
1936 cache.get_or_compute(&format!("cold_{}", i));
1937 }
1938
1939 let hot_survivors: usize = (0..9)
1941 .filter(|i| cache.contains(&format!("hot_{}", i)))
1942 .count();
1943 assert!(
1944 hot_survivors >= 5,
1945 "Expected most hot items to survive, got {}/9",
1946 hot_survivors
1947 );
1948 }
1949
1950 #[test]
1951 fn clear_empties_everything() {
1952 let mut cache = TinyLfuWidthCache::new(100);
1953 cache.get_or_compute("a");
1954 cache.get_or_compute("b");
1955 cache.clear();
1956 assert!(cache.is_empty());
1957 assert_eq!(cache.len(), 0);
1958 }
1959
1960 #[test]
1961 fn stats_reflect_usage() {
1962 let mut cache = TinyLfuWidthCache::new(100);
1963 cache.get_or_compute("a");
1964 cache.get_or_compute("a");
1965 cache.get_or_compute("b");
1966
1967 let stats = cache.stats();
1968 assert_eq!(stats.misses, 2);
1969 assert_eq!(stats.hits, 1);
1970 assert_eq!(stats.size, 2);
1971 }
1972
1973 #[test]
1974 fn capacity_is_respected() {
1975 let mut cache = TinyLfuWidthCache::new(20);
1976
1977 for i in 0..100 {
1978 cache.get_or_compute(&format!("item_{}", i));
1979 }
1980
1981 assert!(
1982 cache.len() <= 20,
1983 "Cache size {} exceeds capacity 20",
1984 cache.len()
1985 );
1986 }
1987
1988 #[test]
1989 fn reset_stats_works() {
1990 let mut cache = TinyLfuWidthCache::new(100);
1991 cache.get_or_compute("x");
1992 cache.get_or_compute("x");
1993 cache.reset_stats();
1994 let stats = cache.stats();
1995 assert_eq!(stats.hits, 0);
1996 assert_eq!(stats.misses, 0);
1997 }
1998
1999 #[test]
2000 fn perf_cache_hit_rate() {
2001 let mut cache = TinyLfuWidthCache::new(50);
2004
2005 for _ in 0..20 {
2007 for i in 0..10 {
2008 cache.get_or_compute(&format!("hot_{}", i));
2009 }
2010 }
2011
2012 for i in 0..100 {
2014 cache.get_or_compute(&format!("cold_{}", i));
2015 }
2016
2017 cache.reset_stats();
2019 for i in 0..10 {
2020 cache.get_or_compute(&format!("hot_{}", i));
2021 }
2022
2023 let stats = cache.stats();
2024 assert!(
2026 stats.hits >= 5,
2027 "Expected at least 5/10 hot items to hit, got {}",
2028 stats.hits
2029 );
2030 }
2031
2032 #[test]
2033 fn unicode_strings_work() {
2034 let mut cache = TinyLfuWidthCache::new(100);
2035 assert_eq!(cache.get_or_compute("日本語"), 6);
2036 assert_eq!(cache.get_or_compute("café"), 4);
2037 assert_eq!(cache.get_or_compute("日本語"), 6); assert_eq!(cache.stats().hits, 1);
2039 }
2040
2041 #[test]
2042 fn empty_string() {
2043 let mut cache = TinyLfuWidthCache::new(100);
2044 assert_eq!(cache.get_or_compute(""), 0);
2045 }
2046
2047 #[test]
2048 fn minimum_capacity() {
2049 let cache = TinyLfuWidthCache::new(0);
2050 assert!(cache.capacity() >= 2);
2051 }
2052}
2053
2054#[cfg(test)]
2060struct Lcg(u64);
2061
2062#[cfg(test)]
2063impl Lcg {
2064 fn new(seed: u64) -> Self {
2065 Self(seed)
2066 }
2067 fn next_u64(&mut self) -> u64 {
2068 self.0 = self
2069 .0
2070 .wrapping_mul(6_364_136_223_846_793_005)
2071 .wrapping_add(1);
2072 self.0
2073 }
2074 fn next_usize(&mut self, max: usize) -> usize {
2075 (self.next_u64() % (max as u64)) as usize
2076 }
2077}
2078
2079#[cfg(test)]
2084mod property_cache_equivalence {
2085 use super::*;
2086 use proptest::prelude::*;
2087
2088 proptest! {
2089 #[test]
2090 fn tinylfu_cached_equals_computed(s in "[a-zA-Z0-9 ]{1,80}") {
2091 let mut cache = TinyLfuWidthCache::new(200);
2092 let cached = cache.get_or_compute(&s);
2093 let direct = crate::display_width(&s);
2094 prop_assert_eq!(cached, direct,
2095 "TinyLFU returned {} but display_width says {} for {:?}", cached, direct, s);
2096 }
2097
2098 #[test]
2099 fn tinylfu_second_access_same_value(s in "[a-zA-Z0-9]{1,40}") {
2100 let mut cache = TinyLfuWidthCache::new(200);
2101 let first = cache.get_or_compute(&s);
2102 let second = cache.get_or_compute(&s);
2103 prop_assert_eq!(first, second,
2104 "First access returned {} but second returned {} for {:?}", first, second, s);
2105 }
2106
2107 #[test]
2108 fn tinylfu_never_exceeds_capacity(
2109 strings in prop::collection::vec("[a-z]{1,5}", 10..200),
2110 capacity in 10usize..50
2111 ) {
2112 let mut cache = TinyLfuWidthCache::new(capacity);
2113 for s in &strings {
2114 cache.get_or_compute(s);
2115 prop_assert!(cache.len() <= capacity,
2116 "Cache size {} exceeded capacity {}", cache.len(), capacity);
2117 }
2118 }
2119
2120 #[test]
2121 fn tinylfu_custom_fn_matches(s in "[a-z]{1,20}") {
2122 let mut cache = TinyLfuWidthCache::new(100);
2123 let custom_fn = |text: &str| text.len(); let cached = cache.get_or_compute_with(&s, custom_fn);
2125 prop_assert_eq!(cached, s.len(),
2126 "Custom fn: cached {} != expected {} for {:?}", cached, s.len(), s);
2127 }
2128 }
2129
2130 #[test]
2131 fn deterministic_seed_equivalence() {
2132 let mut rng = super::Lcg::new(0xDEAD_BEEF);
2134
2135 let mut cache1 = TinyLfuWidthCache::new(50);
2136 let mut results1 = Vec::new();
2137 for _ in 0..500 {
2138 let idx = rng.next_usize(100);
2139 let s = format!("key_{}", idx);
2140 results1.push(cache1.get_or_compute(&s));
2141 }
2142
2143 let mut rng2 = super::Lcg::new(0xDEAD_BEEF);
2144 let mut cache2 = TinyLfuWidthCache::new(50);
2145 let mut results2 = Vec::new();
2146 for _ in 0..500 {
2147 let idx = rng2.next_usize(100);
2148 let s = format!("key_{}", idx);
2149 results2.push(cache2.get_or_compute(&s));
2150 }
2151
2152 assert_eq!(
2153 results1, results2,
2154 "Deterministic seed must produce identical results"
2155 );
2156 }
2157
2158 #[test]
2159 fn both_caches_agree_on_widths() {
2160 let mut lru = WidthCache::new(200);
2163 let mut tlfu = TinyLfuWidthCache::new(200);
2164
2165 let inputs = [
2166 "",
2167 "hello",
2168 "日本語テスト",
2169 "café résumé",
2170 "a\tb",
2171 "🎉🎊🎈",
2172 "mixed日本eng",
2173 " spaces ",
2174 "AAAAAAAAAAAAAAAAAAAAAAAAA",
2175 "x",
2176 ];
2177
2178 for &s in &inputs {
2179 let w1 = lru.get_or_compute(s);
2180 let w2 = tlfu.get_or_compute(s);
2181 assert_eq!(
2182 w1, w2,
2183 "Width mismatch for {:?}: LRU={}, TinyLFU={}",
2184 s, w1, w2
2185 );
2186 }
2187 }
2188}
2189
2190#[cfg(test)]
2195mod e2e_cache_replay {
2196 use super::*;
2197 use std::time::Instant;
2198
2199 #[derive(Debug)]
2201 struct ReplayRecord {
2202 step: usize,
2203 key: String,
2204 width: usize,
2205 hit: bool,
2206 latency_ns: u128,
2207 }
2208
2209 impl ReplayRecord {
2210 fn to_jsonl(&self) -> String {
2211 format!(
2212 r#"{{"step":{},"key":"{}","width":{},"hit":{},"latency_ns":{}}}"#,
2213 self.step, self.key, self.width, self.hit, self.latency_ns,
2214 )
2215 }
2216 }
2217
2218 fn zipfian_workload(rng: &mut super::Lcg, n: usize, universe: usize) -> Vec<String> {
2219 (0..n)
2221 .map(|_| {
2222 let raw = rng.next_usize(universe * universe);
2223 let idx = (raw as f64).sqrt() as usize % universe;
2224 format!("item_{}", idx)
2225 })
2226 .collect()
2227 }
2228
2229 #[test]
2230 fn replay_zipfian_tinylfu() {
2231 let mut rng = super::Lcg::new(0x1234_5678);
2232 let workload = zipfian_workload(&mut rng, 2000, 200);
2233
2234 let mut cache = TinyLfuWidthCache::new(50);
2235 let mut records = Vec::with_capacity(workload.len());
2236
2237 for (i, key) in workload.iter().enumerate() {
2238 let stats_before = cache.stats();
2239 let t0 = Instant::now();
2240 let width = cache.get_or_compute(key);
2241 let elapsed = t0.elapsed().as_nanos();
2242 let stats_after = cache.stats();
2243 let hit = stats_after.hits > stats_before.hits;
2244
2245 records.push(ReplayRecord {
2246 step: i,
2247 key: key.clone(),
2248 width,
2249 hit,
2250 latency_ns: elapsed,
2251 });
2252 }
2253
2254 for r in &records[..5] {
2256 let line = r.to_jsonl();
2257 assert!(
2258 line.starts_with('{') && line.ends_with('}'),
2259 "Bad JSONL: {}",
2260 line
2261 );
2262 }
2263
2264 let total = records.len();
2266 let hits = records.iter().filter(|r| r.hit).count();
2267 let hit_rate = hits as f64 / total as f64;
2268
2269 assert!(
2272 hit_rate > 0.10,
2273 "Zipfian hit rate too low: {:.2}% ({}/{})",
2274 hit_rate * 100.0,
2275 hits,
2276 total
2277 );
2278 }
2279
2280 #[test]
2281 fn replay_zipfian_lru_comparison() {
2282 let mut rng = super::Lcg::new(0x1234_5678);
2283 let workload = zipfian_workload(&mut rng, 2000, 200);
2284
2285 let mut tlfu = TinyLfuWidthCache::new(50);
2286 let mut lru = WidthCache::new(50);
2287
2288 for key in &workload {
2289 tlfu.get_or_compute(key);
2290 lru.get_or_compute(key);
2291 }
2292
2293 let tlfu_stats = tlfu.stats();
2294 let lru_stats = lru.stats();
2295
2296 assert_eq!(tlfu_stats.hits + tlfu_stats.misses, 2000);
2298 assert_eq!(lru_stats.hits + lru_stats.misses, 2000);
2299
2300 assert!(
2303 tlfu_stats.hit_rate() >= lru_stats.hit_rate() * 0.8,
2304 "TinyLFU hit rate {:.2}% much worse than LRU {:.2}%",
2305 tlfu_stats.hit_rate() * 100.0,
2306 lru_stats.hit_rate() * 100.0,
2307 );
2308 }
2309
2310 #[test]
2311 fn replay_deterministic_reproduction() {
2312 let run = |seed: u64| -> Vec<bool> {
2314 let mut rng = super::Lcg::new(seed);
2315 let workload = zipfian_workload(&mut rng, 500, 100);
2316 let mut cache = TinyLfuWidthCache::new(30);
2317 let mut hits = Vec::with_capacity(500);
2318 for key in &workload {
2319 let before = cache.stats().hits;
2320 cache.get_or_compute(key);
2321 hits.push(cache.stats().hits > before);
2322 }
2323 hits
2324 };
2325
2326 let run1 = run(0xABCD_EF01);
2327 let run2 = run(0xABCD_EF01);
2328 assert_eq!(run1, run2, "Deterministic replay diverged");
2329 }
2330
2331 #[test]
2332 fn replay_uniform_workload() {
2333 let mut rng = super::Lcg::new(0x9999);
2336 let universe = 100;
2337 let cache_size = 25;
2338 let n = 5000;
2339
2340 let mut cache = TinyLfuWidthCache::new(cache_size);
2341
2342 for i in 0..universe {
2344 cache.get_or_compute(&format!("u_{}", i));
2345 }
2346
2347 cache.reset_stats();
2348 for _ in 0..n {
2349 let idx = rng.next_usize(universe);
2350 cache.get_or_compute(&format!("u_{}", idx));
2351 }
2352
2353 let stats = cache.stats();
2354 let hit_rate = stats.hit_rate();
2355 assert!(
2357 hit_rate > 0.10 && hit_rate < 0.60,
2358 "Uniform hit rate {:.2}% outside expected range",
2359 hit_rate * 100.0,
2360 );
2361 }
2362}
2363
2364#[cfg(test)]
2369mod perf_cache_overhead {
2370 use super::*;
2371 use std::time::Instant;
2372
2373 fn measure_latencies<F: FnMut(usize)>(n: usize, mut op: F) -> Vec<u128> {
2375 let mut latencies = Vec::with_capacity(n);
2376 for i in 0..n {
2377 let t0 = Instant::now();
2378 op(i);
2379 latencies.push(t0.elapsed().as_nanos());
2380 }
2381 latencies.sort_unstable();
2382 latencies
2383 }
2384
2385 fn p95(sorted: &[u128]) -> u128 {
2386 let len = sorted.len();
2387 let idx = ((len as f64 * 0.95) as usize).min(len.saturating_sub(1));
2388 sorted[idx]
2389 }
2390
2391 fn p99(sorted: &[u128]) -> u128 {
2392 let len = sorted.len();
2393 let idx = ((len as f64 * 0.99) as usize).min(len.saturating_sub(1));
2394 sorted[idx]
2395 }
2396
2397 fn median(sorted: &[u128]) -> u128 {
2398 sorted[sorted.len() / 2]
2399 }
2400
2401 #[allow(unexpected_cfgs)]
2402 fn perf_budget_ns(base_ns: u128) -> u128 {
2403 if cfg!(coverage) || cfg!(coverage_nightly) {
2404 base_ns.saturating_mul(10)
2405 } else {
2406 base_ns
2407 }
2408 }
2409
2410 #[test]
2411 fn perf_lru_hit_latency() {
2412 let mut cache = WidthCache::new(1000);
2413 for i in 0..100 {
2415 cache.get_or_compute(&format!("warm_{}", i));
2416 }
2417
2418 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2419 let latencies = measure_latencies(10_000, |i| {
2420 let _ = cache.get_or_compute(&keys[i % 100]);
2421 });
2422
2423 let p95_ns = p95(&latencies);
2424 let budget_ns = perf_budget_ns(5_000);
2427 assert!(
2428 p95_ns < budget_ns,
2429 "LRU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2430 p95_ns,
2431 budget_ns,
2432 median(&latencies),
2433 p99(&latencies),
2434 );
2435 }
2436
2437 #[test]
2438 fn perf_tinylfu_hit_latency() {
2439 let mut cache = TinyLfuWidthCache::new(1000);
2440 for _ in 0..5 {
2442 for i in 0..100 {
2443 cache.get_or_compute(&format!("warm_{}", i));
2444 }
2445 }
2446
2447 let keys: Vec<String> = (0..100).map(|i| format!("warm_{}", i)).collect();
2448 let latencies = measure_latencies(10_000, |i| {
2449 let _ = cache.get_or_compute(&keys[i % 100]);
2450 });
2451
2452 let p95_ns = p95(&latencies);
2453 let budget_ns = perf_budget_ns(5_000);
2455 assert!(
2456 p95_ns < budget_ns,
2457 "TinyLFU hit p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2458 p95_ns,
2459 budget_ns,
2460 median(&latencies),
2461 p99(&latencies),
2462 );
2463 }
2464
2465 #[test]
2466 fn perf_tinylfu_miss_latency() {
2467 let mut cache = TinyLfuWidthCache::new(100);
2468 let keys: Vec<String> = (0..10_000).map(|i| format!("miss_{}", i)).collect();
2469
2470 let latencies = measure_latencies(10_000, |i| {
2471 let _ = cache.get_or_compute(&keys[i]);
2472 });
2473
2474 let p95_ns = p95(&latencies);
2475 let budget_ns = perf_budget_ns(20_000);
2478 assert!(
2479 p95_ns < budget_ns,
2480 "TinyLFU miss p95 = {}ns exceeds {}ns budget (median={}ns, p99={}ns)",
2481 p95_ns,
2482 budget_ns,
2483 median(&latencies),
2484 p99(&latencies),
2485 );
2486 }
2487
2488 #[test]
2489 fn perf_cms_increment_latency() {
2490 let mut cms = CountMinSketch::with_defaults();
2491 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("k{}", i))).collect();
2492
2493 let latencies = measure_latencies(10_000, |i| {
2494 cms.increment(hashes[i]);
2495 });
2496
2497 let p95_ns = p95(&latencies);
2498 let budget_ns = perf_budget_ns(2_000);
2500 assert!(
2501 p95_ns < budget_ns,
2502 "CMS increment p95 = {}ns exceeds {}ns budget (median={}ns)",
2503 p95_ns,
2504 budget_ns,
2505 median(&latencies),
2506 );
2507 }
2508
2509 #[test]
2510 fn perf_doorkeeper_latency() {
2511 let mut dk = Doorkeeper::with_defaults();
2512 let hashes: Vec<u64> = (0..10_000).map(|i| hash_text(&format!("d{}", i))).collect();
2513
2514 let latencies = measure_latencies(10_000, |i| {
2515 let _ = dk.check_and_set(hashes[i]);
2516 });
2517
2518 let p95_ns = p95(&latencies);
2519 let budget_ns = perf_budget_ns(1_000);
2521 assert!(
2522 p95_ns < budget_ns,
2523 "Doorkeeper p95 = {}ns exceeds {}ns budget (median={}ns)",
2524 p95_ns,
2525 budget_ns,
2526 median(&latencies),
2527 );
2528 }
2529
2530 #[test]
2531 fn perf_fingerprint_hash_latency() {
2532 let keys: Vec<String> = (0..10_000).map(|i| format!("fp_{}", i)).collect();
2533
2534 let latencies = measure_latencies(10_000, |i| {
2535 let _ = fingerprint_hash(&keys[i]);
2536 });
2537
2538 let p95_ns = p95(&latencies);
2539 let budget_ns = perf_budget_ns(1_000);
2541 assert!(
2542 p95_ns < budget_ns,
2543 "fingerprint_hash p95 = {}ns exceeds {}ns budget (median={}ns)",
2544 p95_ns,
2545 budget_ns,
2546 median(&latencies),
2547 );
2548 }
2549}