Skip to main content

typf_core/
cache.rs

1//! Scan-resistant caching with TinyLFU and byte-weighted eviction
2//!
3//! Uses Moka's TinyLFU eviction policy to handle scan workloads gracefully.
4//! Unlike timestamp-based eviction, TinyLFU tracks access frequency and
5//! rejects one-time "scan" entries that would pollute the cache.
6//!
7//! **Byte-weighted eviction**: Caches track actual memory usage, not just
8//! entry counts. A 4MB emoji bitmap consumes 4000x more quota than a 1KB
9//! glyph. This prevents memory explosions from pathological fonts.
10//!
11//! This prevents unbounded memory growth when processing many unique fonts
12//! (e.g., font matching across hundreds of candidates).
13
14use crate::glyph_cache::GlyphCacheKey;
15use crate::shaping_cache::ShapingCacheKey;
16use moka::sync::Cache;
17use parking_lot::RwLock;
18use std::hash::Hash;
19use std::sync::Arc;
20use std::sync::OnceLock;
21use std::time::{Duration, Instant};
22
/// Global, lazily initialized instance of the cache manager.
static CACHE_MANAGER: OnceLock<CacheManager> = OnceLock::new();

/// Returns a reference to the global `CacheManager` instance.
///
/// This function will initialize the `CacheManager` on its first call
/// if it hasn't been initialized already. Initialization is thread-safe:
/// concurrent first calls may race to build, but `OnceLock` guarantees
/// only one `CacheManager` is ever stored and returned.
pub fn get_cache_manager() -> &'static CacheManager {
    CACHE_MANAGER.get_or_init(CacheManager::new)
}

/// Default cache byte limit: 512 MB
///
/// Can be overridden via `TYPF_CACHE_MAX_BYTES` environment variable
/// (see `get_cache_max_bytes`).
pub const DEFAULT_CACHE_MAX_BYTES: u64 = 512 * 1024 * 1024;
38
/// Scan-resistant cache backed by Moka's TinyLFU
///
/// TinyLFU tracks frequency of both hits and misses, rejecting
/// infrequent "scan" accesses that would pollute a pure LRU cache.
///
/// Despite the name, this wraps a single Moka cache; the "multi-level"
/// API shape (L1/L2 sizing and L1/L2 metrics fields) is retained for
/// compatibility with earlier callers.
pub struct MultiLevelCache<K, V>
where
    K: Hash + Eq + Send + Sync + Clone + 'static,
    V: Clone + Send + Sync + 'static,
{
    // Underlying Moka cache (TinyLFU eviction + time-to-idle expiry).
    cache: Cache<K, V>,
    // Hit/miss counters and timing, behind a lock so `&self` methods
    // (like `get`) can update them.
    stats: Arc<RwLock<CacheMetrics>>,
}
51
52impl<K, V> std::fmt::Debug for MultiLevelCache<K, V>
53where
54    K: Hash + Eq + Send + Sync + Clone + 'static,
55    V: Clone + Send + Sync + 'static,
56{
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        f.debug_struct("MultiLevelCache")
59            .field("entry_count", &self.cache.entry_count())
60            .field("hit_rate", &self.hit_rate())
61            .finish()
62    }
63}
64
65impl<K, V> MultiLevelCache<K, V>
66where
67    K: Hash + Eq + Send + Sync + Clone + 'static,
68    V: Clone + Send + Sync + 'static,
69{
70    /// Build a scan-resistant cache with specified capacity
71    ///
72    /// The `l1_size` and `l2_size` parameters are combined for total capacity.
73    /// Moka's TinyLFU internally manages hot/cold separation.
74    pub fn new(l1_size: usize, l2_size: usize) -> Self {
75        let total_capacity = (l1_size + l2_size) as u64;
76        let cache = Cache::builder()
77            .max_capacity(total_capacity)
78            // TinyLFU is the default, but be explicit
79            .eviction_policy(moka::policy::EvictionPolicy::tiny_lfu())
80            // Time-to-idle: evict entries not accessed for 10 minutes
81            .time_to_idle(Duration::from_secs(600))
82            .build();
83
84        Self {
85            cache,
86            stats: Arc::new(RwLock::new(CacheMetrics::default())),
87        }
88    }
89
90    /// Look up a cached value
91    ///
92    /// TinyLFU admission policy means frequently-accessed keys stay cached
93    /// while scan-like one-time accesses are rejected.
94    pub fn get(&self, key: &K) -> Option<V> {
95        let start = Instant::now();
96        let mut stats = self.stats.write();
97        stats.total_requests += 1;
98
99        if let Some(value) = self.cache.get(key) {
100            stats.l1_hits += 1; // Count all hits as "L1" for API compatibility
101            stats.total_l1_time += start.elapsed();
102            Some(value)
103        } else {
104            stats.misses += 1;
105            None
106        }
107    }
108
109    /// Store a value in the cache
110    ///
111    /// Note: TinyLFU may reject this entry if the key hasn't been
112    /// seen frequently enough. This is intentional for scan resistance.
113    pub fn insert(&self, key: K, value: V) {
114        self.cache.insert(key, value);
115    }
116
117    /// Cache hit rate (0.0 to 1.0)
118    pub fn hit_rate(&self) -> f64 {
119        let stats = self.stats.read();
120        if stats.total_requests == 0 {
121            0.0
122        } else {
123            let hits = stats.l1_hits + stats.l2_hits;
124            hits as f64 / stats.total_requests as f64
125        }
126    }
127
128    /// Average access time for cache hits
129    pub fn avg_access_time(&self) -> Duration {
130        let stats = self.stats.read();
131        let total_time = stats.total_l1_time + stats.total_l2_time;
132        let total_hits = stats.l1_hits + stats.l2_hits;
133        if total_hits == 0 {
134            Duration::ZERO
135        } else {
136            total_time / total_hits as u32
137        }
138    }
139
140    /// Full performance snapshot
141    pub fn metrics(&self) -> CacheMetrics {
142        self.stats.read().clone()
143    }
144
145    /// Number of entries currently in cache
146    pub fn len(&self) -> usize {
147        self.cache.entry_count() as usize
148    }
149
150    /// Check if cache is empty
151    pub fn is_empty(&self) -> bool {
152        self.cache.entry_count() == 0
153    }
154
155    /// Clear all entries and reset stats
156    pub fn clear(&self) {
157        self.cache.invalidate_all();
158        let mut stats = self.stats.write();
159        *stats = CacheMetrics::default();
160    }
161
162    /// Force pending operations to complete (for testing)
163    #[cfg(test)]
164    pub fn sync(&self) {
165        self.cache.run_pending_tasks();
166    }
167}
168
/// Basic cache statistics
///
/// NOTE(review): not constructed anywhere in this file — presumably part
/// of the public API for external consumers; confirm against callers
/// before changing or removing.
#[derive(Debug, Clone)]
pub struct CacheStats {
    // Current number of cached entries (name suggests; confirm at call site).
    pub size: usize,
    // Configured capacity limit.
    pub capacity: usize,
    // Total number of cache hits recorded.
    pub total_hits: u32,
    // Fraction of requests that were hits (0.0 to 1.0).
    pub hit_rate: f32,
}
177
/// Performance metrics for cache operations
///
/// All counters are cumulative since creation (or the last reset).
#[derive(Debug, Clone, Default)]
pub struct CacheMetrics {
    pub total_requests: u64,
    pub l1_hits: u64,
    pub l2_hits: u64,
    pub misses: u64,
    pub total_l1_time: Duration,
    pub total_l2_time: Duration,
}

impl CacheMetrics {
    /// Overall hit rate across both levels, 0.0 when no requests were made.
    pub fn hit_rate(&self) -> f64 {
        match self.total_requests {
            0 => 0.0,
            total => (self.l1_hits + self.l2_hits) as f64 / total as f64,
        }
    }

    /// Fraction of all requests served by L1, 0.0 when no requests were made.
    pub fn l1_hit_rate(&self) -> f64 {
        match self.total_requests {
            0 => 0.0,
            total => self.l1_hits as f64 / total as f64,
        }
    }
}
206
207/// Get the cache byte limit from environment or use default.
208///
209/// Set `TYPF_CACHE_MAX_BYTES` to override (e.g., "268435456" for 256MB).
210pub fn get_cache_max_bytes() -> u64 {
211    std::env::var("TYPF_CACHE_MAX_BYTES")
212        .ok()
213        .and_then(|s| s.parse().ok())
214        .unwrap_or(DEFAULT_CACHE_MAX_BYTES)
215}
216
/// Byte-weighted cache for RenderOutput values
///
/// Unlike entry-count caches, this tracks actual memory usage.
/// A 4MB emoji bitmap consumes 4000x more quota than a 1KB glyph.
pub struct RenderOutputCache<K>
where
    K: Hash + Eq + Send + Sync + Clone + 'static,
{
    // Moka cache whose capacity is measured in bytes via a weigher.
    cache: Cache<K, crate::types::RenderOutput>,
    // Hit/miss counters and timing, behind a lock for `&self` updates.
    stats: Arc<RwLock<CacheMetrics>>,
    // Configured byte budget, kept for Debug/reporting.
    max_bytes: u64,
}
229
230impl<K> std::fmt::Debug for RenderOutputCache<K>
231where
232    K: Hash + Eq + Send + Sync + Clone + 'static,
233{
234    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235        f.debug_struct("RenderOutputCache")
236            .field("entry_count", &self.cache.entry_count())
237            .field("weighted_size", &self.cache.weighted_size())
238            .field("max_bytes", &self.max_bytes)
239            .field("hit_rate", &self.hit_rate())
240            .finish()
241    }
242}
243
244impl<K> RenderOutputCache<K>
245where
246    K: Hash + Eq + Send + Sync + Clone + 'static,
247{
248    /// Create a byte-weighted cache with specified maximum bytes.
249    pub fn new(max_bytes: u64) -> Self {
250        let cache = Cache::builder()
251            .max_capacity(max_bytes)
252            .weigher(|_key: &K, value: &crate::types::RenderOutput| {
253                // Weight = byte size, minimum 1 to avoid division issues
254                value.byte_size().max(1) as u32
255            })
256            .eviction_policy(moka::policy::EvictionPolicy::tiny_lfu())
257            .time_to_idle(Duration::from_secs(600))
258            .build();
259
260        Self {
261            cache,
262            stats: Arc::new(RwLock::new(CacheMetrics::default())),
263            max_bytes,
264        }
265    }
266
267    /// Create a cache with the default byte limit (512 MB or env override).
268    pub fn with_default_limit() -> Self {
269        Self::new(get_cache_max_bytes())
270    }
271
272    /// Look up a cached render output.
273    pub fn get(&self, key: &K) -> Option<crate::types::RenderOutput> {
274        let start = Instant::now();
275        let mut stats = self.stats.write();
276        stats.total_requests += 1;
277
278        if let Some(value) = self.cache.get(key) {
279            stats.l1_hits += 1;
280            stats.total_l1_time += start.elapsed();
281            Some(value)
282        } else {
283            stats.misses += 1;
284            None
285        }
286    }
287
288    /// Store a render output in the cache.
289    ///
290    /// Large outputs may be rejected by TinyLFU if not accessed frequently.
291    pub fn insert(&self, key: K, value: crate::types::RenderOutput) {
292        self.cache.insert(key, value);
293    }
294
295    /// Cache hit rate (0.0 to 1.0).
296    pub fn hit_rate(&self) -> f64 {
297        let stats = self.stats.read();
298        if stats.total_requests == 0 {
299            0.0
300        } else {
301            stats.l1_hits as f64 / stats.total_requests as f64
302        }
303    }
304
305    /// Current weighted size in bytes.
306    pub fn weighted_size(&self) -> u64 {
307        self.cache.weighted_size()
308    }
309
310    /// Number of entries in cache.
311    pub fn entry_count(&self) -> u64 {
312        self.cache.entry_count()
313    }
314
315    /// Performance metrics.
316    pub fn metrics(&self) -> CacheMetrics {
317        self.stats.read().clone()
318    }
319
320    /// Clear all entries.
321    pub fn clear(&self) {
322        self.cache.invalidate_all();
323        let mut stats = self.stats.write();
324        *stats = CacheMetrics::default();
325    }
326
327    /// Force pending operations to complete (for testing).
328    #[cfg(test)]
329    pub fn sync(&self) {
330        self.cache.run_pending_tasks();
331    }
332}
333
/// Centralized cache manager for shaping and glyph caches
pub struct CacheManager {
    // Caches shaped-text results keyed by shaping parameters.
    pub shaping_cache: MultiLevelCache<ShapingCacheKey, Arc<Vec<u8>>>,
    // Caches rendered glyph bytes keyed by glyph parameters.
    pub glyph_cache: MultiLevelCache<GlyphCacheKey, Arc<Vec<u8>>>,
}
339
340impl CacheManager {
341    /// Create a manager with sensible default sizes
342    ///
343    /// Default capacities are conservative to prevent memory issues:
344    /// - Shaping: 10,100 entries (shapes are larger)
345    /// - Glyphs: 101,000 entries (individual glyphs are smaller)
346    pub fn new() -> Self {
347        Self {
348            shaping_cache: MultiLevelCache::new(100, 10_000),
349            glyph_cache: MultiLevelCache::new(1000, 100_000),
350        }
351    }
352
353    /// Look up previously shaped text
354    pub fn get_shaped(&self, key: &ShapingCacheKey) -> Option<Arc<Vec<u8>>> {
355        self.shaping_cache.get(key)
356    }
357
358    /// Save shaping results for next time
359    pub fn cache_shaped(&self, key: ShapingCacheKey, data: Arc<Vec<u8>>) {
360        self.shaping_cache.insert(key, data);
361    }
362
363    /// Find a rendered glyph we cached earlier
364    pub fn get_glyph(&self, key: &GlyphCacheKey) -> Option<Arc<Vec<u8>>> {
365        self.glyph_cache.get(key)
366    }
367
368    /// Remember this glyph for future renders
369    pub fn cache_glyph(&self, key: GlyphCacheKey, data: Arc<Vec<u8>>) {
370        self.glyph_cache.insert(key, data);
371    }
372
373    /// Human-readable performance report
374    pub fn report_metrics(&self) -> String {
375        let shaping = self.shaping_cache.metrics();
376        let glyph = self.glyph_cache.metrics();
377
378        format!(
379            "Cache Performance (TinyLFU):\n\
380             Shaping Cache:\n\
381             - Hit Rate: {:.2}%\n\
382             - Entries: {}\n\
383             - Average Access: {:?}\n\
384             Glyph Cache:\n\
385             - Hit Rate: {:.2}%\n\
386             - Entries: {}\n\
387             - Average Access: {:?}",
388            shaping.hit_rate() * 100.0,
389            self.shaping_cache.len(),
390            self.shaping_cache.avg_access_time(),
391            glyph.hit_rate() * 100.0,
392            self.glyph_cache.len(),
393            self.glyph_cache.avg_access_time(),
394        )
395    }
396
397    /// Clears all entries in both shaping and glyph caches, and resets their statistics.
398    pub fn clear_all(&self) {
399        self.shaping_cache.clear();
400        self.glyph_cache.clear();
401    }
402}
403
404impl Default for CacheManager {
405    fn default() -> Self {
406        Self::new()
407    }
408}
409
#[cfg(test)]
mod tests {
    use super::*;

    // Basic round-trip: inserted values are retrievable; unknown keys miss.
    #[test]
    fn test_cache_insert_and_get() {
        let cache: MultiLevelCache<String, String> = MultiLevelCache::new(10, 100);

        cache.insert("key1".to_string(), "value1".to_string());
        cache.insert("key2".to_string(), "value2".to_string());

        assert_eq!(cache.get(&"key1".to_string()), Some("value1".to_string()));
        assert_eq!(cache.get(&"key2".to_string()), Some("value2".to_string()));
        assert_eq!(cache.get(&"key3".to_string()), None);
    }

    // Metrics counters track requests, hits, and misses exactly.
    #[test]
    fn test_cache_metrics() {
        let cache: MultiLevelCache<u32, String> = MultiLevelCache::new(10, 100);

        cache.insert(1, "one".to_string());
        cache.insert(2, "two".to_string());

        assert_eq!(cache.get(&1), Some("one".to_string()));
        assert_eq!(cache.get(&2), Some("two".to_string()));
        assert_eq!(cache.get(&3), None);

        let metrics = cache.metrics();
        assert_eq!(metrics.total_requests, 3);
        assert_eq!(metrics.l1_hits, 2);
        assert_eq!(metrics.misses, 1);
    }

    // clear() empties the cache and makes former keys miss.
    // sync() is needed because Moka applies inserts/invalidations lazily.
    #[test]
    fn test_cache_clear() {
        let cache: MultiLevelCache<u32, String> = MultiLevelCache::new(10, 100);

        cache.insert(1, "one".to_string());
        cache.sync(); // Force pending insert to complete
        assert!(!cache.is_empty());

        cache.clear();
        cache.sync(); // Force pending invalidation to complete
        assert!(cache.is_empty());
        assert_eq!(cache.get(&1), None);
    }

    #[test]
    fn test_scan_resistance_concept() {
        // TinyLFU tracks frequency - items accessed multiple times
        // are more likely to stay cached than one-time scans.
        // This test verifies the cache works; actual scan resistance
        // is handled by Moka's internal TinyLFU implementation.
        let cache: MultiLevelCache<u32, String> = MultiLevelCache::new(5, 5);

        // Simulate a "hot" key accessed multiple times
        cache.insert(1, "hot".to_string());
        for _ in 0..10 {
            cache.get(&1);
        }

        // Simulate scan of many unique keys
        for i in 100..200 {
            cache.insert(i, format!("scan_{}", i));
        }

        // Hot key should still be accessible (TinyLFU protects it)
        // Note: This is probabilistic - TinyLFU may or may not keep it
        // The important thing is the cache doesn't grow unbounded
        assert!(cache.len() <= 10, "Cache should respect capacity limit");
    }

    // byte_size() should report the payload length for each output variant;
    // this is what the RenderOutputCache weigher relies on.
    #[test]
    fn test_render_output_byte_size() {
        use crate::types::{BitmapData, BitmapFormat, RenderOutput, VectorData, VectorFormat};

        // Test bitmap byte size (should be data.len())
        let bitmap = RenderOutput::Bitmap(BitmapData {
            width: 100,
            height: 100,
            format: BitmapFormat::Rgba8,
            data: vec![0u8; 40_000], // 100x100x4 = 40KB
        });
        assert_eq!(bitmap.byte_size(), 40_000);

        // Test vector byte size (should be data.len())
        let vector = RenderOutput::Vector(VectorData {
            format: VectorFormat::Svg,
            data: "<svg>test</svg>".to_string(),
        });
        assert_eq!(vector.byte_size(), 15);

        // Test JSON byte size
        let json = RenderOutput::Json(r#"{"test": true}"#.to_string());
        assert_eq!(json.byte_size(), 14);
    }

    // Inserting more bytes than the budget should trigger byte-weighted
    // eviction so the weighted size stays at or below the limit.
    #[test]
    fn test_byte_weighted_cache_respects_limit() {
        use crate::types::{BitmapData, BitmapFormat, RenderOutput};

        // Create a cache with 100KB limit
        let cache: RenderOutputCache<u32> = RenderOutputCache::new(100_000);

        // Insert entries totaling ~150KB (should evict some)
        for i in 0..15 {
            let output = RenderOutput::Bitmap(BitmapData {
                width: 50,
                height: 50,
                format: BitmapFormat::Rgba8,
                data: vec![i as u8; 10_000], // 10KB each
            });
            cache.insert(i, output);
        }
        cache.sync(); // Force pending operations

        // Weighted size should be at or below the limit
        assert!(
            cache.weighted_size() <= 100_000,
            "Cache weighted size {} should be <= 100KB",
            cache.weighted_size()
        );
    }

    // A small item and a large item that together fit the budget should
    // coexist without spurious eviction.
    #[test]
    fn test_byte_weighted_cache_large_item_eviction() {
        use crate::types::{BitmapData, BitmapFormat, RenderOutput};

        // Create a cache with 50KB limit
        let cache: RenderOutputCache<u32> = RenderOutputCache::new(50_000);

        // Insert a small item
        let small = RenderOutput::Bitmap(BitmapData {
            width: 10,
            height: 10,
            format: BitmapFormat::Gray8,
            data: vec![0u8; 100], // 100 bytes
        });
        cache.insert(1, small);

        // Insert a large item (40KB)
        let large = RenderOutput::Bitmap(BitmapData {
            width: 100,
            height: 100,
            format: BitmapFormat::Rgba8,
            data: vec![0u8; 40_000], // 40KB
        });
        cache.insert(2, large);
        cache.sync();

        // Both should fit (100 + 40KB = ~40KB < 50KB limit)
        assert!(cache.weighted_size() <= 50_000);
    }
}