Skip to main content

oxidize_pdf/memory/
cache.rs

1//! LRU cache implementation for PDF objects
2//!
3//! Provides efficient caching of frequently accessed PDF objects to reduce
4//! repeated parsing and memory allocations.
5
6use crate::objects::ObjectId;
7use crate::parser::PdfObject;
8use std::collections::{HashMap, VecDeque};
9use std::sync::{Arc, RwLock};
10
11/// Generic LRU (Least Recently Used) cache
12pub struct LruCache<K: Clone + Eq + std::hash::Hash, V> {
13    capacity: usize,
14    map: HashMap<K, V>,
15    order: VecDeque<K>,
16}
17
18impl<K: Clone + Eq + std::hash::Hash, V> LruCache<K, V> {
19    /// Create a new LRU cache with specified capacity
20    pub fn new(capacity: usize) -> Self {
21        Self {
22            capacity,
23            map: HashMap::with_capacity(capacity),
24            order: VecDeque::with_capacity(capacity),
25        }
26    }
27
28    /// Get a value from the cache
29    pub fn get(&mut self, key: &K) -> Option<&V> {
30        if self.map.contains_key(key) {
31            // Move to front (most recently used)
32            self.order.retain(|k| k != key);
33            self.order.push_front(key.clone());
34            self.map.get(key)
35        } else {
36            None
37        }
38    }
39
40    /// Put a value into the cache
41    pub fn put(&mut self, key: K, value: V) {
42        // Handle zero capacity
43        if self.capacity == 0 {
44            return;
45        }
46
47        if self.map.contains_key(&key) {
48            // Update existing
49            self.order.retain(|k| k != &key);
50        } else if self.map.len() >= self.capacity {
51            // Evict least recently used
52            if let Some(lru_key) = self.order.pop_back() {
53                self.map.remove(&lru_key);
54            }
55        }
56
57        self.map.insert(key.clone(), value);
58        self.order.push_front(key);
59    }
60
61    /// Clear the cache
62    pub fn clear(&mut self) {
63        self.map.clear();
64        self.order.clear();
65    }
66
67    /// Get the current number of items in the cache
68    pub fn len(&self) -> usize {
69        self.map.len()
70    }
71
72    /// Check if the cache is empty
73    pub fn is_empty(&self) -> bool {
74        self.map.is_empty()
75    }
76}
77
78/// Thread-safe cache for PDF objects
79pub struct ObjectCache {
80    cache: Arc<RwLock<LruCache<ObjectId, Arc<PdfObject>>>>,
81}
82
83impl ObjectCache {
84    /// Create a new object cache
85    pub fn new(capacity: usize) -> Self {
86        Self {
87            cache: Arc::new(RwLock::new(LruCache::new(capacity))),
88        }
89    }
90
91    /// Get an object from the cache
92    pub fn get(&self, id: &ObjectId) -> Option<Arc<PdfObject>> {
93        if let Ok(mut cache) = self.cache.write() {
94            cache.get(id).cloned()
95        } else {
96            None
97        }
98    }
99
100    /// Store an object in the cache
101    pub fn put(&self, id: ObjectId, object: Arc<PdfObject>) {
102        if let Ok(mut cache) = self.cache.write() {
103            cache.put(id, object);
104        }
105    }
106
107    /// Clear the cache
108    pub fn clear(&self) {
109        if let Ok(mut cache) = self.cache.write() {
110            cache.clear();
111        }
112    }
113
114    /// Get cache statistics
115    pub fn stats(&self) -> CacheStats {
116        if let Ok(cache) = self.cache.read() {
117            CacheStats {
118                size: cache.len(),
119                capacity: cache.capacity,
120            }
121        } else {
122            CacheStats::default()
123        }
124    }
125}
126
127/// Cache statistics
128#[derive(Debug, Clone, Default)]
129pub struct CacheStats {
130    /// Current number of cached items
131    pub size: usize,
132    /// Maximum capacity
133    pub capacity: usize,
134}
135
136#[cfg(test)]
137mod tests {
138    use super::*;
139
140    #[test]
141    fn test_lru_cache_basic() {
142        let mut cache = LruCache::new(3);
143
144        // Test insertion
145        cache.put(1, "one");
146        cache.put(2, "two");
147        cache.put(3, "three");
148
149        assert_eq!(cache.len(), 3);
150        assert_eq!(cache.get(&1), Some(&"one"));
151        assert_eq!(cache.get(&2), Some(&"two"));
152        assert_eq!(cache.get(&3), Some(&"three"));
153    }
154
155    #[test]
156    fn test_lru_cache_eviction() {
157        let mut cache = LruCache::new(3);
158
159        cache.put(1, "one");
160        cache.put(2, "two");
161        cache.put(3, "three");
162
163        // This should evict the least recently used (1)
164        cache.put(4, "four");
165
166        assert_eq!(cache.len(), 3);
167        assert_eq!(cache.get(&1), None);
168        assert_eq!(cache.get(&2), Some(&"two"));
169        assert_eq!(cache.get(&3), Some(&"three"));
170        assert_eq!(cache.get(&4), Some(&"four"));
171    }
172
173    #[test]
174    fn test_lru_cache_access_order() {
175        let mut cache = LruCache::new(3);
176
177        cache.put(1, "one");
178        cache.put(2, "two");
179        cache.put(3, "three");
180
181        // Access 1, making it recently used
182        assert_eq!(cache.get(&1), Some(&"one"));
183
184        // Add 4, should evict 2 (least recently used)
185        cache.put(4, "four");
186
187        assert_eq!(cache.get(&1), Some(&"one"));
188        assert_eq!(cache.get(&2), None);
189        assert_eq!(cache.get(&3), Some(&"three"));
190        assert_eq!(cache.get(&4), Some(&"four"));
191    }
192
193    #[test]
194    fn test_lru_cache_update() {
195        let mut cache = LruCache::new(3);
196
197        cache.put(1, "one");
198        cache.put(2, "two");
199        cache.put(3, "three");
200
201        // Update existing key
202        cache.put(2, "two-updated");
203
204        assert_eq!(cache.len(), 3);
205        assert_eq!(cache.get(&2), Some(&"two-updated"));
206
207        // 2 should now be most recently used
208        cache.put(4, "four");
209
210        assert_eq!(cache.get(&1), None); // Evicted
211        assert_eq!(cache.get(&2), Some(&"two-updated"));
212    }
213
214    #[test]
215    fn test_lru_cache_clear() {
216        let mut cache = LruCache::new(3);
217
218        cache.put(1, "one");
219        cache.put(2, "two");
220        cache.put(3, "three");
221
222        cache.clear();
223
224        assert_eq!(cache.len(), 0);
225        assert!(cache.is_empty());
226        assert_eq!(cache.get(&1), None);
227    }
228
229    #[test]
230    fn test_object_cache() {
231        let cache = ObjectCache::new(10);
232
233        let obj1 = Arc::new(PdfObject::Integer(42));
234        let obj2 = Arc::new(PdfObject::String(crate::parser::PdfString::new(
235            b"test".to_vec(),
236        )));
237
238        let id1 = ObjectId::new(1, 0);
239        let id2 = ObjectId::new(2, 0);
240
241        cache.put(id1, obj1.clone());
242        cache.put(id2, obj2.clone());
243
244        assert_eq!(cache.get(&id1), Some(obj1));
245        assert_eq!(cache.get(&id2), Some(obj2));
246
247        let stats = cache.stats();
248        assert_eq!(stats.size, 2);
249        assert_eq!(stats.capacity, 10);
250    }
251
252    #[test]
253    fn test_object_cache_clear() {
254        let cache = ObjectCache::new(5);
255
256        let obj = Arc::new(PdfObject::Boolean(true));
257        let id = ObjectId::new(1, 0);
258
259        cache.put(id, obj.clone());
260        assert_eq!(cache.get(&id), Some(obj));
261
262        cache.clear();
263        assert_eq!(cache.get(&id), None);
264
265        let stats = cache.stats();
266        assert_eq!(stats.size, 0);
267    }
268
269    #[test]
270    fn test_lru_cache_zero_capacity() {
271        let mut cache = LruCache::new(0);
272
273        cache.put(1, "one");
274        assert_eq!(cache.len(), 0);
275        assert!(cache.is_empty());
276        assert_eq!(cache.get(&1), None);
277    }
278
279    #[test]
280    fn test_lru_cache_single_capacity() {
281        let mut cache = LruCache::new(1);
282
283        cache.put(1, "one");
284        assert_eq!(cache.len(), 1);
285        assert_eq!(cache.get(&1), Some(&"one"));
286
287        cache.put(2, "two");
288        assert_eq!(cache.len(), 1);
289        assert_eq!(cache.get(&1), None);
290        assert_eq!(cache.get(&2), Some(&"two"));
291    }
292
293    #[test]
294    fn test_lru_cache_repeated_access() {
295        let mut cache = LruCache::new(3);
296
297        cache.put(1, "one");
298        cache.put(2, "two");
299        cache.put(3, "three");
300
301        // Access key 1 multiple times
302        for _ in 0..5 {
303            assert_eq!(cache.get(&1), Some(&"one"));
304        }
305
306        // Add new item, should evict key 2 (least recently used)
307        cache.put(4, "four");
308
309        assert_eq!(cache.get(&1), Some(&"one"));
310        assert_eq!(cache.get(&2), None);
311        assert_eq!(cache.get(&3), Some(&"three"));
312        assert_eq!(cache.get(&4), Some(&"four"));
313    }
314
315    #[test]
316    fn test_lru_cache_get_nonexistent() {
317        let mut cache = LruCache::new(3);
318
319        cache.put(1, "one");
320        cache.put(2, "two");
321
322        assert_eq!(cache.get(&3), None);
323        assert_eq!(cache.get(&99), None);
324
325        // Cache should remain unchanged
326        assert_eq!(cache.len(), 2);
327        assert_eq!(cache.get(&1), Some(&"one"));
328        assert_eq!(cache.get(&2), Some(&"two"));
329    }
330
331    #[test]
332    fn test_lru_cache_large_capacity() {
333        let mut cache = LruCache::new(1000);
334
335        // Fill cache with items
336        for i in 0..500 {
337            cache.put(i, format!("value_{i}"));
338        }
339
340        assert_eq!(cache.len(), 500);
341
342        // Access all items to verify they're all there
343        for i in 0..500 {
344            assert_eq!(cache.get(&i), Some(&format!("value_{i}")));
345        }
346    }
347
348    #[test]
349    fn test_lru_cache_complex_eviction_pattern() {
350        let mut cache = LruCache::new(4);
351
352        // Fill cache
353        cache.put(1, "one");
354        cache.put(2, "two");
355        cache.put(3, "three");
356        cache.put(4, "four");
357
358        // Access keys in specific order
359        assert_eq!(cache.get(&2), Some(&"two"));
360        assert_eq!(cache.get(&4), Some(&"four"));
361
362        // Add new items - should evict 1 and 3
363        cache.put(5, "five");
364        cache.put(6, "six");
365
366        assert_eq!(cache.get(&1), None);
367        assert_eq!(cache.get(&2), Some(&"two"));
368        assert_eq!(cache.get(&3), None);
369        assert_eq!(cache.get(&4), Some(&"four"));
370        assert_eq!(cache.get(&5), Some(&"five"));
371        assert_eq!(cache.get(&6), Some(&"six"));
372    }
373
374    #[test]
375    fn test_lru_cache_update_preserves_capacity() {
376        let mut cache = LruCache::new(2);
377
378        cache.put(1, "one");
379        cache.put(2, "two");
380
381        // Update key 1 - this should move it to the front
382        cache.put(1, "one_updated");
383
384        assert_eq!(cache.len(), 2);
385
386        // Add new key - since we updated key 1 most recently, key 2 should be evicted
387        cache.put(3, "three");
388
389        // Check that key 1 (most recently updated) is still there
390        assert_eq!(cache.get(&1), Some(&"one_updated"));
391        // Check that key 2 (least recently used) was evicted
392        assert_eq!(cache.get(&2), None);
393        // Check that key 3 (newly added) is there
394        assert_eq!(cache.get(&3), Some(&"three"));
395    }
396
397    #[test]
398    fn test_lru_cache_with_string_keys() {
399        let mut cache = LruCache::new(3);
400
401        cache.put("key1".to_string(), 1);
402        cache.put("key2".to_string(), 2);
403        cache.put("key3".to_string(), 3);
404
405        assert_eq!(cache.get(&"key1".to_string()), Some(&1));
406        assert_eq!(cache.get(&"key2".to_string()), Some(&2));
407        assert_eq!(cache.get(&"key3".to_string()), Some(&3));
408
409        cache.put("key4".to_string(), 4);
410
411        assert_eq!(cache.get(&"key1".to_string()), None);
412        assert_eq!(cache.get(&"key4".to_string()), Some(&4));
413    }
414
415    #[test]
416    fn test_object_cache_with_different_object_types() {
417        let cache = ObjectCache::new(10);
418
419        let int_obj = Arc::new(PdfObject::Integer(42));
420        let bool_obj = Arc::new(PdfObject::Boolean(false));
421        let null_obj = Arc::new(PdfObject::Null);
422        let real_obj = Arc::new(PdfObject::Real(3.14));
423
424        let id1 = ObjectId::new(1, 0);
425        let id2 = ObjectId::new(2, 0);
426        let id3 = ObjectId::new(3, 0);
427        let id4 = ObjectId::new(4, 0);
428
429        cache.put(id1, int_obj.clone());
430        cache.put(id2, bool_obj.clone());
431        cache.put(id3, null_obj.clone());
432        cache.put(id4, real_obj.clone());
433
434        assert_eq!(cache.get(&id1), Some(int_obj));
435        assert_eq!(cache.get(&id2), Some(bool_obj));
436        assert_eq!(cache.get(&id3), Some(null_obj));
437        assert_eq!(cache.get(&id4), Some(real_obj));
438
439        let stats = cache.stats();
440        assert_eq!(stats.size, 4);
441    }
442
443    #[test]
444    fn test_object_cache_eviction() {
445        let cache = ObjectCache::new(2);
446
447        let obj1 = Arc::new(PdfObject::Integer(1));
448        let obj2 = Arc::new(PdfObject::Integer(2));
449        let obj3 = Arc::new(PdfObject::Integer(3));
450
451        let id1 = ObjectId::new(1, 0);
452        let id2 = ObjectId::new(2, 0);
453        let id3 = ObjectId::new(3, 0);
454
455        cache.put(id1, obj1);
456        cache.put(id2, obj2.clone());
457
458        let stats = cache.stats();
459        assert_eq!(stats.size, 2);
460
461        // This should evict the first object
462        cache.put(id3, obj3.clone());
463
464        let stats = cache.stats();
465        assert_eq!(stats.size, 2);
466        assert_eq!(cache.get(&id1), None);
467        assert_eq!(cache.get(&id2), Some(obj2));
468        assert_eq!(cache.get(&id3), Some(obj3));
469    }
470
471    #[test]
472    fn test_object_cache_get_nonexistent() {
473        let cache = ObjectCache::new(5);
474
475        let obj = Arc::new(PdfObject::Integer(42));
476        let id1 = ObjectId::new(1, 0);
477        let id2 = ObjectId::new(2, 0);
478
479        cache.put(id1, obj.clone());
480
481        assert_eq!(cache.get(&id1), Some(obj));
482        assert_eq!(cache.get(&id2), None);
483
484        let stats = cache.stats();
485        assert_eq!(stats.size, 1);
486    }
487
488    #[test]
489    fn test_object_cache_update_existing() {
490        let cache = ObjectCache::new(3);
491
492        let obj1 = Arc::new(PdfObject::Integer(42));
493        let obj2 = Arc::new(PdfObject::Integer(100));
494        let id = ObjectId::new(1, 0);
495
496        cache.put(id, obj1);
497        cache.put(id, obj2.clone());
498
499        assert_eq!(cache.get(&id), Some(obj2));
500
501        let stats = cache.stats();
502        assert_eq!(stats.size, 1);
503    }
504
505    #[test]
506    fn test_object_cache_with_generation_numbers() {
507        let cache = ObjectCache::new(5);
508
509        let obj1 = Arc::new(PdfObject::Integer(1));
510        let obj2 = Arc::new(PdfObject::Integer(2));
511
512        let id1_gen0 = ObjectId::new(1, 0);
513        let id1_gen1 = ObjectId::new(1, 1);
514
515        cache.put(id1_gen0, obj1.clone());
516        cache.put(id1_gen1, obj2.clone());
517
518        // Different generations should be treated as different keys
519        assert_eq!(cache.get(&id1_gen0), Some(obj1));
520        assert_eq!(cache.get(&id1_gen1), Some(obj2));
521
522        let stats = cache.stats();
523        assert_eq!(stats.size, 2);
524    }
525
526    #[test]
527    fn test_cache_stats_debug_clone_default() {
528        let stats = CacheStats {
529            size: 5,
530            capacity: 10,
531        };
532
533        let debug_str = format!("{stats:?}");
534        assert!(debug_str.contains("CacheStats"));
535        assert!(debug_str.contains("5"));
536        assert!(debug_str.contains("10"));
537
538        let cloned = stats;
539        assert_eq!(cloned.size, 5);
540        assert_eq!(cloned.capacity, 10);
541
542        let default_stats = CacheStats::default();
543        assert_eq!(default_stats.size, 0);
544        assert_eq!(default_stats.capacity, 0);
545    }
546
547    #[test]
548    fn test_object_cache_stats_after_operations() {
549        let cache = ObjectCache::new(3);
550
551        // Initially empty
552        let stats = cache.stats();
553        assert_eq!(stats.size, 0);
554        assert_eq!(stats.capacity, 3);
555
556        // Add one item
557        let obj1 = Arc::new(PdfObject::Integer(1));
558        let id1 = ObjectId::new(1, 0);
559        cache.put(id1, obj1);
560
561        let stats = cache.stats();
562        assert_eq!(stats.size, 1);
563        assert_eq!(stats.capacity, 3);
564
565        // Fill to capacity
566        let obj2 = Arc::new(PdfObject::Integer(2));
567        let obj3 = Arc::new(PdfObject::Integer(3));
568        let id2 = ObjectId::new(2, 0);
569        let id3 = ObjectId::new(3, 0);
570
571        cache.put(id2, obj2);
572        cache.put(id3, obj3);
573
574        let stats = cache.stats();
575        assert_eq!(stats.size, 3);
576        assert_eq!(stats.capacity, 3);
577
578        // Clear cache
579        cache.clear();
580
581        let stats = cache.stats();
582        assert_eq!(stats.size, 0);
583        assert_eq!(stats.capacity, 3);
584    }
585
586    #[test]
587    fn test_lru_cache_stress_test() {
588        let mut cache = LruCache::new(100);
589
590        // Fill cache beyond capacity
591        for i in 0..200 {
592            cache.put(i, format!("value_{i}"));
593        }
594
595        // Should only contain last 100 items
596        assert_eq!(cache.len(), 100);
597
598        for i in 0..100 {
599            assert_eq!(cache.get(&i), None);
600        }
601
602        for i in 100..200 {
603            assert_eq!(cache.get(&i), Some(&format!("value_{i}")));
604        }
605    }
606
607    #[test]
608    fn test_object_cache_concurrent_access_simulation() {
609        use std::sync::Arc as StdArc;
610        use std::thread;
611
612        let cache = StdArc::new(ObjectCache::new(10));
613        let mut handles = vec![];
614
615        // Simulate concurrent access (though not truly concurrent in test)
616        for i in 0..5 {
617            let cache_clone = cache.clone();
618            let handle = thread::spawn(move || {
619                let obj = Arc::new(PdfObject::Integer(i));
620                let id = ObjectId::new(i as u32, 0);
621
622                cache_clone.put(id, obj.clone());
623                assert_eq!(cache_clone.get(&id), Some(obj));
624            });
625            handles.push(handle);
626        }
627
628        for handle in handles {
629            handle.join().unwrap();
630        }
631
632        let stats = cache.stats();
633        assert_eq!(stats.size, 5);
634    }
635}