Skip to main content

oxirs_arq/
query_cache_lru.rs

1//! LRU Query Cache
2//!
3//! This module provides a focused, generic LRU cache for compiled SPARQL
4//! query artifacts.  It differs from `query_plan_cache` in that it:
5//!
6//! - Exposes a simple `get` / `insert` / `evict_oldest` / `hit_rate` API
7//! - Uses a doubly-tracked HashMap + VecDeque for O(1) amortised LRU operations
8//! - Is generic over the cached value type `T`
9//!
10//! # Key Types
11//!
12//! - [`CacheFingerprint`] — structural hash of an algebra expression
13//! - [`CacheEntry<T>`] — a cached value with access tracking
14//! - [`LruQueryCache<T>`] — generic LRU cache with hit/miss statistics
15//! - [`QueryCacheManager`] — combines fingerprinting + caching for string plans
16//! - [`CacheManagerStats`] — snapshot of cache health metrics
17
18use crate::algebra::Algebra;
19use std::collections::{hash_map::DefaultHasher, HashMap, VecDeque};
20use std::hash::{Hash, Hasher};
21use std::time::Instant;
22
23// ---------------------------------------------------------------------------
24// CacheFingerprint
25// ---------------------------------------------------------------------------
26
27/// A structural hash of a SPARQL algebra expression used as a cache key.
28///
29/// Two algebra expressions that are structurally identical (same operators,
30/// same variables and constants in the same positions) produce the same
31/// fingerprint.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
33pub struct CacheFingerprint(pub u64);
34
35impl CacheFingerprint {
36    /// Compute a fingerprint from a SPARQL algebra tree.
37    ///
38    /// Uses `DefaultHasher` on the `Debug` representation of the algebra.
39    /// This is deterministic within a single process run and is sufficient
40    /// for an in-process LRU cache.
41    pub fn from_algebra(algebra: &Algebra) -> Self {
42        let mut hasher = DefaultHasher::new();
43        format!("{algebra:?}").hash(&mut hasher);
44        Self(hasher.finish())
45    }
46
47    /// Create a fingerprint from a raw 64-bit value (for testing).
48    pub fn from_raw(raw: u64) -> Self {
49        Self(raw)
50    }
51
52    /// Return the underlying hash value.
53    pub fn raw(&self) -> u64 {
54        self.0
55    }
56}
57
58impl std::fmt::Display for CacheFingerprint {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        write!(f, "Fingerprint({:016x})", self.0)
61    }
62}
63
64// ---------------------------------------------------------------------------
65// CacheEntry
66// ---------------------------------------------------------------------------
67
68/// A single entry in the LRU cache.
69#[derive(Debug, Clone)]
70pub struct CacheEntry<T> {
71    /// The cached value.
72    pub value: T,
73    /// Number of times this entry has been accessed.
74    pub hit_count: u64,
75    /// When this entry was first inserted.
76    pub created_at: Instant,
77    /// When this entry was last accessed.
78    pub last_accessed: Instant,
79}
80
81impl<T> CacheEntry<T> {
82    /// Create a new entry wrapping `value`.
83    pub fn new(value: T) -> Self {
84        let now = Instant::now();
85        Self {
86            value,
87            hit_count: 0,
88            created_at: now,
89            last_accessed: now,
90        }
91    }
92
93    /// Record an access and return the updated hit count.
94    pub fn touch(&mut self) -> u64 {
95        self.hit_count += 1;
96        self.last_accessed = Instant::now();
97        self.hit_count
98    }
99
100    /// Age of this entry in seconds since creation.
101    pub fn age_secs(&self) -> u64 {
102        self.created_at.elapsed().as_secs()
103    }
104
105    /// Time since last access in seconds.
106    pub fn idle_secs(&self) -> u64 {
107        self.last_accessed.elapsed().as_secs()
108    }
109}
110
111// ---------------------------------------------------------------------------
112// LruQueryCache
113// ---------------------------------------------------------------------------
114
115/// Generic LRU cache keyed by [`CacheFingerprint`].
116///
117/// Eviction policy: on `evict_oldest`, the entry with the oldest
118/// `last_accessed` timestamp is removed.  The `insertion_order` deque
119/// maintains insertion ordering for O(1) oldest-insertion eviction fallback.
120///
121/// Hit/miss statistics are tracked across the lifetime of the cache.
122pub struct LruQueryCache<T> {
123    entries: HashMap<CacheFingerprint, CacheEntry<T>>,
124    /// Tracks insertion order for O(1) amortised LRU candidate search.
125    insertion_order: VecDeque<CacheFingerprint>,
126    /// Maximum number of entries.
127    max_size: usize,
128    total_hits: u64,
129    total_misses: u64,
130}
131
132impl<T: Clone> LruQueryCache<T> {
133    /// Create a new cache with the given maximum number of entries.
134    pub fn new(max_size: usize) -> Self {
135        Self {
136            entries: HashMap::new(),
137            insertion_order: VecDeque::new(),
138            max_size: max_size.max(1),
139            total_hits: 0,
140            total_misses: 0,
141        }
142    }
143
144    // ------------------------------------------------------------------
145    // Core operations
146    // ------------------------------------------------------------------
147
148    /// Look up a value by fingerprint.
149    ///
150    /// Records a hit or miss and updates the entry's access time on hit.
151    pub fn get(&mut self, key: &CacheFingerprint) -> Option<&T> {
152        if let Some(entry) = self.entries.get_mut(key) {
153            entry.touch();
154            self.total_hits += 1;
155            Some(&self.entries[key].value)
156        } else {
157            self.total_misses += 1;
158            None
159        }
160    }
161
162    /// Insert a value into the cache.
163    ///
164    /// If the cache is full and the key is not already present, `evict_oldest`
165    /// is called before insertion.
166    pub fn insert(&mut self, key: CacheFingerprint, value: T) {
167        if self.entries.len() >= self.max_size && !self.entries.contains_key(&key) {
168            self.evict_oldest();
169        }
170        if !self.insertion_order.contains(&key) {
171            self.insertion_order.push_back(key);
172        }
173        self.entries.insert(key, CacheEntry::new(value));
174    }
175
176    /// Evict the least-recently-used entry (the one with the oldest
177    /// `last_accessed` timestamp).
178    ///
179    /// Falls back to evicting the oldest-inserted entry if all timestamps are
180    /// equal (e.g., in tests).
181    pub fn evict_oldest(&mut self) {
182        if self.entries.is_empty() {
183            return;
184        }
185        // Find the entry with the oldest last_accessed.
186        let oldest_key = self
187            .entries
188            .iter()
189            .min_by_key(|(_, entry)| entry.last_accessed)
190            .map(|(k, _)| *k);
191
192        if let Some(key) = oldest_key {
193            self.entries.remove(&key);
194            self.insertion_order.retain(|k| *k != key);
195        }
196    }
197
198    // ------------------------------------------------------------------
199    // Statistics
200    // ------------------------------------------------------------------
201
202    /// Fraction of lookups that were cache hits: `hits / (hits + misses)`.
203    ///
204    /// Returns `0.0` if no lookups have occurred.
205    pub fn hit_rate(&self) -> f64 {
206        let total = self.total_hits + self.total_misses;
207        if total == 0 {
208            0.0
209        } else {
210            self.total_hits as f64 / total as f64
211        }
212    }
213
214    /// Total number of cache hits.
215    pub fn total_hits(&self) -> u64 {
216        self.total_hits
217    }
218
219    /// Total number of cache misses.
220    pub fn total_misses(&self) -> u64 {
221        self.total_misses
222    }
223
224    /// Number of entries currently in the cache.
225    pub fn len(&self) -> usize {
226        self.entries.len()
227    }
228
229    /// Whether the cache is empty.
230    pub fn is_empty(&self) -> bool {
231        self.entries.is_empty()
232    }
233
234    /// Remove all entries from the cache (does not reset hit/miss counters).
235    pub fn clear(&mut self) {
236        self.entries.clear();
237        self.insertion_order.clear();
238    }
239
240    /// Maximum number of entries the cache can hold.
241    pub fn max_size(&self) -> usize {
242        self.max_size
243    }
244}
245
246// ---------------------------------------------------------------------------
247// CompiledQueryCache type alias
248// ---------------------------------------------------------------------------
249
250/// Pre-compiled query plan cache.  Uses `Vec<u8>` as a stand-in for compiled
251/// plan bytecode or serialised plan structures.
252pub type CompiledQueryCache = LruQueryCache<Vec<u8>>;
253
254// ---------------------------------------------------------------------------
255// QueryCacheManager
256// ---------------------------------------------------------------------------
257
258/// Combines [`CacheFingerprint`] computation with [`LruQueryCache`] for easy
259/// use: supply an `&Algebra` and get (or compute) a cached string plan.
260pub struct QueryCacheManager {
261    cache: LruQueryCache<String>,
262}
263
264impl QueryCacheManager {
265    /// Create a new manager with the given cache capacity.
266    pub fn new(max_size: usize) -> Self {
267        Self {
268            cache: LruQueryCache::new(max_size),
269        }
270    }
271
272    /// Return the cached plan for `algebra`, or compute it with `compute` and
273    /// insert it.
274    ///
275    /// Returns a clone of the (possibly just-inserted) plan string.
276    pub fn get_or_insert(&mut self, algebra: &Algebra, compute: impl FnOnce() -> String) -> String {
277        let key = CacheFingerprint::from_algebra(algebra);
278        if let Some(existing) = self.cache.get(&key) {
279            existing.clone()
280        } else {
281            let plan = compute();
282            self.cache.insert(key, plan.clone());
283            plan
284        }
285    }
286
287    /// Return a snapshot of cache statistics.
288    pub fn stats(&self) -> CacheManagerStats {
289        CacheManagerStats {
290            hit_rate: self.cache.hit_rate(),
291            entries: self.cache.len(),
292            total_hits: self.cache.total_hits(),
293            total_misses: self.cache.total_misses(),
294        }
295    }
296
297    /// Evict the oldest entry from the cache.
298    pub fn evict_oldest(&mut self) {
299        self.cache.evict_oldest();
300    }
301
302    /// Clear the cache.
303    pub fn clear(&mut self) {
304        self.cache.clear();
305    }
306
307    /// Number of cached plans.
308    pub fn len(&self) -> usize {
309        self.cache.len()
310    }
311
312    /// Whether the cache is empty.
313    pub fn is_empty(&self) -> bool {
314        self.cache.is_empty()
315    }
316}
317
318// ---------------------------------------------------------------------------
319// CacheManagerStats
320// ---------------------------------------------------------------------------
321
322/// Snapshot of cache health metrics.
323#[derive(Debug, Clone, PartialEq)]
324pub struct CacheManagerStats {
325    /// Fraction of lookups that were hits.
326    pub hit_rate: f64,
327    /// Number of entries currently in the cache.
328    pub entries: usize,
329    /// Cumulative cache hits.
330    pub total_hits: u64,
331    /// Cumulative cache misses.
332    pub total_misses: u64,
333}
334
335impl CacheManagerStats {
336    /// Whether the cache has a high hit rate (≥ 80%).
337    pub fn is_healthy(&self) -> bool {
338        self.total_hits + self.total_misses == 0 || self.hit_rate >= 0.8
339    }
340}
341
342// ---------------------------------------------------------------------------
343// Tests
344// ---------------------------------------------------------------------------
345
346#[cfg(test)]
347mod tests {
348    use super::*;
349    use crate::algebra::{Algebra, Term, TriplePattern};
350    use oxirs_core::model::NamedNode;
351
352    fn var_term(name: &str) -> Term {
353        use oxirs_core::model::Variable;
354        Term::Variable(Variable::new(name).expect("valid variable"))
355    }
356
357    fn make_bgp(label: &str) -> Algebra {
358        use oxirs_core::model::Variable;
359        Algebra::Bgp(vec![TriplePattern {
360            subject: Term::Variable(Variable::new(label).expect("valid variable")),
361            predicate: Term::Iri(NamedNode::new("http://ex.org/p").expect("valid IRI")),
362            object: var_term("o"),
363        }])
364    }
365
366    // ------------------------------------------------------------------
367    // CacheFingerprint tests
368    // ------------------------------------------------------------------
369
370    #[test]
371    fn test_fingerprint_same_algebra_same_hash() {
372        let a = make_bgp("s");
373        let b = make_bgp("s");
374        assert_eq!(
375            CacheFingerprint::from_algebra(&a),
376            CacheFingerprint::from_algebra(&b)
377        );
378    }
379
380    #[test]
381    fn test_fingerprint_different_algebra_different_hash() {
382        let a = make_bgp("s");
383        let b = make_bgp("t");
384        assert_ne!(
385            CacheFingerprint::from_algebra(&a),
386            CacheFingerprint::from_algebra(&b)
387        );
388    }
389
390    #[test]
391    fn test_fingerprint_from_raw() {
392        let fp = CacheFingerprint::from_raw(42);
393        assert_eq!(fp.raw(), 42);
394    }
395
396    #[test]
397    fn test_fingerprint_display() {
398        let fp = CacheFingerprint::from_raw(255);
399        let s = format!("{fp}");
400        assert!(s.starts_with("Fingerprint("));
401    }
402
403    // ------------------------------------------------------------------
404    // CacheEntry tests
405    // ------------------------------------------------------------------
406
407    #[test]
408    fn test_cache_entry_new() {
409        let entry: CacheEntry<i32> = CacheEntry::new(42);
410        assert_eq!(entry.value, 42);
411        assert_eq!(entry.hit_count, 0);
412    }
413
414    #[test]
415    fn test_cache_entry_touch() {
416        let mut entry: CacheEntry<&str> = CacheEntry::new("hello");
417        assert_eq!(entry.touch(), 1);
418        assert_eq!(entry.touch(), 2);
419        assert_eq!(entry.hit_count, 2);
420    }
421
422    #[test]
423    fn test_cache_entry_age() {
424        let entry: CacheEntry<()> = CacheEntry::new(());
425        assert!(entry.age_secs() < 5);
426    }
427
428    // ------------------------------------------------------------------
429    // LruQueryCache tests
430    // ------------------------------------------------------------------
431
432    #[test]
433    fn test_cache_new_empty() {
434        let cache: LruQueryCache<String> = LruQueryCache::new(10);
435        assert!(cache.is_empty());
436        assert_eq!(cache.len(), 0);
437    }
438
439    #[test]
440    fn test_cache_insert_and_get() {
441        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
442        let key = CacheFingerprint::from_raw(1);
443        cache.insert(key, "plan_a".to_string());
444        let result = cache.get(&key);
445        assert_eq!(result, Some(&"plan_a".to_string()));
446    }
447
448    #[test]
449    fn test_cache_get_missing_returns_none() {
450        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
451        let key = CacheFingerprint::from_raw(999);
452        assert!(cache.get(&key).is_none());
453    }
454
455    #[test]
456    fn test_cache_hit_miss_counts() {
457        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
458        let key = CacheFingerprint::from_raw(1);
459        cache.insert(key, "plan".to_string());
460        cache.get(&key);
461        cache.get(&key);
462        let miss_key = CacheFingerprint::from_raw(2);
463        cache.get(&miss_key);
464        assert_eq!(cache.total_hits(), 2);
465        assert_eq!(cache.total_misses(), 1);
466    }
467
468    #[test]
469    fn test_cache_hit_rate_all_hits() {
470        let mut cache: LruQueryCache<i32> = LruQueryCache::new(10);
471        let key = CacheFingerprint::from_raw(1);
472        cache.insert(key, 42);
473        cache.get(&key);
474        cache.get(&key);
475        assert!((cache.hit_rate() - 1.0).abs() < 1e-9);
476    }
477
478    #[test]
479    fn test_cache_hit_rate_all_misses() {
480        let mut cache: LruQueryCache<i32> = LruQueryCache::new(10);
481        let key = CacheFingerprint::from_raw(1);
482        cache.get(&key);
483        assert!((cache.hit_rate() - 0.0).abs() < 1e-9);
484    }
485
486    #[test]
487    fn test_cache_hit_rate_no_lookups() {
488        let cache: LruQueryCache<i32> = LruQueryCache::new(10);
489        assert_eq!(cache.hit_rate(), 0.0);
490    }
491
492    #[test]
493    fn test_cache_evict_oldest() {
494        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
495        let key1 = CacheFingerprint::from_raw(1);
496        let key2 = CacheFingerprint::from_raw(2);
497        cache.insert(key1, "plan1".to_string());
498        cache.insert(key2, "plan2".to_string());
499        // Access key2 so key1 is the least recently used.
500        cache.get(&key2);
501        cache.evict_oldest();
502        assert_eq!(cache.len(), 1);
503        // key1 should be evicted (older last_accessed).
504        assert!(cache.entries.contains_key(&key2));
505    }
506
507    #[test]
508    fn test_cache_evict_oldest_empty() {
509        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
510        // Should not panic.
511        cache.evict_oldest();
512        assert!(cache.is_empty());
513    }
514
515    #[test]
516    fn test_cache_max_size_triggers_eviction() {
517        let mut cache: LruQueryCache<String> = LruQueryCache::new(3);
518        for i in 0..5 {
519            cache.insert(CacheFingerprint::from_raw(i as u64), format!("plan_{i}"));
520        }
521        assert!(cache.len() <= 3, "cache should not exceed max_size");
522    }
523
524    #[test]
525    fn test_cache_clear() {
526        let mut cache: LruQueryCache<String> = LruQueryCache::new(10);
527        cache.insert(CacheFingerprint::from_raw(1), "plan".to_string());
528        cache.clear();
529        assert!(cache.is_empty());
530    }
531
532    #[test]
533    fn test_cache_max_size() {
534        let cache: LruQueryCache<String> = LruQueryCache::new(50);
535        assert_eq!(cache.max_size(), 50);
536    }
537
538    // ------------------------------------------------------------------
539    // QueryCacheManager tests
540    // ------------------------------------------------------------------
541
542    #[test]
543    fn test_manager_get_or_insert_miss_then_hit() {
544        let mut manager = QueryCacheManager::new(10);
545        let algebra = make_bgp("s");
546
547        let mut compute_count = 0usize;
548        let plan = manager.get_or_insert(&algebra, || {
549            compute_count += 1;
550            "SELECT * WHERE { ?s ?p ?o }".to_string()
551        });
552        assert_eq!(plan, "SELECT * WHERE { ?s ?p ?o }");
553
554        // Second call should hit the cache (compute_count stays 1).
555        let plan2 = manager.get_or_insert(&algebra, || {
556            compute_count += 1;
557            "SHOULD NOT BE CALLED".to_string()
558        });
559        assert_eq!(plan2, "SELECT * WHERE { ?s ?p ?o }");
560        assert_eq!(compute_count, 1, "compute should only be called once");
561    }
562
563    #[test]
564    fn test_manager_different_algebras_separate_entries() {
565        let mut manager = QueryCacheManager::new(10);
566        let a1 = make_bgp("s");
567        let a2 = make_bgp("t");
568        let plan1 = manager.get_or_insert(&a1, || "plan_s".to_string());
569        let plan2 = manager.get_or_insert(&a2, || "plan_t".to_string());
570        assert_ne!(plan1, plan2);
571        assert_eq!(manager.len(), 2);
572    }
573
574    #[test]
575    fn test_manager_stats_hit_rate() {
576        let mut manager = QueryCacheManager::new(10);
577        let algebra = make_bgp("s");
578        manager.get_or_insert(&algebra, || "plan".to_string()); // miss
579        manager.get_or_insert(&algebra, || unreachable!()); // hit
580        let stats = manager.stats();
581        assert!(stats.total_hits >= 1);
582        assert!(stats.total_misses >= 1);
583        assert!(stats.hit_rate > 0.0);
584    }
585
586    #[test]
587    fn test_manager_evict_oldest() {
588        let mut manager = QueryCacheManager::new(2);
589        let a1 = make_bgp("s");
590        let a2 = make_bgp("t");
591        manager.get_or_insert(&a1, || "plan1".to_string());
592        manager.get_or_insert(&a2, || "plan2".to_string());
593        manager.evict_oldest();
594        assert_eq!(manager.len(), 1);
595    }
596
597    #[test]
598    fn test_manager_clear() {
599        let mut manager = QueryCacheManager::new(10);
600        let algebra = make_bgp("s");
601        manager.get_or_insert(&algebra, || "plan".to_string());
602        manager.clear();
603        assert!(manager.is_empty());
604    }
605
606    // ------------------------------------------------------------------
607    // CacheManagerStats tests
608    // ------------------------------------------------------------------
609
610    #[test]
611    fn test_cache_manager_stats_healthy() {
612        let stats = CacheManagerStats {
613            hit_rate: 0.9,
614            entries: 5,
615            total_hits: 9,
616            total_misses: 1,
617        };
618        assert!(stats.is_healthy());
619    }
620
621    #[test]
622    fn test_cache_manager_stats_unhealthy() {
623        let stats = CacheManagerStats {
624            hit_rate: 0.3,
625            entries: 5,
626            total_hits: 3,
627            total_misses: 7,
628        };
629        assert!(!stats.is_healthy());
630    }
631
632    #[test]
633    fn test_cache_manager_stats_empty_is_healthy() {
634        let stats = CacheManagerStats {
635            hit_rate: 0.0,
636            entries: 0,
637            total_hits: 0,
638            total_misses: 0,
639        };
640        assert!(
641            stats.is_healthy(),
642            "empty cache should be considered healthy"
643        );
644    }
645}