vibesql_storage/
columnar_cache.rs

1//! Columnar Cache - LRU Cache for Columnar Table Representations
2//!
3//! This module provides a database-level LRU cache for columnar table representations,
4//! enabling automatic workload adaptation for analytical queries without manual configuration.
5//!
6//! ## Design Goals
7//!
8//! 1. **Arc-based sharing** - Avoid clone overhead by sharing cached data
9//! 2. **Memory-bounded** - Configurable memory budget with LRU eviction
10//! 3. **Database-level** - Global view enables smart eviction decisions
11//! 4. **Statistics** - Monitor cache effectiveness for tuning
12//!
13//! ## Usage
14//!
15//! The cache is integrated at the `Database` level and used transparently by the executor
16//! when performing analytical queries. Tables are automatically cached on first access
17//! and evicted when the memory budget is exceeded.
18//!
19//! ## Thread Safety
20//!
21//! Uses `parking_lot::RwLock` on native platforms and `std::sync::RwLock` on WASM
22//! for thread-safe access to the cache.
23
24use std::sync::Arc;
25#[cfg(target_arch = "wasm32")]
26use std::sync::RwLock;
27
28// Platform-specific synchronization primitives
29#[cfg(not(target_arch = "wasm32"))]
30use parking_lot::RwLock;
31
32use crate::ColumnarTable;
33
34/// Normalize table name for cache key (case-insensitive matching)
35/// Uses uppercase to match SQLite/SQL standard behavior
36#[inline]
37fn normalize_cache_key(table_name: &str) -> String {
38    table_name.to_uppercase()
39}
40
41/// Statistics for monitoring cache effectiveness
42#[derive(Debug, Clone, Default)]
43pub struct CacheStats {
44    /// Number of cache hits
45    pub hits: u64,
46    /// Number of cache misses
47    pub misses: u64,
48    /// Number of cache evictions
49    pub evictions: u64,
50    /// Number of conversions performed (subset of misses that resulted in caching)
51    pub conversions: u64,
52    /// Number of invalidations (due to table modifications)
53    pub invalidations: u64,
54}
55
56impl CacheStats {
57    /// Get the cache hit rate as a percentage (0.0 to 100.0)
58    pub fn hit_rate(&self) -> f64 {
59        let total = self.hits + self.misses;
60        if total == 0 {
61            0.0
62        } else {
63            (self.hits as f64 / total as f64) * 100.0
64        }
65    }
66}
67
68/// Entry in the columnar cache
69struct CacheEntry {
70    /// The cached columnar table data
71    data: Arc<ColumnarTable>,
72    /// Size in bytes (cached to avoid recomputation)
73    size_bytes: usize,
74}
75
76/// LRU cache for columnar table representations
77///
78/// Provides memory-bounded caching of columnar table data with automatic
79/// eviction when the memory budget is exceeded. Tables are stored as `Arc`
80/// to enable zero-copy sharing between queries.
81///
82/// # Memory Management
83///
84/// The cache tracks memory usage based on `ColumnarTable::size_in_bytes()`.
85/// When inserting a new entry would exceed the budget, the least recently
86/// used entries are evicted until there's sufficient space.
87///
88/// # Example
89///
90/// ```text
91/// use vibesql_storage::columnar_cache::ColumnarCache;
92///
93/// // Create a cache with 256MB budget
94/// let cache = ColumnarCache::new(256 * 1024 * 1024);
95///
96/// // Get or create columnar representation
97/// if let Some(columnar) = cache.get("lineitem") {
98///     // Use cached data
99/// } else {
100///     // Convert and cache
101///     let columnar = table.scan_columnar()?;
102///     cache.insert("lineitem", columnar);
103/// }
104/// ```
105pub struct ColumnarCache {
106    /// LRU cache: table_name -> CacheEntry
107    /// The lru crate handles ordering, we just need to track size
108    cache: RwLock<lru::LruCache<String, CacheEntry>>,
109    /// Memory budget in bytes
110    max_memory: usize,
111    /// Current memory usage in bytes
112    current_memory: RwLock<usize>,
113    /// Cache statistics
114    stats: RwLock<CacheStats>,
115}
116
117impl ColumnarCache {
118    /// Create a new columnar cache with the specified memory budget
119    ///
120    /// # Arguments
121    /// * `max_memory` - Maximum memory budget in bytes
122    ///
123    /// # Example
124    /// ```text
125    /// // 256MB cache
126    /// let cache = ColumnarCache::new(256 * 1024 * 1024);
127    /// ```
128    pub fn new(max_memory: usize) -> Self {
129        // Use a reasonable capacity (1000 tables) - we manage eviction via memory budget
130        // The LRU library can't handle usize::MAX due to internal hash table allocation
131        let capacity = std::num::NonZeroUsize::new(1000).unwrap();
132        ColumnarCache {
133            cache: RwLock::new(lru::LruCache::new(capacity)),
134            max_memory,
135            current_memory: RwLock::new(0),
136            stats: RwLock::new(CacheStats::default()),
137        }
138    }
139
140    /// Get a cached columnar table representation
141    ///
142    /// Returns `Some(Arc<ColumnarTable>)` if the table is cached, `None` otherwise.
143    /// Accessing a cached entry marks it as recently used.
144    /// Table names are normalized for case-insensitive matching.
145    pub fn get(&self, table_name: &str) -> Option<Arc<ColumnarTable>> {
146        let key = normalize_cache_key(table_name);
147        #[cfg(not(target_arch = "wasm32"))]
148        {
149            let mut cache = self.cache.write();
150            let mut stats = self.stats.write();
151
152            if let Some(entry) = cache.get(&key) {
153                stats.hits += 1;
154                Some(Arc::clone(&entry.data))
155            } else {
156                stats.misses += 1;
157                None
158            }
159        }
160
161        #[cfg(target_arch = "wasm32")]
162        {
163            let mut cache = self.cache.write().unwrap();
164            let mut stats = self.stats.write().unwrap();
165
166            if let Some(entry) = cache.get(&key) {
167                stats.hits += 1;
168                Some(Arc::clone(&entry.data))
169            } else {
170                stats.misses += 1;
171                None
172            }
173        }
174    }
175
176    /// Insert or update a columnar table in the cache
177    ///
178    /// If the table is already cached, the existing entry is updated.
179    /// If inserting would exceed the memory budget, least recently used
180    /// entries are evicted until there's sufficient space.
181    /// Table names are normalized for case-insensitive matching.
182    ///
183    /// # Arguments
184    /// * `table_name` - Name of the table
185    /// * `columnar` - The columnar table data to cache
186    ///
187    /// # Returns
188    /// The Arc-wrapped columnar table (for immediate use)
189    pub fn insert(&self, table_name: &str, columnar: ColumnarTable) -> Arc<ColumnarTable> {
190        let key = normalize_cache_key(table_name);
191        let size_bytes = columnar.size_in_bytes();
192        let data = Arc::new(columnar);
193
194        #[cfg(not(target_arch = "wasm32"))]
195        {
196            let mut cache = self.cache.write();
197            let mut current_memory = self.current_memory.write();
198            let mut stats = self.stats.write();
199
200            // Remove existing entry if present
201            if let Some(old_entry) = cache.pop(&key) {
202                *current_memory = current_memory.saturating_sub(old_entry.size_bytes);
203            }
204
205            // Evict until we have space (or cache is empty)
206            while *current_memory + size_bytes > self.max_memory {
207                if let Some((_, evicted)) = cache.pop_lru() {
208                    *current_memory = current_memory.saturating_sub(evicted.size_bytes);
209                    stats.evictions += 1;
210                } else {
211                    // Cache is empty, can't evict more
212                    break;
213                }
214            }
215
216            // Insert new entry
217            let entry = CacheEntry { data: Arc::clone(&data), size_bytes };
218            cache.put(key, entry);
219            *current_memory += size_bytes;
220            stats.conversions += 1;
221        }
222
223        #[cfg(target_arch = "wasm32")]
224        {
225            let mut cache = self.cache.write().unwrap();
226            let mut current_memory = self.current_memory.write().unwrap();
227            let mut stats = self.stats.write().unwrap();
228
229            // Remove existing entry if present
230            if let Some(old_entry) = cache.pop(&key) {
231                *current_memory = current_memory.saturating_sub(old_entry.size_bytes);
232            }
233
234            // Evict until we have space (or cache is empty)
235            while *current_memory + size_bytes > self.max_memory {
236                if let Some((_, evicted)) = cache.pop_lru() {
237                    *current_memory = current_memory.saturating_sub(evicted.size_bytes);
238                    stats.evictions += 1;
239                } else {
240                    // Cache is empty, can't evict more
241                    break;
242                }
243            }
244
245            // Insert new entry
246            let entry = CacheEntry { data: Arc::clone(&data), size_bytes };
247            cache.put(key, entry);
248            *current_memory += size_bytes;
249            stats.conversions += 1;
250        }
251
252        data
253    }
254
255    /// Invalidate a cached table entry
256    ///
257    /// Called when a table is modified (INSERT/UPDATE/DELETE) to ensure
258    /// the cache doesn't serve stale data.
259    /// Table names are normalized for case-insensitive matching.
260    pub fn invalidate(&self, table_name: &str) {
261        let key = normalize_cache_key(table_name);
262        #[cfg(not(target_arch = "wasm32"))]
263        {
264            let mut cache = self.cache.write();
265            let mut current_memory = self.current_memory.write();
266            let mut stats = self.stats.write();
267
268            if let Some(entry) = cache.pop(&key) {
269                *current_memory = current_memory.saturating_sub(entry.size_bytes);
270                stats.invalidations += 1;
271            }
272        }
273
274        #[cfg(target_arch = "wasm32")]
275        {
276            let mut cache = self.cache.write().unwrap();
277            let mut current_memory = self.current_memory.write().unwrap();
278            let mut stats = self.stats.write().unwrap();
279
280            if let Some(entry) = cache.pop(&key) {
281                *current_memory = current_memory.saturating_sub(entry.size_bytes);
282                stats.invalidations += 1;
283            }
284        }
285    }
286
287    /// Clear all cached entries
288    pub fn clear(&self) {
289        #[cfg(not(target_arch = "wasm32"))]
290        {
291            let mut cache = self.cache.write();
292            let mut current_memory = self.current_memory.write();
293            cache.clear();
294            *current_memory = 0;
295        }
296
297        #[cfg(target_arch = "wasm32")]
298        {
299            let mut cache = self.cache.write().unwrap();
300            let mut current_memory = self.current_memory.write().unwrap();
301            cache.clear();
302            *current_memory = 0;
303        }
304    }
305
306    /// Get cache statistics
307    pub fn stats(&self) -> CacheStats {
308        #[cfg(not(target_arch = "wasm32"))]
309        {
310            self.stats.read().clone()
311        }
312
313        #[cfg(target_arch = "wasm32")]
314        {
315            self.stats.read().unwrap().clone()
316        }
317    }
318
319    /// Get current memory usage in bytes
320    pub fn memory_usage(&self) -> usize {
321        #[cfg(not(target_arch = "wasm32"))]
322        {
323            *self.current_memory.read()
324        }
325
326        #[cfg(target_arch = "wasm32")]
327        {
328            *self.current_memory.read().unwrap()
329        }
330    }
331
332    /// Get the memory budget in bytes
333    pub fn max_memory(&self) -> usize {
334        self.max_memory
335    }
336
337    /// Get the number of cached tables
338    pub fn len(&self) -> usize {
339        #[cfg(not(target_arch = "wasm32"))]
340        {
341            self.cache.read().len()
342        }
343
344        #[cfg(target_arch = "wasm32")]
345        {
346            self.cache.read().unwrap().len()
347        }
348    }
349
350    /// Check if the cache is empty
351    pub fn is_empty(&self) -> bool {
352        self.len() == 0
353    }
354
355    /// Check if a table is cached
356    /// Table names are normalized for case-insensitive matching.
357    pub fn contains(&self, table_name: &str) -> bool {
358        let key = normalize_cache_key(table_name);
359        #[cfg(not(target_arch = "wasm32"))]
360        {
361            self.cache.read().contains(&key)
362        }
363
364        #[cfg(target_arch = "wasm32")]
365        {
366            self.cache.read().unwrap().contains(&key)
367        }
368    }
369}
370
371impl std::fmt::Debug for ColumnarCache {
372    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
373        f.debug_struct("ColumnarCache")
374            .field("max_memory", &self.max_memory)
375            .field("current_memory", &self.memory_usage())
376            .field("entries", &self.len())
377            .field("stats", &self.stats())
378            .finish()
379    }
380}
381
382// Clone creates a new cache with same configuration but empty data
383impl Clone for ColumnarCache {
384    fn clone(&self) -> Self {
385        ColumnarCache::new(self.max_memory)
386    }
387}
388
389#[cfg(test)]
390mod tests {
391    use vibesql_types::SqlValue;
392
393    use super::*;
394    use crate::Row;
395
396    fn create_test_columnar(rows: usize) -> ColumnarTable {
397        let row_data: Vec<Row> = (0..rows)
398            .map(|i| {
399                Row::new(vec![
400                    SqlValue::Integer(i as i64),
401                    SqlValue::Varchar(arcstr::ArcStr::from(format!("name_{}", i))),
402                ])
403            })
404            .collect();
405        let column_names = vec!["id".to_string(), "name".to_string()];
406        ColumnarTable::from_rows(&row_data, &column_names).unwrap()
407    }
408
409    #[test]
410    fn test_cache_basic_operations() {
411        let cache = ColumnarCache::new(1024 * 1024); // 1MB
412
413        // Initially empty
414        assert!(cache.is_empty());
415        assert_eq!(cache.len(), 0);
416        assert!(cache.get("test").is_none());
417
418        // Insert
419        let columnar = create_test_columnar(100);
420        let _ = cache.insert("test", columnar);
421
422        // Should be cached now
423        assert!(!cache.is_empty());
424        assert_eq!(cache.len(), 1);
425        assert!(cache.contains("test"));
426        assert!(cache.get("test").is_some());
427
428        // Stats should reflect operations
429        let stats = cache.stats();
430        assert_eq!(stats.hits, 1); // The get above
431        assert_eq!(stats.misses, 1); // The initial get
432        assert_eq!(stats.conversions, 1);
433    }
434
435    #[test]
436    fn test_cache_invalidation() {
437        let cache = ColumnarCache::new(1024 * 1024);
438
439        let columnar = create_test_columnar(100);
440        let _ = cache.insert("test", columnar);
441        assert!(cache.contains("test"));
442
443        cache.invalidate("test");
444        assert!(!cache.contains("test"));
445
446        let stats = cache.stats();
447        assert_eq!(stats.invalidations, 1);
448    }
449
450    #[test]
451    fn test_cache_eviction() {
452        // Small cache that can only hold one entry
453        let cache = ColumnarCache::new(1024); // 1KB
454
455        // Insert first entry
456        let columnar1 = create_test_columnar(10);
457        let _ = cache.insert("table1", columnar1);
458        assert!(cache.contains("table1"));
459
460        // Insert second entry - should evict first due to memory pressure
461        let columnar2 = create_test_columnar(10);
462        let _ = cache.insert("table2", columnar2);
463
464        // At least one table should be evicted if memory is tight
465        // The exact behavior depends on sizes
466        let stats = cache.stats();
467        // Either evictions occurred or both fit
468        assert!(stats.evictions > 0 || cache.len() == 2);
469    }
470
471    #[test]
472    fn test_cache_arc_sharing() {
473        let cache = ColumnarCache::new(1024 * 1024);
474
475        let columnar = create_test_columnar(100);
476        let arc1 = cache.insert("test", columnar);
477        let arc2 = cache.get("test").unwrap();
478
479        // Both should point to the same data
480        assert!(Arc::ptr_eq(&arc1, &arc2));
481
482        // Reference count should be 3 (cache entry + arc1 + arc2)
483        assert_eq!(Arc::strong_count(&arc1), 3);
484    }
485
486    #[test]
487    fn test_cache_clear() {
488        let cache = ColumnarCache::new(1024 * 1024);
489
490        let _ = cache.insert("table1", create_test_columnar(10));
491        let _ = cache.insert("table2", create_test_columnar(10));
492
493        assert_eq!(cache.len(), 2);
494        assert!(cache.memory_usage() > 0);
495
496        cache.clear();
497
498        assert_eq!(cache.len(), 0);
499        assert_eq!(cache.memory_usage(), 0);
500    }
501
502    #[test]
503    fn test_cache_update_existing() {
504        let cache = ColumnarCache::new(1024 * 1024);
505
506        let columnar1 = create_test_columnar(10);
507        let _ = cache.insert("test", columnar1);
508        let memory1 = cache.memory_usage();
509
510        // Update with larger entry
511        let columnar2 = create_test_columnar(100);
512        let _ = cache.insert("test", columnar2);
513        let memory2 = cache.memory_usage();
514
515        // Memory should have changed
516        assert_ne!(memory1, memory2);
517
518        // Should still be just one entry
519        assert_eq!(cache.len(), 1);
520    }
521
522    #[test]
523    fn test_hit_rate() {
524        let stats =
525            CacheStats { hits: 80, misses: 20, evictions: 0, conversions: 0, invalidations: 0 };
526
527        assert!((stats.hit_rate() - 80.0).abs() < 0.001);
528    }
529
530    #[test]
531    fn test_hit_rate_empty() {
532        let stats = CacheStats::default();
533        assert_eq!(stats.hit_rate(), 0.0);
534    }
535}