// hdf5_reader/cache.rs — LRU cache for decompressed chunk data.
use std::num::NonZeroUsize;
use std::sync::Arc;

use lru::LruCache;
use parking_lot::Mutex;
use smallvec::SmallVec;
7
8/// Key for the chunk cache: (dataset object header address, chunk offset tuple).
9///
10/// Uses `SmallVec<[u64; 4]>` to avoid heap allocation for datasets with up to
11/// 4 dimensions (the common case for climate/science data).
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13pub struct ChunkKey {
14    pub dataset_addr: u64,
15    pub chunk_offsets: SmallVec<[u64; 4]>,
16}
17
18/// LRU cache for decompressed chunks.
19///
20/// Thread-safe via `parking_lot::Mutex` (non-poisoning). Values are
21/// `Arc<Vec<u8>>` so multiple readers can share the same decompressed chunk data.
22pub struct ChunkCache {
23    inner: Mutex<ChunkCacheState>,
24    max_bytes: usize,
25}
26
27struct ChunkCacheState {
28    cache: LruCache<ChunkKey, Arc<Vec<u8>>>,
29    current_bytes: usize,
30}
31
32impl ChunkCache {
33    /// Create a new chunk cache.
34    ///
35    /// - `max_bytes`: maximum total bytes of decompressed data to cache (default 64 MiB)
36    /// - `max_slots`: maximum number of entries (default 521)
37    pub fn new(max_bytes: usize, max_slots: usize) -> Self {
38        let slots = NonZeroUsize::new(max_slots).unwrap_or(NonZeroUsize::new(521).unwrap());
39        ChunkCache {
40            inner: Mutex::new(ChunkCacheState {
41                cache: LruCache::new(slots),
42                current_bytes: 0,
43            }),
44            max_bytes,
45        }
46    }
47
48    /// Get a cached chunk, if present. Promotes the entry in LRU order.
49    pub fn get(&self, key: &ChunkKey) -> Option<Arc<Vec<u8>>> {
50        let mut cache = self.inner.lock();
51        cache.cache.get(key).cloned()
52    }
53
54    /// Insert a chunk into the cache. Evicts LRU entries if over capacity.
55    pub fn insert(&self, key: ChunkKey, data: Vec<u8>) -> Arc<Vec<u8>> {
56        let data_len = data.len();
57        let arc = Arc::new(data);
58
59        if self.max_bytes == 0 || data_len > self.max_bytes {
60            return arc;
61        }
62
63        let mut state = self.inner.lock();
64        // Evict until we have room
65        while state.current_bytes + data_len > self.max_bytes && !state.cache.is_empty() {
66            if let Some((_, evicted)) = state.cache.pop_lru() {
67                state.current_bytes = state.current_bytes.saturating_sub(evicted.len());
68            }
69        }
70
71        state.current_bytes += data_len;
72        state.cache.put(key, arc.clone());
73
74        arc
75    }
76}
77
78impl Default for ChunkCache {
79    fn default() -> Self {
80        Self::new(64 * 1024 * 1024, 521)
81    }
82}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ChunkKey` from a dataset address and chunk offsets.
    fn key(dataset_addr: u64, offsets: &[u64]) -> ChunkKey {
        ChunkKey {
            dataset_addr,
            chunk_offsets: SmallVec::from_slice(offsets),
        }
    }

    #[test]
    fn test_cache_insert_and_get() {
        let cache = ChunkCache::new(1024, 10);
        let k = key(100, &[0, 0]);
        cache.insert(k.clone(), vec![1, 2, 3]);
        let val = cache.get(&k).expect("entry should be cached");
        assert_eq!(&**val, &[1, 2, 3]);
    }

    #[test]
    fn test_cache_eviction() {
        // A 10-byte budget holds at most two 4-byte chunks at once.
        let cache = ChunkCache::new(10, 10);
        for i in 0..5 {
            cache.insert(key(100, &[i]), vec![0; 4]);
        }
        // The earliest insert must have been evicted to respect the budget.
        assert!(cache.get(&key(100, &[0])).is_none());
    }

    #[test]
    fn test_cache_disabled_bypasses_storage() {
        // max_bytes == 0 turns the cache into a pure pass-through.
        let cache = ChunkCache::new(0, 10);
        let k = key(100, &[0]);
        cache.insert(k.clone(), vec![1, 2, 3]);
        assert!(cache.get(&k).is_none());
    }

    #[test]
    fn test_cache_promotes_on_get() {
        // Verify that get() refreshes an entry's position in LRU order.
        let cache = ChunkCache::new(12, 10); // room for three 4-byte chunks
        let key_a = key(1, &[0]);
        let key_b = key(2, &[0]);
        let key_c = key(3, &[0]);

        cache.insert(key_a.clone(), vec![0; 4]); // LRU order: a
        cache.insert(key_b.clone(), vec![0; 4]); // LRU order: a, b
        cache.insert(key_c.clone(), vec![0; 4]); // LRU order: a, b, c

        // Touch key_a so it becomes the most recently used.
        assert!(cache.get(&key_a).is_some()); // LRU order: b, c, a

        // This insert overflows the budget and must evict key_b, now the LRU.
        cache.insert(key(4, &[0]), vec![0; 4]);

        assert!(cache.get(&key_a).is_some()); // promoted entry survives
        assert!(cache.get(&key_b).is_none()); // LRU entry was evicted
    }
}
164}