Skip to main content

modde_sources/
cache.rs

1//! In-memory byte cache for extracted archive entries, keyed by archive hash
2//! and inner path, with a configurable byte budget and LRU eviction.
3
4use std::num::NonZeroUsize;
5use std::sync::atomic::{AtomicU64, Ordering};
6
7use bytes::Bytes;
8use lru::LruCache;
9use parking_lot::Mutex;
10
/// Identifies a single cached entry by the hash of the archive it came from
/// together with the entry's path inside that archive.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ByteCacheKey {
    /// Hash of the source archive the entry was extracted from.
    pub archive_hash: u64,
    /// Path of the entry within the source archive.
    pub inner_path: String,
}
18
19/// Byte-budgeted LRU cache of extracted archive entry contents.
20pub struct ByteLruCache {
21    map: Mutex<LruCache<ByteCacheKey, Bytes>>,
22    bytes_budget: AtomicU64,
23    bytes_used: AtomicU64,
24}
25
26impl ByteLruCache {
27    /// Build a cache sized from the `MODDE_BYTE_CACHE_MIB` environment variable
28    /// (defaulting to 512 MiB).
29    #[must_use]
30    pub fn from_env() -> Self {
31        let mib = std::env::var("MODDE_BYTE_CACHE_MIB")
32            .ok()
33            .and_then(|v| v.parse::<u64>().ok())
34            .unwrap_or(512);
35        Self::new(mib * 1024 * 1024)
36    }
37
38    /// Build a cache holding at most `bytes_budget` bytes of cached entries.
39    #[must_use]
40    pub fn new(bytes_budget: u64) -> Self {
41        Self {
42            map: Mutex::new(LruCache::new(NonZeroUsize::new(1024).unwrap())),
43            bytes_budget: AtomicU64::new(bytes_budget),
44            bytes_used: AtomicU64::new(0),
45        }
46    }
47
48    /// Return the cached bytes for `key`, marking the entry most-recently-used.
49    #[must_use]
50    pub fn get(&self, key: &ByteCacheKey) -> Option<Bytes> {
51        self.map.lock().get(key).cloned()
52    }
53
54    /// Insert `bytes` under `key`, evicting least-recently-used entries to stay
55    /// within budget; returns the inserted `bytes` for chaining.
56    pub fn insert(&self, key: ByteCacheKey, bytes: Bytes) -> Bytes {
57        let len = bytes.len() as u64;
58        let budget = self.bytes_budget.load(Ordering::Relaxed);
59        if len > budget {
60            return bytes;
61        }
62
63        let mut map = self.map.lock();
64        if let Some(old) = map.put(key, bytes.clone()) {
65            self.bytes_used
66                .fetch_sub(old.len() as u64, Ordering::Relaxed);
67        }
68        self.bytes_used.fetch_add(len, Ordering::Relaxed);
69
70        while self.bytes_used.load(Ordering::Relaxed) > budget {
71            let Some((_key, evicted)) = map.pop_lru() else {
72                break;
73            };
74            self.bytes_used
75                .fetch_sub(evicted.len() as u64, Ordering::Relaxed);
76        }
77        bytes
78    }
79
80    /// Drop all cached entries belonging to the given `archive_hash`.
81    pub fn invalidate_archive(&self, archive_hash: u64) {
82        let mut map = self.map.lock();
83        let keys = map
84            .iter()
85            .filter_map(|(key, _)| (key.archive_hash == archive_hash).then_some(key.clone()))
86            .collect::<Vec<_>>();
87        for key in keys {
88            if let Some(value) = map.pop(&key) {
89                self.bytes_used
90                    .fetch_sub(value.len() as u64, Ordering::Relaxed);
91            }
92        }
93    }
94
95    /// Return the total number of bytes currently held in the cache.
96    #[must_use]
97    pub fn bytes_used(&self) -> u64 {
98        self.bytes_used.load(Ordering::Relaxed)
99    }
100}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Inserting more data than the budget allows must never leave the cache
    /// reporting more bytes than that budget.
    #[test]
    fn cache_respects_budget() {
        let cache = ByteLruCache::new(32);
        (0u8..10).for_each(|fill| {
            let key = ByteCacheKey {
                archive_hash: 1,
                inner_path: format!("{fill}.bin"),
            };
            // Ten 8-byte payloads: 80 bytes offered against a 32-byte budget.
            let _ = cache.insert(key, Bytes::from(vec![fill; 8]));
        });
        assert!(cache.bytes_used() <= 32);
    }
}