mount/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Shared blob cache for `ContentAddressedMount`.
3//!
4//! The cache holds materialised file bytes keyed by `ContentHash` so a
5//! hot kernel `read` syscall is a pointer-bump + slice copy rather than
6//! a full `get_blob` + `decompress` chain. It's the difference between
7//! mount reads beating `std::fs::read` (warm) and being ~10× slower
8//! (cold) — see `benches/mount_read_paths.rs`.
9//!
10//! Why a separate, shared pool instead of owning the cache inline?
11//!
12//! Heddle's content-addressed model means two threads forked from the
13//! same parent share *every* blob hash on the parts of the tree they
14//! haven't diverged on yet. If each mount carries its own LRU, every
15//! freshly-opened mount starts cold even when a sibling mount in the
16//! same process just decompressed the exact same bytes a millisecond
17//! ago. By making the cache an `Arc<BlobCachePool>` the daemon can
18//! attach one pool to itself, hand it to every new mount, and every
19//! cache-hot blob anywhere in the process is hot for the new mount
20//! too. Cap stays the same; hit rate goes up.
21//!
22//! The pool is byte-bounded, not entry-bounded — a 256 MiB cap holds
23//! roughly 25 × 10 MiB blobs or 250 000 × 1 KiB blobs, whichever the
24//! workload happens to be. Eviction is LRU. A single blob larger than
25//! the cap bypasses the cache entirely so one giant file can't
26//! evict the rest of the working set.
27
28use std::sync::{
29    Mutex,
30    atomic::{AtomicU64, Ordering},
31};
32
33use bytes::Bytes;
34use lru::LruCache;
35use objects::{object::ContentHash, sync::LockExt};
36
37/// Default cap. Picked so a typical agent workspace fits in memory
38/// without the cache dominating RSS — for daemon deployments the
39/// recommended sizing is `min(4 GiB, 25% of physical RAM)`, set via
40/// [`BlobCachePool::with_capacity`].
41pub const DEFAULT_BLOB_CACHE_BYTES: usize = 256 * 1024 * 1024;
42
43/// Process-shared blob cache. Construct once per `Repository` (or once
44/// per daemon process) and hand the same `Arc<BlobCachePool>` to every
45/// [`crate::ContentAddressedMount`] that wants to share its warm
46/// state with sibling mounts.
47///
48/// Clone-cheap: it's just `Arc<Inner>` under the hood.
49pub struct BlobCachePool {
50    inner: Mutex<BlobCacheInner>,
51    cap_bytes: usize,
52    hits: AtomicU64,
53    misses: AtomicU64,
54    inserts: AtomicU64,
55}
56
57struct BlobCacheInner {
58    lru: LruCache<ContentHash, Bytes>,
59    /// Running sum of `Bytes.len()` over every live entry. Kept
60    /// in sync with `lru` mutations so eviction can stop the moment
61    /// the working total drops back under `cap_bytes`.
62    bytes: usize,
63}
64
65impl BlobCachePool {
66    /// Construct a pool with [`DEFAULT_BLOB_CACHE_BYTES`] cap. Suitable
67    /// for tests, CLI one-shots, and any caller that doesn't have a
68    /// sizing strategy.
69    pub fn with_default_capacity() -> Self {
70        Self::with_capacity(DEFAULT_BLOB_CACHE_BYTES)
71    }
72
73    /// Construct a pool with an explicit byte cap. Daemon callers
74    /// should size this from physical RAM (see module docs).
75    pub fn with_capacity(cap_bytes: usize) -> Self {
76        Self {
77            inner: Mutex::new(BlobCacheInner {
78                lru: LruCache::unbounded(),
79                bytes: 0,
80            }),
81            cap_bytes,
82            hits: AtomicU64::new(0),
83            misses: AtomicU64::new(0),
84            inserts: AtomicU64::new(0),
85        }
86    }
87
88    /// Look up a blob. Returns `Some(Arc::clone)` on hit, `None` on
89    /// miss. Bumps the entry to MRU.
90    pub(crate) fn get(&self, hash: &ContentHash) -> Option<Bytes> {
91        let mut guard = self.inner.lock_or_poisoned();
92        match guard.lru.get(hash).cloned() {
93            Some(bytes) => {
94                self.hits.fetch_add(1, Ordering::Relaxed);
95                Some(bytes)
96            }
97            None => {
98                self.misses.fetch_add(1, Ordering::Relaxed);
99                None
100            }
101        }
102    }
103
104    /// Insert a freshly-loaded blob. Evicts LRU entries until the
105    /// total falls back below cap. A blob larger than cap bypasses
106    /// the cache entirely so one giant file can't displace the rest
107    /// of the working set.
108    pub(crate) fn insert(&self, hash: ContentHash, bytes: Bytes) {
109        let size = bytes.len();
110        if size > self.cap_bytes {
111            return;
112        }
113        let mut guard = self.inner.lock_or_poisoned();
114        if let Some(prev) = guard.lru.put(hash, bytes) {
115            guard.bytes = guard.bytes.saturating_sub(prev.len());
116        }
117        guard.bytes += size;
118        while guard.bytes > self.cap_bytes {
119            let Some((_evicted_hash, evicted)) = guard.lru.pop_lru() else {
120                break;
121            };
122            guard.bytes = guard.bytes.saturating_sub(evicted.len());
123        }
124        self.inserts.fetch_add(1, Ordering::Relaxed);
125    }
126
127    /// Drop every cached entry. Used by benchmarks that need to
128    /// measure the true cold path.
129    pub fn clear(&self) {
130        let mut guard = self.inner.lock_or_poisoned();
131        guard.lru.clear();
132        guard.bytes = 0;
133    }
134
135    /// Byte cap configured at construction time.
136    pub fn cap_bytes(&self) -> usize {
137        self.cap_bytes
138    }
139
140    /// Current resident bytes across all entries. Cheap (single lock
141    /// acquire) but not free — meant for diagnostics, not hot-path
142    /// budgeting.
143    pub fn resident_bytes(&self) -> usize {
144        self.inner.lock_or_poisoned().bytes
145    }
146
147    /// Number of distinct blobs currently cached. Same caveat as
148    /// `resident_bytes`.
149    pub fn entry_count(&self) -> usize {
150        self.inner.lock_or_poisoned().lru.len()
151    }
152
153    /// Cumulative hit / miss / insert counters since the pool was
154    /// constructed. Useful for sizing decisions: a pool with
155    /// `inserts >> hits` is undersized, a pool with `hits >> inserts`
156    /// is right-sized for its workload.
157    pub fn stats(&self) -> BlobCacheStats {
158        BlobCacheStats {
159            hits: self.hits.load(Ordering::Relaxed),
160            misses: self.misses.load(Ordering::Relaxed),
161            inserts: self.inserts.load(Ordering::Relaxed),
162        }
163    }
164}
165
166#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
167pub struct BlobCacheStats {
168    pub hits: u64,
169    pub misses: u64,
170    pub inserts: u64,
171}
172
173#[cfg(test)]
174mod tests {
175    use std::sync::Arc;
176
177    use super::*;
178
179    fn h(byte: u8) -> ContentHash {
180        let mut buf = [0u8; 32];
181        buf.iter_mut().for_each(|b| *b = byte);
182        ContentHash::from_bytes(buf)
183    }
184
185    fn payload(byte: u8, len: usize) -> Bytes {
186        Bytes::from(vec![byte; len])
187    }
188
189    #[test]
190    fn round_trip_get_hit_miss() {
191        let pool = BlobCachePool::with_capacity(1024);
192        assert!(pool.get(&h(1)).is_none());
193        pool.insert(h(1), payload(0xAA, 64));
194        let hit = pool.get(&h(1)).expect("should hit");
195        assert_eq!(&hit[..], &vec![0xAA; 64][..]);
196        let stats = pool.stats();
197        assert_eq!(stats.hits, 1);
198        assert_eq!(stats.misses, 1);
199        assert_eq!(stats.inserts, 1);
200    }
201
202    #[test]
203    fn byte_bound_evicts_lru_until_under_cap() {
204        let pool = BlobCachePool::with_capacity(300);
205        pool.insert(h(1), payload(0x01, 100));
206        pool.insert(h(2), payload(0x02, 100));
207        pool.insert(h(3), payload(0x03, 100));
208        assert_eq!(pool.resident_bytes(), 300);
209        pool.insert(h(4), payload(0x04, 100));
210        // h(1) is the LRU after the inserts; it should have been
211        // evicted to keep us under cap.
212        assert!(pool.get(&h(1)).is_none());
213        assert!(pool.get(&h(4)).is_some());
214        assert_eq!(pool.resident_bytes(), 300);
215    }
216
217    #[test]
218    fn oversized_blob_bypasses_cache() {
219        let pool = BlobCachePool::with_capacity(256);
220        // Existing entry should survive.
221        pool.insert(h(1), payload(0x01, 200));
222        // 500 > cap → bypassed, no eviction.
223        pool.insert(h(2), payload(0x02, 500));
224        assert!(pool.get(&h(1)).is_some());
225        assert!(pool.get(&h(2)).is_none());
226        assert_eq!(pool.resident_bytes(), 200);
227    }
228
229    #[test]
230    fn shared_pool_visible_across_clones() {
231        let pool = Arc::new(BlobCachePool::with_capacity(1024));
232        let a = Arc::clone(&pool);
233        let b = Arc::clone(&pool);
234        a.insert(h(1), payload(0xAA, 64));
235        assert!(b.get(&h(1)).is_some());
236    }
237}
mount/cache.rs

mount/
cache.rs