Skip to main content

obj_core/pager/
cache.rs

1//! Bounded LRU page cache.
2//!
3//! Frames are allocated up front at [`Cache::new`] and reused for the
4//! lifetime of the pager — no heap allocation occurs on the read hot
5//! path (power-of-ten Rule 3). The LRU ordering is maintained as a
6//! doubly-linked list embedded in the same `Vec<Frame>` that holds the
7//! buffers; the index-typed pointers make every traversal a small
8//! bounded integer hop (Rule 2).
9//!
10//! This cache is not thread-safe. The pager wraps it in `&mut self`
11//! methods; multi-process locking lands in M6.
12
13#![forbid(unsafe_code)]
14
15use std::collections::HashMap;
16
17use crate::pager::page::{Page, PageId};
18
19/// `usize::MAX` used as the "null index" sentinel inside the LRU
20/// linked list. No frame can have this index because the cache size
21/// is bounded well below `usize::MAX / 2` in any realistic build.
22const NIL: usize = usize::MAX;
23
24/// A cache frame: a pre-allocated page buffer plus metadata for the
25/// LRU bookkeeping. `page_id == None` means the frame is empty (its
26/// buffer contents are irrelevant).
27#[derive(Debug)]
28struct Frame {
29    page_id: Option<PageId>,
30    buffer: Page,
31    dirty: bool,
32    prev: usize,
33    next: usize,
34}
35
36/// Outcome of [`Cache::insert`].
37#[derive(Debug)]
38pub struct Evicted {
39    /// Page-id that was evicted, if any.
40    pub page_id: PageId,
41    /// `true` if the evicted frame was dirty and must be flushed.
42    pub dirty: bool,
43    /// Snapshot of the evicted page's bytes (the caller writes this
44    /// to disk when `dirty` is set).
45    pub buffer: Page,
46}
47
48/// Fixed-capacity LRU cache keyed by [`PageId`].
49#[derive(Debug)]
50pub struct Cache {
51    frames: Vec<Frame>,
52    /// `PageId` → index into `frames`. The map's capacity is set at
53    /// construction; `HashMap::insert` only allocates when the
54    /// capacity is exceeded, which cannot happen because `frames.len()`
55    /// is fixed and every key in the map corresponds to a frame.
56    index: HashMap<PageId, usize>,
57    /// Head of the LRU list (most-recently-used frame).
58    head: usize,
59    /// Tail of the LRU list (least-recently-used frame).
60    tail: usize,
61    /// Frames whose `page_id` is `None`, available without eviction.
62    free_head: usize,
63}
64
65impl Cache {
66    /// Construct a cache with `capacity` pre-allocated frames.
67    ///
68    /// `capacity` must be at least 1. Eviction kicks in once all
69    /// frames are bound to a page.
70    #[must_use]
71    pub fn new(capacity: usize) -> Self {
72        debug_assert!(capacity >= 1, "cache must have at least 1 frame");
73        let cap = capacity.max(1);
74        let mut frames = Vec::with_capacity(cap);
75        // Initialise the free-list: every frame chained via `next`.
76        for i in 0..cap {
77            frames.push(Frame {
78                page_id: None,
79                buffer: Page::zeroed(),
80                dirty: false,
81                prev: NIL,
82                next: if i + 1 == cap { NIL } else { i + 1 },
83            });
84        }
85        Self {
86            frames,
87            index: HashMap::with_capacity(cap),
88            head: NIL,
89            tail: NIL,
90            free_head: 0,
91        }
92    }
93
94    /// Number of frames in the cache.
95    #[must_use]
96    pub fn capacity(&self) -> usize {
97        self.frames.len()
98    }
99
100    /// Get a read-only reference to a cached page, if present, and
101    /// mark it as most-recently-used. No allocation occurs.
102    pub fn get(&mut self, id: PageId) -> Option<&Page> {
103        let idx = *self.index.get(&id)?;
104        self.unlink_lru(idx);
105        self.push_front(idx);
106        Some(&self.frames[idx].buffer)
107    }
108
109    /// Read-only peek that does NOT touch the LRU order. Used by
110    /// `Pager::read_cache_or_main` (which holds a shared `&Pager`
111    /// borrow on the snapshot-read path); a mutating `get` would
112    /// be a borrow-checker error there.
113    #[must_use]
114    pub fn peek(&self, id: PageId) -> Option<&Page> {
115        let idx = *self.index.get(&id)?;
116        Some(&self.frames[idx].buffer)
117    }
118
119    /// Get a mutable reference to a cached page, marking it dirty and
120    /// most-recently-used.
121    pub fn get_mut(&mut self, id: PageId) -> Option<&mut Page> {
122        let idx = *self.index.get(&id)?;
123        self.unlink_lru(idx);
124        self.push_front(idx);
125        self.frames[idx].dirty = true;
126        Some(&mut self.frames[idx].buffer)
127    }
128
129    /// Insert `(id, buffer)` into the cache. If the cache is full,
130    /// evicts the LRU frame; the returned [`Evicted`] is the caller's
131    /// responsibility to write back if `dirty` is set.
132    ///
133    /// `dirty` controls whether the newly-inserted frame is considered
134    /// modified. Pages loaded from disk should pass `false`; pages
135    /// allocated fresh by `alloc_page` should pass `true`.
136    pub fn insert(&mut self, id: PageId, buffer: Page, dirty: bool) -> Option<Evicted> {
137        debug_assert!(
138            !self.index.contains_key(&id),
139            "caller must remove an existing key before reinserting",
140        );
141        let (idx, evicted) = self.acquire_frame();
142        self.frames[idx].page_id = Some(id);
143        self.frames[idx].buffer = buffer;
144        self.frames[idx].dirty = dirty;
145        self.push_front(idx);
146        self.index.insert(id, idx);
147        evicted
148    }
149
150    /// Acquire a frame for a fresh insert. Prefers a free frame;
151    /// otherwise evicts the LRU.
152    fn acquire_frame(&mut self) -> (usize, Option<Evicted>) {
153        if self.free_head != NIL {
154            let idx = self.free_head;
155            self.free_head = self.frames[idx].next;
156            self.frames[idx].next = NIL;
157            self.frames[idx].prev = NIL;
158            return (idx, None);
159        }
160        // No free frames — evict tail.
161        let tail = self.tail;
162        debug_assert!(tail != NIL, "tail must exist when no free frames");
163        self.unlink_lru(tail);
164        let dirty = std::mem::replace(&mut self.frames[tail].dirty, false);
165        let buffer = std::mem::take(&mut self.frames[tail].buffer);
166        // The tail frame must be bound to a page-id by invariant: a
167        // frame is on the LRU list iff `page_id.is_some()` and it
168        // appears in `self.index`. If the invariant is violated we
169        // still return the buffer; `debug_assert!` catches the
170        // inconsistency in debug builds.
171        if let Some(evicted_id) = self.frames[tail].page_id.take() {
172            self.index.remove(&evicted_id);
173            (
174                tail,
175                Some(Evicted {
176                    page_id: evicted_id,
177                    dirty,
178                    buffer,
179                }),
180            )
181        } else {
182            debug_assert!(false, "tail frame on LRU list must be bound");
183            (tail, None)
184        }
185    }
186
187    /// Force-evict `id` if present, returning its buffer + dirty flag.
188    /// Used by `free_page` to invalidate the cache entry.
189    pub fn evict(&mut self, id: PageId) -> Option<Evicted> {
190        let idx = self.index.remove(&id)?;
191        self.unlink_lru(idx);
192        let dirty = std::mem::replace(&mut self.frames[idx].dirty, false);
193        let buffer = std::mem::take(&mut self.frames[idx].buffer);
194        let page_id = self.frames[idx].page_id.take().unwrap_or(id);
195        debug_assert_eq!(
196            page_id, id,
197            "frame referenced by index must carry the same id",
198        );
199        // Return the frame to the free-list.
200        self.frames[idx].next = self.free_head;
201        self.free_head = idx;
202        Some(Evicted {
203            page_id,
204            dirty,
205            buffer,
206        })
207    }
208
209    /// Move `idx` to the head of the LRU list. Assumes the frame is
210    /// already detached (or about to be).
211    fn push_front(&mut self, idx: usize) {
212        debug_assert_ne!(idx, NIL);
213        self.frames[idx].prev = NIL;
214        self.frames[idx].next = self.head;
215        if self.head != NIL {
216            self.frames[self.head].prev = idx;
217        }
218        self.head = idx;
219        if self.tail == NIL {
220            self.tail = idx;
221        }
222    }
223
224    /// Detach `idx` from its current position in the LRU list. The
225    /// frame remains in `self.frames` and `self.index`.
226    fn unlink_lru(&mut self, idx: usize) {
227        let (prev, next) = (self.frames[idx].prev, self.frames[idx].next);
228        if prev != NIL {
229            self.frames[prev].next = next;
230        } else if self.head == idx {
231            self.head = next;
232        }
233        if next != NIL {
234            self.frames[next].prev = prev;
235        } else if self.tail == idx {
236            self.tail = prev;
237        }
238        self.frames[idx].prev = NIL;
239        self.frames[idx].next = NIL;
240    }
241
242    /// Iterate over `(page_id, buffer)` for every dirty bound frame,
243    /// returning ownership of each buffer. Used at close to flush dirty
244    /// pages. The iterator is bounded by `self.capacity()` (Rule 2).
245    pub fn drain_dirty(&mut self) -> impl Iterator<Item = (PageId, Page)> + '_ {
246        let cap = self.frames.len();
247        (0..cap).filter_map(|idx| {
248            if !self.frames[idx].dirty {
249                return None;
250            }
251            self.frames[idx].dirty = false;
252            let id = self.frames[idx].page_id?;
253            let buf = std::mem::take(&mut self.frames[idx].buffer);
254            // Mark the frame empty so we don't double-emit on a second drain.
255            self.frames[idx].page_id = None;
256            self.index.remove(&id);
257            self.unlink_lru(idx);
258            self.frames[idx].next = self.free_head;
259            self.free_head = idx;
260            Some((id, buf))
261        })
262    }
263
264    /// Capacity-checked LRU order, for tests. Returns `PageId`s in
265    /// MRU → LRU order.
266    #[must_use]
267    #[cfg(test)]
268    pub fn lru_order(&self) -> Vec<PageId> {
269        let mut out = Vec::with_capacity(self.frames.len());
270        let mut idx = self.head;
271        let mut bound = self.frames.len() + 1;
272        while idx != NIL && bound > 0 {
273            if let Some(id) = self.frames[idx].page_id {
274                out.push(id);
275            }
276            idx = self.frames[idx].next;
277            bound -= 1;
278        }
279        out
280    }
281}
282
283#[cfg(test)]
284mod tests {
285    use super::Cache;
286    use crate::pager::page::{Page, PageId};
287
288    fn id(n: u64) -> PageId {
289        PageId::new(n).expect("non-zero")
290    }
291
292    fn page_with(byte: u8) -> Page {
293        let mut p = Page::zeroed();
294        p.as_bytes_mut()[0] = byte;
295        p
296    }
297
298    #[test]
299    fn empty_cache_misses() {
300        let mut c = Cache::new(4);
301        assert!(c.get(id(1)).is_none());
302    }
303
304    #[test]
305    fn insert_then_get() {
306        let mut c = Cache::new(2);
307        assert!(c.insert(id(1), page_with(0xAA), false).is_none());
308        assert_eq!(c.get(id(1)).map(|p| p.as_bytes()[0]), Some(0xAA));
309    }
310
311    #[test]
312    fn lru_eviction_is_deterministic() {
313        let mut c = Cache::new(3);
314        for n in 1u8..=3 {
315            assert!(c.insert(id(u64::from(n)), page_with(n), false).is_none());
316        }
317        // Order is now 3, 2, 1 (MRU first). Touch id(1) to move it.
318        let _ = c.get(id(1));
319        // Order: 1, 3, 2. Insert id(4) → evict id(2).
320        let ev = c.insert(id(4), page_with(4), false).expect("eviction");
321        assert_eq!(ev.page_id, id(2));
322        assert!(!ev.dirty);
323        // Verify final LRU order.
324        assert_eq!(c.lru_order(), vec![id(4), id(1), id(3)]);
325    }
326
327    #[test]
328    fn dirty_eviction_returns_buffer() {
329        let mut c = Cache::new(1);
330        let _ = c.insert(id(1), page_with(0xAB), true);
331        let ev = c.insert(id(2), page_with(0xCD), false).expect("evict");
332        assert_eq!(ev.page_id, id(1));
333        assert!(ev.dirty);
334        assert_eq!(ev.buffer.as_bytes()[0], 0xAB);
335    }
336
337    #[test]
338    fn evict_releases_frame() {
339        let mut c = Cache::new(2);
340        let _ = c.insert(id(1), page_with(1), true);
341        let _ = c.insert(id(2), page_with(2), false);
342        let ev = c.evict(id(1)).expect("evict");
343        assert!(ev.dirty);
344        // After explicit evict the frame returns to the free-list, so
345        // inserting two more does NOT evict id(2).
346        assert!(c.insert(id(3), page_with(3), false).is_none());
347        assert!(c.get(id(2)).is_some());
348    }
349
350    #[test]
351    fn drain_dirty_yields_each_dirty_page_once() {
352        let mut c = Cache::new(4);
353        let _ = c.insert(id(1), page_with(1), true);
354        let _ = c.insert(id(2), page_with(2), false);
355        let _ = c.insert(id(3), page_with(3), true);
356        let mut drained: Vec<u64> = c.drain_dirty().map(|(id, _)| id.get()).collect();
357        drained.sort_unstable();
358        assert_eq!(drained, vec![1, 3]);
359        // Second drain yields nothing.
360        let again: Vec<_> = c.drain_dirty().collect();
361        assert!(again.is_empty());
362    }
363}