Skip to main content

sqlrite/sql/pager/
freelist.rs

1//! Persisted free-page list (SQLR-6).
2//!
3//! After `DROP TABLE` / `DROP INDEX` / `ALTER TABLE DROP COLUMN`, the next
4//! `save_database` no longer references the dropped object's pages. Without
5//! a freelist those pages would either be re-numbered (every later table
6//! shifts down → high write amplification) or stranded as orphans on disk.
7//!
8//! The freelist solves both: pages no longer referenced from `sqlrite_master`
9//! go on a persisted stack rooted at `header.freelist_head`, so subsequent
10//! saves can pull from there before extending the file. The unrelated tables
11//! that didn't change keep their page numbers and their pages re-stage byte-
12//! identical → the diff pager skips writing them.
13//!
14//! ## On-disk layout
15//!
16//! Each freelist trunk page carries the standard 7-byte page header with
17//! `page_type = PAGE_TYPE_FREELIST_TRUNK (5)` and `next_page` pointing at the
18//! next trunk in the chain (`0` = end). The 4089-byte payload area holds:
19//!
20//! ```text
21//!   0..2      count: u16 LE       — number of free leaf-page IDs that follow
22//!   2..2+4*N  page_ids[N]: u32 LE — free leaf-page numbers, ascending
23//! ```
24//!
25//! A trunk holds up to `(PAYLOAD_PER_PAGE - 2) / 4 = 1021` free page IDs.
26//! Larger freelists chain across multiple trunks. The trunk pages themselves
27//! are part of the live page set (they hold metadata) and are *not* on the
28//! freelist they encode.
29
30use std::collections::VecDeque;
31
32use crate::error::{Result, SQLRiteError};
33use crate::sql::pager::page::{PAGE_HEADER_SIZE, PAGE_SIZE, PAYLOAD_PER_PAGE};
34use crate::sql::pager::pager::Pager;
35
36/// Page-type tag for a freelist trunk page. Distinct from existing page tags
37/// (`2 = TableLeaf`, `3 = Overflow`, `4 = InteriorNode`); `1` was retired.
38pub const PAGE_TYPE_FREELIST_TRUNK: u8 = 5;
39
40/// Maximum number of free page IDs a single trunk page can hold.
41/// `PAYLOAD_PER_PAGE` is 4089; we reserve 2 bytes for the count and 4 bytes
42/// per ID, giving `(4089 - 2) / 4 = 1021`.
43pub const FREELIST_IDS_PER_TRUNK: usize = (PAYLOAD_PER_PAGE - 2) / 4;
44
45/// Encodes a single freelist trunk page into the given buffer.
46///
47/// `next_trunk` is the page number of the next trunk in the chain, or `0`
48/// to mark the end. `page_ids` must have at most `FREELIST_IDS_PER_TRUNK`
49/// entries.
50pub fn encode_trunk(buf: &mut [u8; PAGE_SIZE], next_trunk: u32, page_ids: &[u32]) -> Result<()> {
51    if page_ids.len() > FREELIST_IDS_PER_TRUNK {
52        return Err(SQLRiteError::Internal(format!(
53            "freelist trunk overflow: {} ids exceeds capacity {}",
54            page_ids.len(),
55            FREELIST_IDS_PER_TRUNK
56        )));
57    }
58    // Zero out the buffer; trailing payload bytes after the encoded IDs are
59    // unused and must be deterministic so the diff pager can skip an
60    // unchanged trunk.
61    buf.fill(0);
62    buf[0] = PAGE_TYPE_FREELIST_TRUNK;
63    buf[1..5].copy_from_slice(&next_trunk.to_le_bytes());
64    // Per-page `payload_length` field (bytes 5..7) is unused for trunks —
65    // the count field inside the payload self-describes the entries.
66    buf[5..7].copy_from_slice(&0u16.to_le_bytes());
67    let count = page_ids.len() as u16;
68    buf[PAGE_HEADER_SIZE..PAGE_HEADER_SIZE + 2].copy_from_slice(&count.to_le_bytes());
69    let mut off = PAGE_HEADER_SIZE + 2;
70    for id in page_ids {
71        buf[off..off + 4].copy_from_slice(&id.to_le_bytes());
72        off += 4;
73    }
74    Ok(())
75}
76
77/// Decodes one freelist trunk page. Returns `(next_trunk, page_ids)`.
78fn decode_trunk(buf: &[u8; PAGE_SIZE]) -> Result<(u32, Vec<u32>)> {
79    if buf[0] != PAGE_TYPE_FREELIST_TRUNK {
80        return Err(SQLRiteError::General(format!(
81            "expected freelist trunk page (tag {PAGE_TYPE_FREELIST_TRUNK}), got tag {}",
82            buf[0]
83        )));
84    }
85    let next_trunk = u32::from_le_bytes(buf[1..5].try_into().unwrap());
86    let count = u16::from_le_bytes(
87        buf[PAGE_HEADER_SIZE..PAGE_HEADER_SIZE + 2]
88            .try_into()
89            .unwrap(),
90    ) as usize;
91    if count > FREELIST_IDS_PER_TRUNK {
92        return Err(SQLRiteError::General(format!(
93            "freelist trunk count {count} exceeds capacity {FREELIST_IDS_PER_TRUNK}"
94        )));
95    }
96    let mut ids = Vec::with_capacity(count);
97    let mut off = PAGE_HEADER_SIZE + 2;
98    for _ in 0..count {
99        let id = u32::from_le_bytes(buf[off..off + 4].try_into().unwrap());
100        ids.push(id);
101        off += 4;
102    }
103    Ok((next_trunk, ids))
104}
105
106/// Walks the freelist chain rooted at `head` and returns every free leaf-page
107/// ID. The trunk pages themselves are *not* included — they're live metadata.
108/// Returns the trunk page numbers separately so the caller can release them
109/// (the next save re-encodes the freelist from scratch and frees the old
110/// trunks for reuse).
111///
112/// `head == 0` → empty freelist; returns `(vec![], vec![])`.
113pub fn read_freelist(pager: &Pager, head: u32) -> Result<(Vec<u32>, Vec<u32>)> {
114    let mut leaves: Vec<u32> = Vec::new();
115    let mut trunks: Vec<u32> = Vec::new();
116    let mut cursor = head;
117    let mut visited: std::collections::HashSet<u32> = std::collections::HashSet::new();
118    while cursor != 0 {
119        if !visited.insert(cursor) {
120            return Err(SQLRiteError::General(format!(
121                "freelist cycle detected at trunk page {cursor}"
122            )));
123        }
124        let buf = pager.read_page(cursor).ok_or_else(|| {
125            SQLRiteError::General(format!(
126                "freelist trunk page {cursor} is past page_count or unreadable"
127            ))
128        })?;
129        let (next, ids) = decode_trunk(buf)?;
130        leaves.extend(ids);
131        trunks.push(cursor);
132        cursor = next;
133    }
134    Ok((leaves, trunks))
135}
136
137/// Stages the freelist chain into the pager. `free_pages` is the full set
138/// of pages that need persisted-on-the-freelist treatment — both the trunk
139/// pages (metadata) and the leaf entries (the actually-free leaves).
140///
141/// The chain consumes some of `free_pages` as its own trunks: a trunk
142/// page IS a free page that's been temporarily borrowed for metadata.
143/// Returns the new `freelist_head` (or `0` if `free_pages` is empty).
144///
145/// Encoding: `T = ceil(N / (IDS_PER_TRUNK + 1))` trunks; each trunk
146/// (except possibly the last) carries `IDS_PER_TRUNK` leaf IDs. Leaves
147/// are stored ascending within each trunk for deterministic on-disk
148/// bytes (so an unchanged freelist re-stages byte-identical → diff skip).
149pub fn stage_freelist(pager: &mut Pager, free_pages: Vec<u32>) -> Result<u32> {
150    if free_pages.is_empty() {
151        return Ok(0);
152    }
153    // Sort + dedup so the on-disk ordering is deterministic. A duplicate
154    // in the freelist would be a serious bug elsewhere (double-free), but
155    // dedupping here is a cheap defensive guardrail.
156    let mut ids = free_pages;
157    ids.sort_unstable();
158    ids.dedup();
159
160    // Solve N = T + L where L = number of leaf slots used, T = number of
161    // trunks, and L ≤ T * IDS_PER_TRUNK. Smallest T satisfying that is
162    // ceil(N / (IDS_PER_TRUNK + 1)) — the +1 accounts for the trunk
163    // page itself absorbing one of the N pages.
164    let n = ids.len();
165    let t = n.div_ceil(FREELIST_IDS_PER_TRUNK + 1);
166
167    // Take the highest-numbered T pages as trunks. This keeps the leaf
168    // IDs ascending within each trunk and matches the "drain low first"
169    // policy of the allocator on subsequent saves.
170    let leaves_count = n - t;
171    let trunk_pages: Vec<u32> = ids.split_off(leaves_count);
172    let leaves = ids;
173
174    // Lay out leaves across trunks, IDS_PER_TRUNK per trunk.
175    let mut chunks: Vec<&[u32]> = leaves.chunks(FREELIST_IDS_PER_TRUNK).collect();
176    // If there are more trunks than chunks (e.g. N=1 → T=1, L=0), pad
177    // with empty chunks so every trunk gets staged.
178    while chunks.len() < trunk_pages.len() {
179        chunks.push(&[]);
180    }
181
182    for (i, chunk) in chunks.iter().enumerate() {
183        let next = if i + 1 < trunk_pages.len() {
184            trunk_pages[i + 1]
185        } else {
186            0
187        };
188        let mut buf = [0u8; PAGE_SIZE];
189        encode_trunk(&mut buf, next, chunk)?;
190        pager.stage_page(trunk_pages[i], buf);
191    }
192
193    Ok(trunk_pages[0])
194}
195
196/// Helper: collect a freelist into a `VecDeque<u32>` sorted ascending —
197/// the ordering the `PageAllocator` uses to draw next free pages from.
198pub fn freelist_to_deque(leaves: Vec<u32>) -> VecDeque<u32> {
199    let mut sorted = leaves;
200    sorted.sort_unstable();
201    sorted.dedup();
202    VecDeque::from(sorted)
203}
204
205/// Auto-VACUUM (SQLR-10) does not fire on databases below this many
206/// pages. The 4 KiB page size makes 16 pages = 64 KiB — small enough
207/// that the cost of a full-file rewrite is negligible if the user
208/// genuinely wants it (manual `VACUUM;` still works), but large enough
209/// that single-table churn doesn't blow past the threshold and trigger
210/// a noisy compact every few statements.
211pub const MIN_PAGES_FOR_AUTO_VACUUM: u32 = 16;
212
213/// Returns `true` if the on-disk freelist (counting both leaf and
214/// trunk pages — they're all reclaimable bytes) exceeds `threshold`
215/// of `header.page_count`, i.e. the file is bloated enough to be
216/// worth compacting. Returns `false` for tiny databases (under
217/// [`MIN_PAGES_FOR_AUTO_VACUUM`]) and for empty freelists, both as
218/// fast paths to keep the auto-VACUUM hook cheap on the common case.
219///
220/// This is a read-only inspection of the pager's current header and
221/// freelist chain — it does not mutate any state. The caller is
222/// responsible for actually invoking
223/// [`crate::sql::pager::vacuum_database`] when this returns `true`.
224pub fn should_auto_vacuum(pager: &Pager, threshold: f32) -> Result<bool> {
225    let header = pager.header();
226    if header.page_count < MIN_PAGES_FOR_AUTO_VACUUM {
227        return Ok(false);
228    }
229    if header.freelist_head == 0 {
230        return Ok(false);
231    }
232    let (leaves, trunks) = read_freelist(pager, header.freelist_head)?;
233    let free_pages = leaves.len() + trunks.len();
234    let ratio = free_pages as f32 / header.page_count as f32;
235    Ok(ratio > threshold)
236}
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    #[test]
243    fn empty_freelist_round_trip() {
244        let mut buf = [0u8; PAGE_SIZE];
245        encode_trunk(&mut buf, 0, &[]).unwrap();
246        let (next, ids) = decode_trunk(&buf).unwrap();
247        assert_eq!(next, 0);
248        assert!(ids.is_empty());
249    }
250
251    #[test]
252    fn single_chunk_round_trip() {
253        let mut buf = [0u8; PAGE_SIZE];
254        let pages = [3u32, 7, 12, 99];
255        encode_trunk(&mut buf, 42, &pages).unwrap();
256        let (next, ids) = decode_trunk(&buf).unwrap();
257        assert_eq!(next, 42);
258        assert_eq!(ids, pages);
259    }
260
261    #[test]
262    fn full_chunk_fits_capacity() {
263        let mut buf = [0u8; PAGE_SIZE];
264        let pages: Vec<u32> = (1..=FREELIST_IDS_PER_TRUNK as u32).collect();
265        encode_trunk(&mut buf, 0, &pages).unwrap();
266        let (next, ids) = decode_trunk(&buf).unwrap();
267        assert_eq!(next, 0);
268        assert_eq!(ids.len(), FREELIST_IDS_PER_TRUNK);
269        assert_eq!(ids[0], 1);
270        assert_eq!(
271            ids[FREELIST_IDS_PER_TRUNK - 1],
272            FREELIST_IDS_PER_TRUNK as u32
273        );
274    }
275
276    #[test]
277    fn over_capacity_errors() {
278        let mut buf = [0u8; PAGE_SIZE];
279        let pages: Vec<u32> = (1..=(FREELIST_IDS_PER_TRUNK as u32 + 1)).collect();
280        let err = encode_trunk(&mut buf, 0, &pages).unwrap_err();
281        assert!(format!("{err}").contains("freelist trunk overflow"));
282    }
283
284    #[test]
285    fn wrong_tag_errors_on_decode() {
286        let mut buf = [0u8; PAGE_SIZE];
287        buf[0] = 2; // TableLeaf, not freelist
288        let err = decode_trunk(&buf).unwrap_err();
289        assert!(format!("{err}").contains("expected freelist trunk page"));
290    }
291}