sqlrite-engine 0.7.0

Light version of SQLite developed with Rust. Published as `sqlrite-engine` on crates.io; import as `use sqlrite::…`.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
//! Overflow storage for cells that don't fit on one table-leaf page.
//!
//! Two pieces live here:
//!
//! - [`OverflowRef`] — the on-page marker that replaces a full cell when
//!   the cell's body is too large to keep inline. It carries the rowid
//!   (so the page's slot directory stays rowid-ordered), the total size
//!   of the external body, and a pointer to the first overflow page of
//!   the chain that holds the body.
//! - [`write_overflow_chain`] / [`read_overflow_chain`] — turn raw bytes
//!   into a chain of `Overflow`-typed pages and back. Each overflow page
//!   reuses the existing 7-byte page header (type tag + next-page + payload
//!   length) — we're not adding a new page format.
//!
//! **Decision to inline the rowid and NOT inline any of the body.** SQLite's
//! leaf-cell scheme keeps a prefix of the body inline before spilling, so
//! small lookups by rowid don't need a chain walk. We'd still have to chase
//! the chain for most columns anyway, so for simplicity this implementation
//! spills the entire body. A later optimization can split cells at a
//! threshold and keep a prefix inline without changing the page layout.
//!
//! **Overflow threshold.** Inserting a cell whose encoded length is more
//! than roughly a quarter of the page payload area (≈ 1000 bytes) is a
//! good candidate for overflow — on a ~4 KiB page you can still keep at
//! least 3-4 cells per page. The exact threshold is the caller's choice;
//! this module just exposes [`OVERFLOW_THRESHOLD`] as a suggestion.

use crate::error::{Result, SQLRiteError};
use crate::sql::pager::cell::{Cell, KIND_LOCAL, KIND_OVERFLOW};
use crate::sql::pager::page::{PAGE_HEADER_SIZE, PAGE_SIZE, PAYLOAD_PER_PAGE, PageType};
use crate::sql::pager::pager::Pager;
use crate::sql::pager::varint;

/// Inline cell-body size above which the caller should consider overflowing.
/// Sized so at least 4 inline cells can coexist on a page alongside their
/// slot directory.
pub const OVERFLOW_THRESHOLD: usize = PAYLOAD_PER_PAGE / 4;

/// On-page marker that stands in for a cell whose body lives in an overflow
/// chain. Rowid is inlined so the page's binary search over slots still
/// works without chasing the chain.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OverflowRef {
    pub rowid: i64,
    /// Exact byte count that `read_overflow_chain` must produce; the
    /// caller then feeds those bytes to `LocalCellBody::decode`.
    pub total_body_len: u64,
    /// First page of the Overflow-type chain carrying the body.
    pub first_overflow_page: u32,
}

impl OverflowRef {
    /// Serializes the reference using the shared
    /// `[cell_length varint | kind_tag | body]` prefix; `kind_tag` is
    /// always `KIND_OVERFLOW` for this type.
    pub fn encode(&self) -> Vec<u8> {
        let mut body = Vec::with_capacity(1 + varint::MAX_VARINT_BYTES * 2 + 4);
        body.push(KIND_OVERFLOW);
        varint::write_i64(&mut body, self.rowid);
        varint::write_u64(&mut body, self.total_body_len);
        body.extend_from_slice(&self.first_overflow_page.to_le_bytes());

        let mut out = Vec::with_capacity(body.len() + varint::MAX_VARINT_BYTES);
        varint::write_u64(&mut out, body.len() as u64);
        out.extend_from_slice(&body);
        out
    }

    pub fn decode(buf: &[u8], pos: usize) -> Result<(OverflowRef, usize)> {
        let (body_len, len_bytes) = varint::read_u64(buf, pos)?;
        let body_start = pos + len_bytes;
        let body_end = body_start
            .checked_add(body_len as usize)
            .ok_or_else(|| SQLRiteError::Internal("overflow ref length overflow".to_string()))?;
        if body_end > buf.len() {
            return Err(SQLRiteError::Internal(format!(
                "overflow ref extends past buffer: needs {body_start}..{body_end}, have {}",
                buf.len()
            )));
        }

        let body = &buf[body_start..body_end];
        if body.first().copied() != Some(KIND_OVERFLOW) {
            return Err(SQLRiteError::Internal(format!(
                "OverflowRef::decode called on non-overflow entry (kind_tag = {:#x})",
                body.first().copied().unwrap_or(0)
            )));
        }
        let mut cur = 1usize;
        let (rowid, n) = varint::read_i64(body, cur)?;
        cur += n;
        let (total_body_len, n) = varint::read_u64(body, cur)?;
        cur += n;
        if cur + 4 > body.len() {
            return Err(SQLRiteError::Internal(
                "overflow ref truncated before first_overflow_page".to_string(),
            ));
        }
        let first_overflow_page = u32::from_le_bytes(body[cur..cur + 4].try_into().unwrap());
        cur += 4;
        if cur != body.len() {
            return Err(SQLRiteError::Internal(format!(
                "overflow ref had {} trailing bytes",
                body.len() - cur
            )));
        }
        Ok((
            OverflowRef {
                rowid,
                total_body_len,
                first_overflow_page,
            },
            body_end - pos,
        ))
    }
}

/// An on-page entry: either a full local cell, or a pointer to an overflow
/// chain carrying the cell's body.
#[derive(Debug, Clone, PartialEq)]
pub enum PagedEntry {
    Local(Cell),
    Overflow(OverflowRef),
}

impl PagedEntry {
    pub fn rowid(&self) -> i64 {
        match self {
            PagedEntry::Local(c) => c.rowid,
            PagedEntry::Overflow(r) => r.rowid,
        }
    }

    pub fn encode(&self) -> Result<Vec<u8>> {
        match self {
            PagedEntry::Local(c) => c.encode(),
            PagedEntry::Overflow(r) => Ok(r.encode()),
        }
    }

    /// Dispatches on the kind tag and returns the appropriate variant.
    ///
    /// Only `KIND_LOCAL` and `KIND_OVERFLOW` are valid here — `PagedEntry`
    /// is the table-leaf-cell type, so any other kind means a caller
    /// pointed the wrong decoder at this page (the slot directory layout
    /// is shared across leaf B-Trees, but secondary-index, HNSW, and
    /// FTS leaves carry kind-specific cells decoded by `IndexCell::decode`,
    /// `HnswNodeCell::decode`, and `FtsPostingCell::decode` respectively).
    /// The named-kind error makes that mistake obvious next time.
    pub fn decode(buf: &[u8], pos: usize) -> Result<(PagedEntry, usize)> {
        let kind = Cell::peek_kind(buf, pos)?;
        match kind {
            KIND_LOCAL => {
                let (c, n) = Cell::decode(buf, pos)?;
                Ok((PagedEntry::Local(c), n))
            }
            KIND_OVERFLOW => {
                let (r, n) = OverflowRef::decode(buf, pos)?;
                Ok((PagedEntry::Overflow(r), n))
            }
            other => Err(SQLRiteError::Internal(format!(
                "PagedEntry::decode at offset {pos} got kind tag {other:#x} ({}); \
                 expected KIND_LOCAL (0x01) or KIND_OVERFLOW (0x02). \
                 The caller is reading a {} page with the table-leaf decoder.",
                kind_name(other),
                kind_btree_hint(other),
            ))),
        }
    }
}

/// Human-readable label for a cell kind tag — used in error messages
/// to make wrong-decoder mistakes self-explanatory.
fn kind_name(tag: u8) -> &'static str {
    match tag {
        crate::sql::pager::cell::KIND_LOCAL => "KIND_LOCAL",
        crate::sql::pager::cell::KIND_OVERFLOW => "KIND_OVERFLOW",
        crate::sql::pager::cell::KIND_INTERIOR => "KIND_INTERIOR",
        crate::sql::pager::cell::KIND_INDEX => "KIND_INDEX",
        crate::sql::pager::cell::KIND_HNSW => "KIND_HNSW",
        crate::sql::pager::cell::KIND_FTS_POSTING => "KIND_FTS_POSTING",
        _ => "unknown kind",
    }
}

/// Hint pointing at which B-Tree owns a cell of the given kind.
fn kind_btree_hint(tag: u8) -> &'static str {
    match tag {
        crate::sql::pager::cell::KIND_INTERIOR => "B-Tree interior",
        crate::sql::pager::cell::KIND_INDEX => "secondary-index",
        crate::sql::pager::cell::KIND_HNSW => "HNSW",
        crate::sql::pager::cell::KIND_FTS_POSTING => "FTS",
        _ => "non-table",
    }
}

/// Writes `bytes` into a chain of Overflow-typed pages, drawing each
/// page number from the supplied [`PageAllocator`]. Returns the page
/// number of the first link in the chain (the value to record in the
/// `OverflowRef` cell on the owning leaf).
///
/// Pages no longer have to be consecutive — the chain is followed by
/// `next_page` pointers, and the allocator may hand out pages from a
/// freelist or preferred pool that aren't sequential.
pub fn write_overflow_chain(
    pager: &mut Pager,
    bytes: &[u8],
    alloc: &mut crate::sql::pager::allocator::PageAllocator,
) -> Result<u32> {
    if bytes.is_empty() {
        return Err(SQLRiteError::Internal(
            "refusing to write an empty overflow chain — caller should inline instead".to_string(),
        ));
    }
    // Allocate every page in the chain up front so each stage_page call
    // already knows the successor's page number.
    let chunks: Vec<&[u8]> = bytes.chunks(PAYLOAD_PER_PAGE).collect();
    let pages: Vec<u32> = (0..chunks.len()).map(|_| alloc.allocate()).collect();
    for (i, chunk) in chunks.iter().enumerate() {
        let next = if i + 1 < pages.len() { pages[i + 1] } else { 0 };
        pager.stage_page(pages[i], encode_overflow_page(next, chunk)?);
    }
    Ok(pages[0])
}

/// Walks an overflow chain starting at `first_page` and concatenates its
/// payload bytes. Reads exactly `total_body_len` bytes — a mismatch between
/// what the chain carries and what the OverflowRef claims is a corruption
/// error.
pub fn read_overflow_chain(pager: &Pager, first_page: u32, total_body_len: u64) -> Result<Vec<u8>> {
    let mut out = Vec::with_capacity(total_body_len as usize);
    let mut current = first_page;
    while current != 0 {
        let raw = pager.read_page(current).ok_or_else(|| {
            SQLRiteError::Internal(format!("overflow chain references missing page {current}"))
        })?;
        let ty_byte = raw[0];
        if ty_byte != PageType::Overflow as u8 {
            return Err(SQLRiteError::Internal(format!(
                "page {current} was supposed to be Overflow but is type {ty_byte}"
            )));
        }
        let next = u32::from_le_bytes(raw[1..5].try_into().unwrap());
        let payload_len = u16::from_le_bytes(raw[5..7].try_into().unwrap()) as usize;
        if payload_len > PAYLOAD_PER_PAGE {
            return Err(SQLRiteError::Internal(format!(
                "overflow page {current} reports payload_len {payload_len} > max"
            )));
        }
        out.extend_from_slice(&raw[PAGE_HEADER_SIZE..PAGE_HEADER_SIZE + payload_len]);
        current = next;
    }
    if out.len() as u64 != total_body_len {
        return Err(SQLRiteError::Internal(format!(
            "overflow chain produced {} bytes, OverflowRef claimed {total_body_len}",
            out.len()
        )));
    }
    Ok(out)
}

/// Encodes a single `Overflow`-typed page holding `payload` bytes. Shared
/// with the rest of the pager via the standard 7-byte page header layout.
fn encode_overflow_page(next: u32, payload: &[u8]) -> Result<[u8; PAGE_SIZE]> {
    if payload.len() > PAYLOAD_PER_PAGE {
        return Err(SQLRiteError::Internal(format!(
            "overflow page payload {} exceeds max {PAYLOAD_PER_PAGE}",
            payload.len()
        )));
    }
    let mut buf = [0u8; PAGE_SIZE];
    buf[0] = PageType::Overflow as u8;
    buf[1..5].copy_from_slice(&next.to_le_bytes());
    buf[5..7].copy_from_slice(&(payload.len() as u16).to_le_bytes());
    buf[PAGE_HEADER_SIZE..PAGE_HEADER_SIZE + payload.len()].copy_from_slice(payload);
    Ok(buf)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::sql::db::table::Value;

    fn tmp_path(name: &str) -> std::path::PathBuf {
        let mut p = std::env::temp_dir();
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_nanos())
            .unwrap_or(0);
        p.push(format!("sqlrite-overflow-{pid}-{nanos}-{name}.sqlrite"));
        p
    }

    #[test]
    fn overflow_ref_round_trip() {
        let r = OverflowRef {
            rowid: 42,
            total_body_len: 123_456,
            first_overflow_page: 7,
        };
        let bytes = r.encode();
        let (back, consumed) = OverflowRef::decode(&bytes, 0).unwrap();
        assert_eq!(back, r);
        assert_eq!(consumed, bytes.len());
    }

    #[test]
    fn paged_entry_dispatches_on_kind() {
        let local = Cell::new(1, vec![Some(Value::Integer(10))]);
        let local_bytes = local.encode().unwrap();
        let (decoded, _) = PagedEntry::decode(&local_bytes, 0).unwrap();
        assert_eq!(decoded, PagedEntry::Local(local));

        let overflow = OverflowRef {
            rowid: 2,
            total_body_len: 5000,
            first_overflow_page: 13,
        };
        let overflow_bytes = overflow.encode();
        let (decoded, _) = PagedEntry::decode(&overflow_bytes, 0).unwrap();
        assert_eq!(decoded, PagedEntry::Overflow(overflow));
    }

    /// SQLR-1 — `PagedEntry::decode` is the table-leaf-cell decoder.
    /// Pointing it at a secondary-index leaf used to surface as a
    /// cryptic `unknown paged-entry kind tag 0x4`. The new error
    /// message names the offending kind and the B-Tree the caller
    /// is mistakenly walking.
    #[test]
    fn paged_entry_decode_rejects_index_kind_with_clear_error() {
        use crate::sql::pager::index_cell::IndexCell;
        let ic = IndexCell::new(42, Value::Text("alice".into()));
        let bytes = ic.encode().unwrap();
        let err = PagedEntry::decode(&bytes, 0).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("KIND_INDEX"),
            "expected error to name KIND_INDEX, got: {msg}",
        );
        assert!(
            msg.contains("secondary-index"),
            "expected error to identify the secondary-index B-Tree, got: {msg}",
        );
    }

    /// Symmetric coverage for HNSW and FTS — same wrong-decoder shape,
    /// same diagnostic guarantee.
    #[test]
    fn paged_entry_decode_rejects_hnsw_and_fts_kinds() {
        use crate::sql::pager::cell::{KIND_FTS_POSTING, KIND_HNSW};
        // Build minimal byte sequences carrying the right kind tag at
        // the right offset. Body content past the kind tag doesn't
        // matter — we expect the dispatch to short-circuit on the tag.
        for (tag, hint) in [(KIND_HNSW, "HNSW"), (KIND_FTS_POSTING, "FTS")] {
            // body_len declared as 1 (the kind tag itself); body is the
            // tag and nothing else. Honest about what's in the buffer
            // so a future tightening of `peek_kind` doesn't bite us.
            let bytes = vec![/*body_len varint=*/ 1u8, tag];
            let err = PagedEntry::decode(&bytes, 0).unwrap_err();
            let msg = format!("{err}");
            assert!(
                msg.contains(hint),
                "expected error to identify the {hint} B-Tree, got: {msg}",
            );
        }
    }

    #[test]
    fn peek_rowid_works_for_both_kinds() {
        let local = Cell::new(99, vec![Some(Value::Integer(1))]);
        let local_bytes = local.encode().unwrap();
        assert_eq!(Cell::peek_rowid(&local_bytes, 0).unwrap(), 99);

        let overflow = OverflowRef {
            rowid: -7,
            total_body_len: 100,
            first_overflow_page: 42,
        };
        let overflow_bytes = overflow.encode();
        assert_eq!(Cell::peek_rowid(&overflow_bytes, 0).unwrap(), -7);
    }

    #[test]
    fn write_then_read_overflow_chain() {
        let path = tmp_path("chain");
        let mut pager = Pager::create(&path).unwrap();

        // A blob that definitely spans multiple pages.
        let blob: Vec<u8> = (0..10_000).map(|i| (i % 251) as u8).collect();
        let pages_needed = blob.len().div_ceil(PAYLOAD_PER_PAGE) as u32;
        let mut alloc =
            crate::sql::pager::allocator::PageAllocator::new(std::collections::VecDeque::new(), 10);
        let start = write_overflow_chain(&mut pager, &blob, &mut alloc).unwrap();
        assert_eq!(start, 10);
        // Linear allocation from page 10 → high water = 10 + pages_needed.
        assert_eq!(alloc.high_water(), 10 + pages_needed);

        pager
            .commit(crate::sql::pager::header::DbHeader {
                page_count: alloc.high_water(),
                schema_root_page: 1,
                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
                freelist_head: 0,
            })
            .unwrap();

        // Fresh pager to verify we read from disk.
        drop(pager);
        let pager = Pager::open(&path).unwrap();
        let back = read_overflow_chain(&pager, start, blob.len() as u64).unwrap();
        assert_eq!(back, blob);

        let _ = std::fs::remove_file(&path);
    }

    #[test]
    fn read_overflow_chain_rejects_length_mismatch() {
        let path = tmp_path("mismatch");
        let mut pager = Pager::create(&path).unwrap();
        let blob = vec![1u8; 500];
        let mut alloc =
            crate::sql::pager::allocator::PageAllocator::new(std::collections::VecDeque::new(), 10);
        let start = write_overflow_chain(&mut pager, &blob, &mut alloc).unwrap();
        assert_eq!(start, 10);
        pager
            .commit(crate::sql::pager::header::DbHeader {
                page_count: alloc.high_water(),
                schema_root_page: 1,
                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
                freelist_head: 0,
            })
            .unwrap();

        // Claim more bytes than the chain actually carries.
        let err = read_overflow_chain(&pager, start, 999).unwrap_err();
        assert!(format!("{err}").contains("overflow chain produced"));

        let _ = std::fs::remove_file(&path);
    }

    #[test]
    fn empty_chain_is_rejected() {
        let path = tmp_path("empty");
        let mut pager = Pager::create(&path).unwrap();
        let mut alloc =
            crate::sql::pager::allocator::PageAllocator::new(std::collections::VecDeque::new(), 10);
        let err = write_overflow_chain(&mut pager, &[], &mut alloc).unwrap_err();
        assert!(format!("{err}").contains("empty overflow chain"));
        let _ = std::fs::remove_file(&path);
    }

    #[test]
    fn overflow_threshold_is_reasonable() {
        // The threshold should leave room for at least 4 cells per page.
        assert!(OVERFLOW_THRESHOLD <= PAYLOAD_PER_PAGE / 4);
        // And it should be comfortably larger than a typical small cell.
        assert!(OVERFLOW_THRESHOLD > 200);
    }
}