sqlrite/sql/pager/header.rs
1//! Database file header (page 0).
2//!
//! The first 28 bytes of every `.sqlrite` file identify the format and point
//! at the schema catalog; v6 files additionally store the head of the
//! free-page list in bytes [28..32]. The rest of page 0 is reserved for
//! future use.
5
6use crate::error::{Result, SQLRiteError};
7use crate::sql::pager::page::PAGE_SIZE;
8
/// Magic bytes that open every SQLRite database file.
///
/// Deliberately different from SQLite's `"SQLite format 3\0"` signature so
/// the two formats cannot be mistaken for one another when a file is
/// inspected. The ASCII tag is NUL-padded to exactly 16 bytes.
pub const MAGIC: &[u8; 16] = b"SQLRiteFormat\0\0\0";
12
/// Format revision 4 — the oldest revision this build still decodes.
///
/// The on-disk format revision is bumped whenever the page layout changes
/// incompatibly. Revision history:
///
/// - **Version 1** (Phases 2 / 3a / 3b): schema catalog and table data were
///   opaque bincode blobs chained across typed payload pages.
/// - **Version 2** (Phases 3c / 3d): tables are stored as cell-based
///   B-Trees; the schema catalog is itself a table called `sqlrite_master`
///   with four columns `(name, sql, rootpage, last_rowid)`.
/// - **Version 3** (Phase 3e): `sqlrite_master` gains a `type` column
///   (first), distinguishing `'table'` and `'index'` rows; secondary
///   indexes persist as their own cell-based B-Trees whose cells use the
///   new `KIND_INDEX` format.
/// - **Version 4** (Phase 7): cell encoding gains the `KIND_VECTOR` value
///   tag (length-prefixed dense f32 array) for the new `VECTOR(N)` column
///   type, plus the `KIND_HNSW` cell tag for vector ANN indexes. All
///   Phase 7 storage additions (VECTOR cells, JSON cells, HNSW index nodes)
///   live inside the v4 envelope.
/// - **Version 5** (Phase 8c): adds the `KIND_FTS_POSTING` cell tag for
///   persisted FTS posting lists. Bumped **on demand** — a database without
///   any FTS index keeps writing v4, and the first save with at least one
///   FTS index attached writes v5 instead. Decoders accept both v4 and v5;
///   v5 reading a v4-shaped DB just sees zero FTS indexes in
///   `sqlrite_master`. See Phase 8 plan Q10.
/// - **Version 6** (SQLR-6): adds a persisted free-page list at header
///   bytes [28..32] (`freelist_head`) plus the `PAGE_TYPE_FREELIST_TRUNK`
///   page tag. Bumped **on demand** — a save that produces no freed pages
///   keeps writing the file's existing version, and the first save that
///   yields a non-empty freelist promotes the file to v6.
pub const FORMAT_VERSION_V4: u16 = 4;

/// Format revision 5: adds the `KIND_FTS_POSTING` cell tag. Only written
/// once a save has at least one FTS index attached.
pub const FORMAT_VERSION_V5: u16 = 5;

/// Format revision 6: adds the persisted free-page list head at header
/// bytes [28..32]. Only written once a save yields a non-empty freelist.
pub const FORMAT_VERSION_V6: u16 = 6;

/// Revision a brand-new write starts from when nothing forces a bump.
///
/// Existing databases keep their on-disk version across reads and across
/// writes that involve neither FTS nor a freelist; FTS-bearing saves switch
/// to V5 and freelist-bearing saves switch to V6.
pub const FORMAT_VERSION_BASELINE: u16 = FORMAT_VERSION_V4;
49
/// In-memory form of the database file header kept on page 0.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DbHeader {
    /// Number of pages in the file; page 0 (the header page) is counted.
    pub page_count: u32,
    /// Header pointer to the schema catalog.
    pub schema_root_page: u32,
    /// Format version carried by this header.
    ///
    /// Kept as an explicit field so that a save can follow the on-demand
    /// promotion rules: leave a v4 file at v4 (no FTS, no freelist), move
    /// it to v5 (FTS), or move it to v6 (freelist).
    pub format_version: u16,
    /// First page of the persisted free-page list; `0` means the list is
    /// empty.
    ///
    /// The freelist is a chain of trunk pages, each recording up to ~1018
    /// free leaf-page numbers. v4/v5 files have no on-disk freelist, and
    /// `decode_header` reports `0` for them.
    pub freelist_head: u32,
}
66
67/// Encodes the header into a `PAGE_SIZE`-sized buffer.
68pub fn encode_header(h: &DbHeader) -> [u8; PAGE_SIZE] {
69 let mut buf = [0u8; PAGE_SIZE];
70 buf[0..16].copy_from_slice(MAGIC);
71 buf[16..18].copy_from_slice(&h.format_version.to_le_bytes());
72 buf[18..20].copy_from_slice(&(PAGE_SIZE as u16).to_le_bytes());
73 buf[20..24].copy_from_slice(&h.page_count.to_le_bytes());
74 buf[24..28].copy_from_slice(&h.schema_root_page.to_le_bytes());
75 buf[28..32].copy_from_slice(&h.freelist_head.to_le_bytes());
76 buf
77}
78
79/// Decodes the header from a `PAGE_SIZE`-sized buffer. Returns an error if
80/// magic bytes, format version, or page size don't match what we wrote.
81/// V4, V5, and V6 are accepted; the result's `format_version` echoes
82/// what was on disk so a no-op resave preserves it. `freelist_head` is
83/// read from bytes [28..32] for V6 files; V4/V5 files have a zero
84/// reserved region there, so the field decodes as `0` either way.
85pub fn decode_header(buf: &[u8]) -> Result<DbHeader> {
86 if buf.len() != PAGE_SIZE {
87 return Err(SQLRiteError::Internal(format!(
88 "header buffer length {} != PAGE_SIZE {PAGE_SIZE}",
89 buf.len()
90 )));
91 }
92 if &buf[0..16] != MAGIC {
93 return Err(SQLRiteError::General(
94 "file is not a SQLRite database (bad magic bytes)".to_string(),
95 ));
96 }
97 let version = u16::from_le_bytes(buf[16..18].try_into().unwrap());
98 if version != FORMAT_VERSION_V4 && version != FORMAT_VERSION_V5 && version != FORMAT_VERSION_V6
99 {
100 return Err(SQLRiteError::General(format!(
101 "unsupported SQLRite format version {version}; this build understands \
102 {FORMAT_VERSION_V4}, {FORMAT_VERSION_V5}, and {FORMAT_VERSION_V6}"
103 )));
104 }
105 let page_size = u16::from_le_bytes(buf[18..20].try_into().unwrap()) as usize;
106 if page_size != PAGE_SIZE {
107 return Err(SQLRiteError::General(format!(
108 "unsupported page size {page_size}; this build expects {PAGE_SIZE}"
109 )));
110 }
111 let page_count = u32::from_le_bytes(buf[20..24].try_into().unwrap());
112 let schema_root_page = u32::from_le_bytes(buf[24..28].try_into().unwrap());
113 let freelist_head = u32::from_le_bytes(buf[28..32].try_into().unwrap());
114 Ok(DbHeader {
115 page_count,
116 schema_root_page,
117 format_version: version,
118 freelist_head,
119 })
120}