Skip to main content

sqlrite/sql/pager/
pager.rs

1//! Long-lived page cache + WAL-backed commits.
2//!
3//! A `Pager` wraps an open `.sqlrite` file plus its `-wal` sidecar. It owns
4//! three maps of page bytes:
5//!
6//! - `on_disk`:   snapshot of the main file as last checkpointed. Frozen
7//!                across regular commits — the main file is only rewritten
8//!                when the checkpointer (Phase 4d) runs.
9//! - `wal_cache`: latest committed body for each page that has been
10//!                appended to the WAL since the last checkpoint. Populated
11//!                at open by replaying the WAL, and kept in lockstep with
12//!                each successful `commit`.
13//! - `staged`:    pages queued for the next commit, not yet in the WAL.
14//!
15//! **Read precedence.** `read_page` consults `staged → wal_cache → on_disk`,
16//! so both uncommitted writes and WAL-resident committed writes shadow the
17//! frozen main file. A bounds check against `current_header.page_count`
18//! hides pages that have been logically truncated by a shrink-commit even
19//! though their bytes are still present in `on_disk` (the real truncation
20//! waits for the checkpointer).
21//!
22//! **Commit flow.** `commit` compares each staged page against the
23//! effective committed state (wal_cache layered on on_disk) and appends a
24//! WAL frame only for pages whose bytes actually differ. A final "commit"
25//! frame for page 0 carries the new encoded header and the post-commit
26//! page count in its `commit_page_count` field. That frame is fsync'd.
27//! The main file is not touched.
28//!
29//! **Checkpoint flow (Phase 4d).** When the WAL accumulates past
30//! `AUTO_CHECKPOINT_THRESHOLD_FRAMES` frames (tracked on `Wal`), `commit`
31//! opportunistically folds them back into the main file: write every
32//! WAL-resident page at its proper offset, overwrite the main-file
33//! header, truncate the file to `page_count * PAGE_SIZE` bytes, `fsync`,
34//! then `Wal::truncate` the sidecar (which rolls the salt so any stale
35//! tail bytes from the old generation can't be misread as valid). Reads
36//! stay consistent if a crash hits mid-checkpoint — the WAL still holds
37//! the authoritative bytes until its header is rewritten, and the
38//! checkpointer is idempotent, so rerunning is safe.
39//!
//! The commit-time diff matters because higher layers re-serialize the
//! entire database on every auto-save. Without the diff, even a one-row
//! UPDATE would append a frame for every page of every table. With the
//! diff, unchanged tables — whose encoded pages are byte-identical across
//! saves — simply stay out of the WAL.
45//!
46//! **Locking (Phase 4a → 4e).** Every `Pager` takes an advisory lock on
47//! its main file and on its WAL sidecar. The mode is driven by
48//! [`AccessMode`]:
49//!
50//! - `ReadWrite` → `flock(LOCK_EX)` — one writer, no other openers.
51//! - `ReadOnly`  → `flock(LOCK_SH)` — multiple readers coexist; any writer
52//!   is excluded.
53//!
54//! Both locks are tied to their file descriptors and release
55//! automatically when the `Pager` drops. On collision the opener gets
56//! a clean typed error rather than racing silently. POSIX flock is
57//! "multiple readers OR one writer", not both — true concurrent
58//! reader-and-writer access would need a shared-memory coordination
59//! file and read marks, which is not on the roadmap.
60
61use std::collections::HashMap;
62use std::fs::{File, OpenOptions};
63use std::path::{Path, PathBuf};
64
65use crate::error::{Result, SQLRiteError};
66use crate::sql::pager::file::FileStorage;
67use crate::sql::pager::header::{DbHeader, decode_header, encode_header};
68use crate::sql::pager::page::PAGE_SIZE;
69use crate::sql::pager::wal::Wal;
70
/// Derives the WAL sidecar path that pairs with a main `.sqlrite` file.
///
/// The `-wal` suffix is appended to the *entire* path rather than
/// replacing the extension, so `foo.sqlrite` pairs with
/// `foo.sqlrite-wal` (the same convention SQLite uses). Working on the
/// `OsString` keeps non-UTF-8 paths intact.
pub(crate) fn wal_path_for(main: &Path) -> PathBuf {
    let mut sidecar = main.to_path_buf().into_os_string();
    sidecar.push("-wal");
    sidecar.into()
}
79
/// How a `Pager` (or `Wal`) intends to use the file: mutating writes vs.
/// consistent-snapshot reads. Drives the OS-level lock mode, and the
/// Pager uses it to reject mutation attempts on read-only openers.
///
/// - `ReadWrite` takes `flock(LOCK_EX)` — one writer, no other openers.
/// - `ReadOnly`  takes `flock(LOCK_SH)` — multiple readers can coexist;
///   a writer is excluded.
///
/// This is POSIX-flock semantics, so "multiple readers AND one writer"
/// isn't supported yet. True concurrent reader-writer access would need
/// a shared-memory coordination file and read marks — that's deferred.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessMode {
    /// Exclusive access: the opener may `commit` and `checkpoint`.
    ReadWrite,
    /// Shared access: `commit`, `checkpoint` and `append_mvcc_batch` are
    /// rejected by the Pager with an error.
    ReadOnly,
}
96
/// Takes a non-blocking advisory lock on `file` and converts a failure
/// into a `SQLRiteError::General` that names the database at `path`.
///
/// `AccessMode::ReadWrite` requests an exclusive lock and
/// `AccessMode::ReadOnly` a shared one. On Unix that is
/// `flock(LOCK_EX | LOCK_NB)` / `flock(LOCK_SH | LOCK_NB)`; on Windows,
/// `LockFileEx` with the corresponding flags.
///
/// The fs2 trait methods are called fully qualified because
/// `std::fs::File` gained its own `try_lock_*` inherent methods in Rust
/// 1.84 with a different error type — qualifying pins down which one is
/// meant.
///
/// # Errors
///
/// Any error returned by the lock call is wrapped, with the underlying
/// OS error appended in parentheses.
#[cfg(feature = "file-locks")]
pub(crate) fn acquire_lock(file: &File, path: &Path, mode: AccessMode) -> Result<()> {
    // Pair each lock attempt with the explanation used if it fails, so
    // the mode is only inspected once.
    let (outcome, how) = match mode {
        AccessMode::ReadWrite => (
            fs2::FileExt::try_lock_exclusive(file),
            "is in use (another process has it open; readers and writers are exclusive)",
        ),
        AccessMode::ReadOnly => (
            fs2::FileExt::try_lock_shared(file),
            "is locked for writing by another process (read-only open blocked until the writer closes)",
        ),
    };
    match outcome {
        Ok(()) => Ok(()),
        Err(e) => Err(SQLRiteError::General(format!(
            "database '{}' {how} ({e})",
            path.display()
        ))),
    }
}
126
/// No-op variant for builds without the `file-locks` feature (most
/// notably the WASM SDK, where `fs2` doesn't compile against
/// wasm32-unknown-unknown). The Pager still refuses to touch a
/// read-only open via `AccessMode`, but there's no OS-level
/// multi-process coordination — the caller is trusted to avoid
/// conflicting opens. Fine for WASM, where file-backed opens
/// aren't exposed in the MVP anyway.
///
/// Always returns `Ok(())`; all three arguments are ignored and exist
/// only so both variants share one signature.
#[cfg(not(feature = "file-locks"))]
pub(crate) fn acquire_lock(_file: &File, _path: &Path, _mode: AccessMode) -> Result<()> {
    Ok(())
}
138
/// How many WAL frames may accumulate between auto-checkpoints before
/// `commit` opportunistically folds them back into the main file. Kept
/// low enough that the WAL stays bounded on write-heavy workloads;
/// high enough that small bursts don't thrash the main file. SQLite
/// defaults to 1000; our target DBs are smaller so 100 is plenty.
///
/// `commit` compares the WAL's frame count against this with `>=`
/// right after appending its commit frame.
const AUTO_CHECKPOINT_THRESHOLD_FRAMES: usize = 100;
145
/// Page cache over one database file and its `-wal` sidecar.
///
/// Holds three in-memory page maps (`on_disk`, `wal_cache`, `staged`)
/// keyed by page number; `read_page` resolves them in the order
/// `staged → wal_cache → on_disk`.
pub struct Pager {
    /// Main-file I/O handle. Regular commits leave it alone; the
    /// checkpointer writes accumulated WAL pages back here.
    storage: FileStorage,
    /// Latest committed header. Set at open (the WAL's page 0 overrides
    /// the main file's header when present) and replaced by each
    /// successful `commit`. Its `page_count` bounds `read_page`.
    current_header: DbHeader,
    /// Byte snapshot of the main file as last checkpointed. The
    /// checkpointer is the only thing that mutates it.
    on_disk: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// Pages queued for the next commit. `commit` drains this.
    staged: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// The committed WAL's view of each page. Populated at open by
    /// replaying the log, and kept in sync with each successful commit.
    /// Layered on top of `on_disk` for read resolution.
    wal_cache: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// Write-ahead log sidecar. Always present on a read-write Pager.
    /// On a read-only Pager it is `Some` when a sidecar existed at open
    /// and `None` when it did not. Reads consult `wal_cache` (already
    /// populated at open) either way.
    wal: Option<Wal>,
    /// `ReadWrite` allows `commit` / `checkpoint`; `ReadOnly` rejects
    /// them with a typed error. `stage_page` stays open on both modes
    /// (it only touches the in-memory `staged` map) — any staged bytes
    /// simply never reach disk on a read-only Pager because `commit` is
    /// the gate.
    access_mode: AccessMode,
}
172
173impl Pager {
174    /// Opens an existing database file for read-write access. Shorthand
175    /// for [`Pager::open_with_mode`] with [`AccessMode::ReadWrite`].
176    pub fn open(path: &Path) -> Result<Self> {
177        Self::open_with_mode(path, AccessMode::ReadWrite)
178    }
179
180    /// Opens an existing database file for read-only access — takes
181    /// a shared advisory lock that coexists with other readers but is
182    /// excluded by any writer. `commit` and `checkpoint` return a clean
183    /// error rather than panic; `stage_page` stays a no-op-to-disk
184    /// (bytes sit in the in-memory `staged` map that `commit` would
185    /// have drained).
186    ///
187    /// If the WAL sidecar doesn't exist, the open succeeds with an
188    /// empty `wal_cache` — a read-only caller can't materialize a
189    /// sidecar on its own, and a DB that never had WAL writes is fine
190    /// to read straight from the main file.
191    pub fn open_read_only(path: &Path) -> Result<Self> {
192        Self::open_with_mode(path, AccessMode::ReadOnly)
193    }
194
195    /// Opens an existing database file with the given access mode.
196    /// Loads every main-file page into `on_disk`, then opens the WAL
197    /// sidecar (read-only mode uses a shared lock and skips sidecar
198    /// creation; read-write creates the sidecar if missing) and layers
199    /// committed frames into `wal_cache`.
200    pub fn open_with_mode(path: &Path, mode: AccessMode) -> Result<Self> {
201        let file = match mode {
202            AccessMode::ReadWrite => OpenOptions::new().read(true).write(true).open(path)?,
203            AccessMode::ReadOnly => OpenOptions::new().read(true).open(path)?,
204        };
205        acquire_lock(&file, path, mode)?;
206        let mut storage = FileStorage::new(file);
207        let mut header = storage.read_header()?;
208
209        let mut on_disk = HashMap::with_capacity(header.page_count.saturating_sub(1) as usize);
210        // page 0 is the header itself; regular pages live at 1..page_count.
211        for page_num in 1..header.page_count {
212            let buf = read_raw_page(&mut storage, page_num)?;
213            on_disk.insert(page_num, buf);
214        }
215
216        let wal_path = wal_path_for(path);
217        let (wal_handle, wal_cache) = match mode {
218            AccessMode::ReadWrite => {
219                // Create the sidecar if it's missing — a pre-Phase-4c
220                // file or a DB that was hand-deleted down to just the
221                // main file both need a fresh empty WAL to be writable.
222                let mut wal = if wal_path.exists() {
223                    Wal::open_with_mode(&wal_path, mode)?
224                } else {
225                    Wal::create(&wal_path)?
226                };
227                let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
228                wal.load_committed_into(&mut cache)?;
229                (Some(wal), cache)
230            }
231            AccessMode::ReadOnly => {
232                // Read-only mustn't create files. If the sidecar is
233                // absent, treat the WAL as empty and serve reads from
234                // the main file alone.
235                if wal_path.exists() {
236                    let mut wal = Wal::open_with_mode(&wal_path, mode)?;
237                    let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
238                    wal.load_committed_into(&mut cache)?;
239                    // We don't need to retain the WAL handle in
240                    // read-only mode — the cache is all reads need and
241                    // dropping the handle releases the shared lock on
242                    // the sidecar early. Keep it, though, so the lock
243                    // spans the whole Pager lifetime: a checkpointer
244                    // process grabbing LOCK_EX on the WAL while our
245                    // reader still has wal_cache loaded would be
246                    // correct for reads but surprising semantically.
247                    (Some(wal), cache)
248                } else {
249                    (None, HashMap::new())
250                }
251            }
252        };
253
254        // If the WAL committed a new page 0, that frame's body is the
255        // up-to-date header — decode it and let it override what the
256        // main file's stale header says.
257        if let Some(page0) = wal_cache.get(&0) {
258            header = decode_header(page0.as_ref())?;
259        } else if let Some(w) = wal_handle.as_ref()
260            && let Some(committed_pc) = w.last_commit_page_count()
261        {
262            // Belt-and-suspenders: even if the latest commit frame didn't
263            // land on page 0 (shouldn't happen under the current commit
264            // layout, but keeps us correct if that ever changes), trust
265            // its page count.
266            header.page_count = committed_pc;
267        }
268
269        Ok(Self {
270            storage,
271            current_header: header,
272            on_disk,
273            staged: HashMap::new(),
274            wal_cache,
275            wal: wal_handle,
276            access_mode: mode,
277        })
278    }
279
280    /// Creates a fresh database file. Page 0 is the header; page 1 is an
281    /// empty `TableLeaf` that serves as the initial `sqlrite_master` root
282    /// (zero rows, no user tables yet). A matching empty WAL sidecar is
283    /// created alongside it — any pre-existing WAL at the target path is
284    /// truncated.
285    pub fn create(path: &Path) -> Result<Self> {
286        use crate::sql::pager::page::{PAGE_HEADER_SIZE, PageType};
287        use crate::sql::pager::table_page::TablePage;
288
289        let file = OpenOptions::new()
290            .read(true)
291            .write(true)
292            .create(true)
293            .truncate(true)
294            .open(path)?;
295        acquire_lock(&file, path, AccessMode::ReadWrite)?;
296        let mut storage = FileStorage::new(file);
297
298        let empty_master = TablePage::empty();
299        let mut page1 = Box::new([0u8; PAGE_SIZE]);
300        page1[0] = PageType::TableLeaf as u8;
301        page1[1..5].copy_from_slice(&0u32.to_le_bytes());
302        page1[5..7].copy_from_slice(&0u16.to_le_bytes());
303        page1[PAGE_HEADER_SIZE..].copy_from_slice(empty_master.as_bytes());
304
305        let header = DbHeader {
306            page_count: 2,
307            schema_root_page: 1,
308            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
309            freelist_head: 0,
310        };
311
312        // Write the file synchronously so the initial create is durable and
313        // subsequent `Pager::open` calls see a valid header + page 1.
314        storage.seek_to(0)?;
315        storage.write_all(&encode_header(&header))?;
316        storage.write_all(page1.as_ref())?;
317        storage.flush()?;
318
319        // Sidecar WAL — fresh, no frames yet.
320        let wal = Wal::create(&wal_path_for(path))?;
321
322        let mut on_disk = HashMap::new();
323        on_disk.insert(1, page1);
324
325        Ok(Self {
326            storage,
327            current_header: header,
328            on_disk,
329            staged: HashMap::new(),
330            wal_cache: HashMap::new(),
331            wal: Some(wal),
332            access_mode: AccessMode::ReadWrite,
333        })
334    }
335
336    pub fn header(&self) -> DbHeader {
337        self.current_header
338    }
339
340    /// Returns the mode this Pager was opened in. Callers can use this
341    /// to bail out of a write path earlier than the Pager itself would.
342    pub fn access_mode(&self) -> AccessMode {
343        self.access_mode
344    }
345
346    fn require_writable(&self, op: &'static str) -> Result<()> {
347        if self.access_mode == AccessMode::ReadOnly {
348            return Err(SQLRiteError::General(format!(
349                "cannot {op}: database is opened read-only"
350            )));
351        }
352        Ok(())
353    }
354
355    /// Reads a page, preferring staged content, then the WAL-committed
356    /// overlay, then the frozen main-file snapshot. Returns `None` for
357    /// pages beyond the current page count (pages that have been logically
358    /// truncated by a shrink-commit stay in `on_disk` until checkpoint,
359    /// but a bounds check hides them from readers).
360    pub fn read_page(&self, page_num: u32) -> Option<&[u8; PAGE_SIZE]> {
361        // Staged pages are "the future" and should always shadow everything
362        // else, even pages we're about to extend beyond the old page count.
363        if let Some(b) = self.staged.get(&page_num) {
364            return Some(b);
365        }
366        // A page that's been logically dropped shouldn't be readable even
367        // if its bytes linger in on_disk until the next checkpoint.
368        if page_num >= self.current_header.page_count {
369            return None;
370        }
371        if let Some(b) = self.wal_cache.get(&page_num) {
372            return Some(b.as_ref());
373        }
374        self.on_disk.get(&page_num).map(|b| b.as_ref())
375    }
376
377    /// Queues `bytes` as the new content of page `page_num`. The write only
378    /// reaches disk when `commit` is called.
379    pub fn stage_page(&mut self, page_num: u32, bytes: [u8; PAGE_SIZE]) {
380        self.staged.insert(page_num, Box::new(bytes));
381    }
382
383    /// Discards all staged pages. Useful when beginning a new full re-save
384    /// from scratch; the higher layer can also just overwrite pages without
385    /// clearing since `stage_page` replaces.
386    pub fn clear_staged(&mut self) {
387        self.staged.clear();
388    }
389
390    /// Phase 11.9 — appends an MVCC commit-batch frame to the WAL
391    /// without fsync. The next legacy page-commit's fsync covers it,
392    /// so callers should follow this with `Pager::commit` (or
393    /// `pager::save_database`, which calls into it) to seal the
394    /// transaction. See [`crate::sql::pager::wal::Wal::append_mvcc_batch`]
395    /// for the durability story.
396    ///
397    /// `Connection::commit_concurrent` is the only caller today; it
398    /// invokes this after validation passes but before the legacy
399    /// save so a single fsync covers both the MVCC log record and
400    /// the page-level updates.
401    pub fn append_mvcc_batch(&mut self, batch: &crate::mvcc::MvccCommitBatch) -> Result<()> {
402        self.require_writable("append_mvcc_batch")?;
403        let wal = self
404            .wal
405            .as_mut()
406            .expect("read-write Pager must carry a WAL handle");
407        wal.append_mvcc_batch(batch)
408    }
409
410    /// Phase 11.9 — MVCC commit batches recovered from the WAL at
411    /// open time, in commit order. Empty for fresh databases, for
412    /// pre-11.9 (v1 / v2) WALs that carry no MVCC frames, and for
413    /// read-only opens that didn't replay (those still get the
414    /// batches if the WAL had any — replay is unconditional in
415    /// `Wal::open_with_mode`).
416    ///
417    /// The caller (`pager::open_database`) drains this into
418    /// `Database::mv_store` so the conflict-detection window
419    /// survives a process restart.
420    pub fn recovered_mvcc_commits(&self) -> &[crate::mvcc::MvccCommitBatch] {
421        match self.wal.as_ref() {
422            Some(wal) => wal.recovered_mvcc_commits(),
423            None => &[],
424        }
425    }
426
427    /// Phase 11.9 — returns the persisted MVCC clock high-water
428    /// from the WAL header, or 0 for in-memory / no-WAL opens. The
429    /// open path uses this to seed [`crate::mvcc::MvccClock`] so
430    /// post-reopen transactions don't hand out timestamps below
431    /// `max(committed_ts)`.
432    pub fn clock_high_water(&self) -> u64 {
433        self.wal.as_ref().map(|w| w.clock_high_water()).unwrap_or(0)
434    }
435
436    /// Phase 11.9 — promotes the WAL header's `clock_high_water` to
437    /// `value` if it would advance. No-op if `value` is at or below
438    /// the current high-water mark. Persists the new value into
439    /// the WAL header (which an fsync at checkpoint will flush).
440    ///
441    /// Called by `Connection::commit_concurrent` after each MVCC
442    /// commit so a crash between commits and the next checkpoint
443    /// leaves enough of the clock persisted that replay seeds
444    /// `MvccClock` correctly.
445    pub fn observe_clock_high_water(&mut self, value: u64) -> Result<()> {
446        if let Some(wal) = self.wal.as_mut() {
447            if value > wal.clock_high_water() {
448                wal.set_clock_high_water(value)?;
449            }
450        }
451        Ok(())
452    }
453
454    /// Commits all staged pages into the WAL. Only pages whose bytes differ
455    /// from the effective committed state (wal_cache layered on on_disk)
456    /// produce frames. A final commit frame carries the new page 0 (encoded
457    /// header) and is fsync'd; that seals the transaction. The main file is
458    /// left untouched — it only changes when the checkpointer (Phase 4d)
459    /// runs.
460    ///
461    /// Returns the number of dirty *data* frames appended (excluding the
462    /// implicit page-0 commit frame that's always written).
463    pub fn commit(&mut self, new_header: DbHeader) -> Result<usize> {
464        self.require_writable("commit")?;
465        let wal = self
466            .wal
467            .as_mut()
468            .expect("read-write Pager must carry a WAL handle");
469
470        // Decide which staged pages carry bytes that aren't already live.
471        // Effective committed state = wal_cache overlaid on on_disk.
472        let staged = std::mem::take(&mut self.staged);
473        let mut dirty: Vec<(u32, Box<[u8; PAGE_SIZE]>)> = staged
474            .into_iter()
475            .filter(|(n, bytes)| {
476                let existing = self.wal_cache.get(n).or_else(|| self.on_disk.get(n));
477                match existing {
478                    Some(e) => e.as_ref() != bytes.as_ref(),
479                    None => true,
480                }
481            })
482            .collect();
483        // Append in ascending page order so the log replays deterministically
484        // and sequential reads during checkpoint stay sequential.
485        dirty.sort_by_key(|(n, _)| *n);
486        let writes = dirty.len();
487
488        for (n, bytes) in &dirty {
489            wal.append_frame(*n, bytes.as_ref(), None)?;
490        }
491
492        // Seal the transaction. The commit frame carries the new page 0
493        // (encoded header) in its body and the new page count in its
494        // commit_page_count field — together they're the single atomic
495        // record that says "this is the new committed state".
496        let page0 = encode_header(&new_header);
497        wal.append_frame(0, &page0, Some(new_header.page_count))?;
498        let frame_count_after_commit = wal.frame_count();
499
500        // Promote every frame we just wrote into wal_cache so subsequent
501        // reads see the latest committed bytes without touching the WAL.
502        for (n, bytes) in dirty {
503            self.wal_cache.insert(n, bytes);
504        }
505        self.wal_cache.insert(0, Box::new(page0));
506
507        self.current_header = new_header;
508
509        // Keep the WAL bounded. Under write-heavy load, un-flushed frames
510        // accumulate; past the threshold we fold them back into the main
511        // file opportunistically so open doesn't have to replay an
512        // arbitrarily long log on the next start.
513        if frame_count_after_commit >= AUTO_CHECKPOINT_THRESHOLD_FRAMES {
514            self.checkpoint()?;
515        }
516
517        Ok(writes)
518    }
519
520    /// Folds all WAL-resident pages back into the main file and truncates
521    /// the WAL. Returns the number of data pages written to the main
522    /// file (excludes the header).
523    ///
524    /// **Crash safety — two fsync barriers.** The main-file writes happen
525    /// in two phases separated by a barrier, matching SQLite's checkpoint
526    /// ordering:
527    ///
528    /// 1. Write every `wal_cache` data page at its `page_num * PAGE_SIZE`
529    ///    offset in the main file.
530    /// 2. **`fsync`** — force those data pages to stable storage *before*
531    ///    the header publishes the new state. Without this barrier, a
532    ///    filesystem or disk-cache reordering could land the header first,
533    ///    leaving a main file that claims "N pages" over stale data.
534    /// 3. Rewrite the main-file header at offset 0. This is the
535    ///    checkpoint's "commit point" — after it hits disk the main file
536    ///    alone tells the truth.
537    /// 4. `set_len` shrinks the tail if `page_count` dropped.
538    /// 5. **`fsync`** — force the header + set_len durable.
539    /// 6. `Wal::truncate` resets the sidecar (rolls salt, writes new
540    ///    header, fsync). Running this *after* the main file is fully
541    ///    durable means a crash between 5 and 6 leaves a stale WAL over a
542    ///    current main file; readers still see the right bytes because
543    ///    wal_cache (replayed from the stale WAL on next open) would be
544    ///    byte-identical to what's in the main file. A retry of
545    ///    `checkpoint` then truncates cleanly.
546    ///
547    /// A crash between 1 and 2 can leave partial data-page writes, but
548    /// since the header hasn't moved yet, the main file still reads as
549    /// its pre-checkpoint self — the WAL is intact and authoritative,
550    /// and a retry rewrites the same bytes.
551    pub fn checkpoint(&mut self) -> Result<usize> {
552        self.require_writable("checkpoint")?;
553        // `require_writable` already guaranteed we're ReadWrite; in
554        // ReadWrite mode `wal` is always `Some` (it's only `None` for
555        // ReadOnly opens of a DB that had no sidecar on disk).
556        let wal_frame_count = self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0);
557
558        // Nothing to flush? Skip the fsyncs and get out.
559        if wal_frame_count == 0 && self.wal_cache.is_empty() {
560            return Ok(0);
561        }
562
563        // Step 1 — write every WAL-resident data page to the main file.
564        // Page 0 (header) is handled separately via write_header, and any
565        // pages past the new page count are skipped here (set_len will
566        // drop them when the file shrinks).
567        let page_count = self.current_header.page_count;
568        let mut pages: Vec<u32> = self
569            .wal_cache
570            .keys()
571            .copied()
572            .filter(|&n| n != 0 && n < page_count)
573            .collect();
574        pages.sort_unstable();
575        let written = pages.len();
576        for page_num in &pages {
577            let bytes = self
578                .wal_cache
579                .get(page_num)
580                .expect("iterated key must resolve");
581            self.storage
582                .seek_to((*page_num as u64) * (PAGE_SIZE as u64))?;
583            self.storage.write_all(bytes.as_ref())?;
584        }
585
586        // Step 2 — first durability barrier. Data pages must hit stable
587        // storage before the header publishes the new page count /
588        // schema root, or a reordered writeback could expose a
589        // half-migrated file on crash.
590        if written > 0 {
591            self.storage.flush()?;
592        }
593
594        // Step 3 — rewrite the main-file header. This is the checkpoint's
595        // atomic record.
596        self.storage.write_header(&self.current_header)?;
597
598        // Step 4 — shrink the main file if the committed page count is
599        // smaller than what the file physically holds.
600        self.storage.truncate_to_pages(page_count)?;
601
602        // Step 5 — second durability barrier. Makes header + set_len
603        // durable together before we touch the WAL.
604        self.storage.flush()?;
605
606        // Step 6 — reset the WAL sidecar. Runs before the in-memory
607        // cache swap so that if `wal.truncate` fails (disk full, EIO)
608        // we leave the in-memory state untouched rather than having
609        // wal_cache empty + on_disk updated + WAL un-truncated, which
610        // the Pager can't easily recover from on its own. Here a
611        // failure means the main file is already consistent on disk
612        // (steps 2 + 5 fsynced it); we just leave the stale WAL in
613        // place for the next checkpoint attempt.
614        self.wal
615            .as_mut()
616            .expect("read-write Pager must carry a WAL handle")
617            .truncate()?;
618
619        // Promote wal_cache into on_disk and drop everything that's no
620        // longer live. Page 0 is special — it's never materialized in
621        // on_disk (we read it lazily via storage.read_header on open).
622        for (n, bytes) in self.wal_cache.drain().filter(|(n, _)| *n != 0) {
623            if n < page_count {
624                self.on_disk.insert(n, bytes);
625            }
626        }
627        self.on_disk.retain(|&n, _| n < page_count);
628
629        Ok(written)
630    }
631}
632
633fn read_raw_page(storage: &mut FileStorage, page_num: u32) -> Result<Box<[u8; PAGE_SIZE]>> {
634    storage.seek_to((page_num as u64) * (PAGE_SIZE as u64))?;
635    let mut buf = Box::new([0u8; PAGE_SIZE]);
636    storage.read_exact(buf.as_mut())?;
637    Ok(buf)
638}
639
640impl std::fmt::Debug for Pager {
641    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
642        f.debug_struct("Pager")
643            .field("access_mode", &self.access_mode)
644            .field("page_count", &self.current_header.page_count)
645            .field("schema_root_page", &self.current_header.schema_root_page)
646            .field("cached_pages", &self.on_disk.len())
647            .field("staged_pages", &self.staged.len())
648            .field("wal_pages", &self.wal_cache.len())
649            .field(
650                "wal_frames",
651                &self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0),
652            )
653            .finish()
654    }
655}
656
657#[cfg(test)]
658mod tests {
659    use super::*;
660
661    fn tmp_path(name: &str) -> std::path::PathBuf {
662        let mut p = std::env::temp_dir();
663        let pid = std::process::id();
664        let nanos = std::time::SystemTime::now()
665            .duration_since(std::time::UNIX_EPOCH)
666            .map(|d| d.as_nanos())
667            .unwrap_or(0);
668        p.push(format!("sqlrite-pager-{pid}-{nanos}-{name}.sqlrite"));
669        p
670    }
671
672    /// Remove both the main file and its `-wal` sidecar — leaving either
673    /// behind can destabilize later test runs on the same tmp dir.
674    fn cleanup(path: &Path) {
675        let _ = std::fs::remove_file(path);
676        let _ = std::fs::remove_file(wal_path_for(path));
677    }
678
679    fn make_page(first_byte: u8) -> [u8; PAGE_SIZE] {
680        let mut buf = [0u8; PAGE_SIZE];
681        buf[0] = first_byte;
682        buf
683    }
684
685    #[test]
686    fn create_then_open_round_trips() {
687        let path = tmp_path("create_open");
688        {
689            let p = Pager::create(&path).unwrap();
690            assert_eq!(p.header().page_count, 2);
691            assert_eq!(p.header().schema_root_page, 1);
692        }
693        let p2 = Pager::open(&path).unwrap();
694        assert_eq!(p2.header().page_count, 2);
695        cleanup(&path);
696    }
697
698    #[test]
699    fn create_spawns_wal_sidecar() {
700        // Phase 4c: `Pager::create` must produce an empty WAL sidecar
701        // alongside the main file so the first commit has somewhere to
702        // append frames.
703        use crate::sql::pager::wal::WAL_HEADER_SIZE;
704        let path = tmp_path("wal_sidecar");
705        let _p = Pager::create(&path).unwrap();
706        let wal = wal_path_for(&path);
707        assert!(wal.exists(), "WAL sidecar should exist after create");
708        // An empty WAL is just its header.
709        let len = std::fs::metadata(&wal).unwrap().len();
710        assert_eq!(
711            len, WAL_HEADER_SIZE as u64,
712            "fresh WAL should be header-only"
713        );
714        cleanup(&path);
715    }
716
717    #[test]
718    fn commit_writes_only_dirty_pages() {
719        let path = tmp_path("diff");
720        let mut p = Pager::create(&path).unwrap();
721
722        // Initial state: page 1 is the empty-catalog schema page.
723        // Stage three "table-data" pages.
724        p.stage_page(2, make_page(0xAA));
725        p.stage_page(3, make_page(0xBB));
726        p.stage_page(4, make_page(0xCC));
727        let writes = p
728            .commit(DbHeader {
729                page_count: 5,
730                schema_root_page: 1,
731                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
732                freelist_head: 0,
733            })
734            .unwrap();
735        // 3 dirty data pages (pages 2, 3, 4). The page-0 commit frame is
736        // implicit and not counted.
737        assert_eq!(writes, 3);
738
739        // Re-stage the same bytes for pages 2 and 3, and changed bytes for 4.
740        p.stage_page(2, make_page(0xAA));
741        p.stage_page(3, make_page(0xBB));
742        p.stage_page(4, make_page(0xDD));
743        let writes = p
744            .commit(DbHeader {
745                page_count: 5,
746                schema_root_page: 1,
747                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
748                freelist_head: 0,
749            })
750            .unwrap();
751        assert_eq!(writes, 1, "only the changed page should have been written");
752
753        // Reopen and confirm the content is as expected. The bytes live in
754        // the WAL — the main file still has the empty init state — so this
755        // also verifies the WAL-replay path.
756        drop(p);
757        let p2 = Pager::open(&path).unwrap();
758        assert_eq!(p2.read_page(2).unwrap()[0], 0xAA);
759        assert_eq!(p2.read_page(3).unwrap()[0], 0xBB);
760        assert_eq!(p2.read_page(4).unwrap()[0], 0xDD);
761
762        cleanup(&path);
763    }
764
765    #[test]
766    fn second_pager_on_same_file_is_rejected() {
767        // Phase 4a regression: two simultaneous read-write Pagers against
768        // the same file used to silently race. Now the second one must
769        // error out. Phase 4e reworded the lock-contention message; the
770        // stable substring we assert on is "in use".
771        let path = tmp_path("lock_contention");
772        let _first = Pager::create(&path).unwrap();
773
774        let second = Pager::open(&path);
775        assert!(second.is_err(), "expected lock-contention error, got Ok");
776        let msg = format!("{}", second.unwrap_err());
777        assert!(
778            msg.contains("in use"),
779            "error message should signal lock contention; got: {msg}"
780        );
781
782        // After the first Pager drops, both the main-file and WAL locks
783        // release and a fresh open succeeds — confirming the locks are
784        // tied to Pager lifetime, not leaked across instances.
785        drop(_first);
786        let third = Pager::open(&path);
787        assert!(third.is_ok(), "reopen after drop should succeed: {third:?}");
788
789        cleanup(&path);
790    }
791
792    #[test]
793    fn commit_leaves_main_file_untouched_and_shrink_hides_dropped_pages() {
794        // Phase 4c: commits now go to the WAL; the main file stays frozen
795        // until the checkpointer runs. Page-count shrinks still hide the
796        // logically-dropped pages from readers (via a bounds check in
797        // read_page) even though their bytes linger in the main file.
798        let path = tmp_path("shrink");
799        let mut p = Pager::create(&path).unwrap();
800        let main_size_after_create = std::fs::metadata(&path).unwrap().len();
801
802        p.stage_page(2, make_page(1));
803        p.stage_page(3, make_page(2));
804        p.stage_page(4, make_page(3));
805        p.commit(DbHeader {
806            page_count: 5,
807            schema_root_page: 1,
808            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
809            freelist_head: 0,
810        })
811        .unwrap();
812
813        // Main file unchanged: the page-2..4 bytes went into the WAL.
814        assert_eq!(
815            std::fs::metadata(&path).unwrap().len(),
816            main_size_after_create,
817            "main file must stay frozen across commits"
818        );
819        // WAL, however, has grown: 3 dirty frames + 1 commit frame.
820        let wal_size = std::fs::metadata(wal_path_for(&path)).unwrap().len();
821        assert!(
822            wal_size > 32,
823            "WAL should contain frames after a commit, got size {wal_size}"
824        );
825
826        // Shrink to 3 pages.
827        p.commit(DbHeader {
828            page_count: 3,
829            schema_root_page: 1,
830            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
831            freelist_head: 0,
832        })
833        .unwrap();
834
835        // Page 4 is now logically dropped — read_page hides it.
836        assert!(p.read_page(4).is_none());
837        // And page 2 is still visible under the new count.
838        assert_eq!(p.read_page(2).unwrap()[0], 1);
839
840        // Reopen confirms the committed page count survives.
841        drop(p);
842        let p2 = Pager::open(&path).unwrap();
843        assert_eq!(p2.header().page_count, 3);
844        assert!(p2.read_page(4).is_none());
845
846        cleanup(&path);
847    }
848
849    #[test]
850    fn wal_replay_on_reopen_restores_committed_state() {
851        // End-to-end: do a commit, close, reopen, and verify every staged
852        // page is visible. This is the core Phase 4c promise — committed
853        // writes survive a close/reopen via the WAL even though the main
854        // file wasn't touched.
855        let path = tmp_path("wal_replay");
856        {
857            let mut p = Pager::create(&path).unwrap();
858            p.stage_page(2, make_page(0x11));
859            p.stage_page(3, make_page(0x22));
860            p.commit(DbHeader {
861                page_count: 4,
862                schema_root_page: 1,
863                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
864                freelist_head: 0,
865            })
866            .unwrap();
867        }
868
869        let p2 = Pager::open(&path).unwrap();
870        assert_eq!(p2.header().page_count, 4);
871        assert_eq!(p2.read_page(2).unwrap()[0], 0x11);
872        assert_eq!(p2.read_page(3).unwrap()[0], 0x22);
873        cleanup(&path);
874    }
875
876    #[test]
877    fn orphan_dirty_frame_in_wal_is_invisible_on_reopen() {
878        // Simulates a crash between a dirty frame being written and the
879        // commit frame being appended. The Pager's open-time WAL replay
880        // should not surface the dirty bytes — reads must still return
881        // the previous-committed content.
882        let path = tmp_path("orphan_dirty");
883        {
884            let mut p = Pager::create(&path).unwrap();
885            p.stage_page(2, make_page(0xCC));
886            p.commit(DbHeader {
887                page_count: 3,
888                schema_root_page: 1,
889                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
890                freelist_head: 0,
891            })
892            .unwrap();
893        }
894
895        // Open the WAL directly and append a dirty frame for page 2 with
896        // *different* bytes — no commit frame follows. A later
897        // `Pager::open` must ignore this orphan frame.
898        {
899            let mut w = crate::sql::pager::wal::Wal::open(&wal_path_for(&path)).unwrap();
900            let mut other = Box::new([0u8; PAGE_SIZE]);
901            other[0] = 0x99;
902            w.append_frame(2, &other, None).unwrap();
903        }
904
905        let p = Pager::open(&path).unwrap();
906        assert_eq!(
907            p.read_page(2).unwrap()[0],
908            0xCC,
909            "orphan dirty frame must not shadow the last committed page"
910        );
911        cleanup(&path);
912    }
913
914    #[test]
915    fn two_commits_only_stage_the_delta() {
916        // Diffing vs. the effective state (wal_cache + on_disk) means a
917        // repeated identical commit writes zero dirty data frames. A commit
918        // frame is still appended, but that's implicit.
919        let path = tmp_path("diff_delta");
920        let mut p = Pager::create(&path).unwrap();
921        p.stage_page(2, make_page(0x77));
922        let first = p
923            .commit(DbHeader {
924                page_count: 3,
925                schema_root_page: 1,
926                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
927                freelist_head: 0,
928            })
929            .unwrap();
930        assert_eq!(first, 1);
931
932        // Stage the same byte again.
933        p.stage_page(2, make_page(0x77));
934        let second = p
935            .commit(DbHeader {
936                page_count: 3,
937                schema_root_page: 1,
938                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
939                freelist_head: 0,
940            })
941            .unwrap();
942        assert_eq!(second, 0, "no data frames should be re-appended");
943
944        cleanup(&path);
945    }
946
947    // -------------------------------------------------------------------
948    // Phase 4d — Checkpointer
949    // -------------------------------------------------------------------
950
951    #[test]
952    fn explicit_checkpoint_folds_wal_into_main_file_and_truncates_wal() {
953        use crate::sql::pager::wal::WAL_HEADER_SIZE;
954        let path = tmp_path("ckpt_explicit");
955        let mut p = Pager::create(&path).unwrap();
956
957        p.stage_page(2, make_page(0xA1));
958        p.stage_page(3, make_page(0xB2));
959        p.commit(DbHeader {
960            page_count: 4,
961            schema_root_page: 1,
962            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
963            freelist_head: 0,
964        })
965        .unwrap();
966
967        // Pre-checkpoint: WAL has frames, main file is still the initial size.
968        let wal = wal_path_for(&path);
969        assert!(std::fs::metadata(&wal).unwrap().len() > WAL_HEADER_SIZE as u64);
970
971        let written = p.checkpoint().unwrap();
972        assert_eq!(written, 2, "both data pages should flush to main file");
973
974        // WAL is now empty (just the header) with a rolled salt + bumped seq.
975        let wal_len = std::fs::metadata(&wal).unwrap().len();
976        assert_eq!(wal_len, WAL_HEADER_SIZE as u64);
977
978        // Main file is exactly page_count pages long.
979        let main_len = std::fs::metadata(&path).unwrap().len();
980        assert_eq!(main_len, 4 * PAGE_SIZE as u64);
981
982        // Drop + reopen: main file alone must carry the latest content.
983        // (The WAL is empty, so any surviving correctness is on the main file.)
984        drop(p);
985        let p2 = Pager::open(&path).unwrap();
986        assert_eq!(p2.header().page_count, 4);
987        assert_eq!(p2.read_page(2).unwrap()[0], 0xA1);
988        assert_eq!(p2.read_page(3).unwrap()[0], 0xB2);
989
990        cleanup(&path);
991    }
992
993    #[test]
994    fn checkpoint_is_idempotent() {
995        // Two back-to-back checkpoints: the second must be a no-op and
996        // must not error. (The first drains wal_cache; the second sees
997        // nothing to do.)
998        let path = tmp_path("ckpt_idempotent");
999        let mut p = Pager::create(&path).unwrap();
1000        p.stage_page(2, make_page(0x42));
1001        p.commit(DbHeader {
1002            page_count: 3,
1003            schema_root_page: 1,
1004            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1005            freelist_head: 0,
1006        })
1007        .unwrap();
1008
1009        let first = p.checkpoint().unwrap();
1010        assert_eq!(first, 1);
1011        let second = p.checkpoint().unwrap();
1012        assert_eq!(second, 0, "second checkpoint should be a no-op");
1013
1014        cleanup(&path);
1015    }
1016
1017    #[test]
1018    fn checkpoint_with_shrink_truncates_main_file() {
1019        // Grow to 5 pages, checkpoint; shrink to 3 pages, checkpoint.
1020        // After the second checkpoint the main file must physically
1021        // be 3 * PAGE_SIZE bytes — previous-tail pages are gone.
1022        let path = tmp_path("ckpt_shrink");
1023        let mut p = Pager::create(&path).unwrap();
1024        p.stage_page(2, make_page(1));
1025        p.stage_page(3, make_page(2));
1026        p.stage_page(4, make_page(3));
1027        p.commit(DbHeader {
1028            page_count: 5,
1029            schema_root_page: 1,
1030            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1031            freelist_head: 0,
1032        })
1033        .unwrap();
1034        p.checkpoint().unwrap();
1035        assert_eq!(
1036            std::fs::metadata(&path).unwrap().len(),
1037            5 * PAGE_SIZE as u64
1038        );
1039
1040        // Shrink.
1041        p.commit(DbHeader {
1042            page_count: 3,
1043            schema_root_page: 1,
1044            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1045            freelist_head: 0,
1046        })
1047        .unwrap();
1048        p.checkpoint().unwrap();
1049        assert_eq!(
1050            std::fs::metadata(&path).unwrap().len(),
1051            3 * PAGE_SIZE as u64,
1052            "main file should shrink to new page_count after checkpoint"
1053        );
1054        // Page 4 is gone both physically and logically.
1055        assert!(p.read_page(4).is_none());
1056
1057        cleanup(&path);
1058    }
1059
1060    #[test]
1061    fn auto_checkpoint_fires_past_frame_threshold() {
1062        // Do just enough commits to push the WAL past
1063        // AUTO_CHECKPOINT_THRESHOLD_FRAMES. After the crossing commit,
1064        // the WAL should be back to header-only (auto-checkpoint ran)
1065        // while the main file carries every committed byte.
1066        use crate::sql::pager::wal::WAL_HEADER_SIZE;
1067        let path = tmp_path("ckpt_auto");
1068        let mut p = Pager::create(&path).unwrap();
1069
1070        // Each commit appends: 1 dirty data frame + 1 commit frame for
1071        // page 0 = 2 frames. So ceil(THRESHOLD / 2) commits gets us past
1072        // the trigger.
1073        let commits_needed = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
1074        for i in 0..commits_needed {
1075            p.stage_page(2, make_page((i & 0xff) as u8));
1076            p.commit(DbHeader {
1077                page_count: 3,
1078                schema_root_page: 1,
1079                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1080                freelist_head: 0,
1081            })
1082            .unwrap();
1083        }
1084
1085        // Auto-checkpoint must have fired at least once during that loop.
1086        let wal_len = std::fs::metadata(wal_path_for(&path)).unwrap().len();
1087        assert_eq!(
1088            wal_len, WAL_HEADER_SIZE as u64,
1089            "auto-checkpoint should have truncated the WAL"
1090        );
1091
1092        // Last committed byte for page 2 is the latest (commits_needed - 1 & 0xff).
1093        let expected = ((commits_needed - 1) & 0xff) as u8;
1094        assert_eq!(p.read_page(2).unwrap()[0], expected);
1095
1096        cleanup(&path);
1097    }
1098
1099    // -------------------------------------------------------------------
1100    // Phase 4e — shared/exclusive lock modes
1101    // -------------------------------------------------------------------
1102
1103    #[test]
1104    fn two_read_only_openers_coexist() {
1105        // Phase 4e: multiple read-only openers take shared locks and
1106        // must not exclude each other.
1107        let path = tmp_path("ro_coexist");
1108        {
1109            let mut p = Pager::create(&path).unwrap();
1110            p.stage_page(2, make_page(0x55));
1111            p.commit(DbHeader {
1112                page_count: 3,
1113                schema_root_page: 1,
1114                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1115                freelist_head: 0,
1116            })
1117            .unwrap();
1118        }
1119
1120        let reader1 = Pager::open_read_only(&path).unwrap();
1121        let reader2 = Pager::open_read_only(&path).unwrap();
1122        // Both see the committed content.
1123        assert_eq!(reader1.read_page(2).unwrap()[0], 0x55);
1124        assert_eq!(reader2.read_page(2).unwrap()[0], 0x55);
1125        assert_eq!(reader1.access_mode(), AccessMode::ReadOnly);
1126
1127        cleanup(&path);
1128    }
1129
1130    #[test]
1131    fn read_write_blocks_read_only_and_vice_versa() {
1132        // A live exclusive lock blocks a shared-lock open, and a live
1133        // shared lock blocks an exclusive-lock open. Both error messages
1134        // mention that the database is in use.
1135        let path = tmp_path("rw_vs_ro");
1136        let _writer = Pager::create(&path).unwrap();
1137
1138        // Writer holds LOCK_EX — reader can't take LOCK_SH.
1139        let reader_attempt = Pager::open_read_only(&path);
1140        assert!(reader_attempt.is_err());
1141        let msg = format!("{}", reader_attempt.unwrap_err());
1142        assert!(
1143            msg.contains("locked for writing"),
1144            "read-only open while writer holds lock should mention writer; got: {msg}"
1145        );
1146
1147        drop(_writer);
1148
1149        // Now a reader comes in; a second read-write must be rejected.
1150        let _reader = Pager::open_read_only(&path).unwrap();
1151        let writer_attempt = Pager::open(&path);
1152        assert!(writer_attempt.is_err());
1153        let msg = format!("{}", writer_attempt.unwrap_err());
1154        assert!(
1155            msg.contains("in use"),
1156            "read-write open while reader holds lock should mention contention; got: {msg}"
1157        );
1158
1159        cleanup(&path);
1160    }
1161
1162    #[test]
1163    fn read_only_pager_rejects_mutations() {
1164        let path = tmp_path("ro_rejects");
1165        {
1166            // Seed with some content so an RO open has something to read.
1167            let mut p = Pager::create(&path).unwrap();
1168            p.stage_page(2, make_page(0x33));
1169            p.commit(DbHeader {
1170                page_count: 3,
1171                schema_root_page: 1,
1172                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1173                freelist_head: 0,
1174            })
1175            .unwrap();
1176        }
1177
1178        let mut ro = Pager::open_read_only(&path).unwrap();
1179        let commit_err = ro
1180            .commit(DbHeader {
1181                page_count: 3,
1182                schema_root_page: 1,
1183                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1184                freelist_head: 0,
1185            })
1186            .unwrap_err();
1187        assert!(
1188            format!("{commit_err}").contains("read-only"),
1189            "commit on RO pager should surface 'read-only'; got: {commit_err}"
1190        );
1191        let ckpt_err = ro.checkpoint().unwrap_err();
1192        assert!(
1193            format!("{ckpt_err}").contains("read-only"),
1194            "checkpoint on RO pager should surface 'read-only'; got: {ckpt_err}"
1195        );
1196
1197        // Reads still work.
1198        assert_eq!(ro.read_page(2).unwrap()[0], 0x33);
1199
1200        cleanup(&path);
1201    }
1202
1203    #[test]
1204    fn read_only_open_without_wal_sidecar_succeeds() {
1205        // A file-backed DB whose -wal sidecar was deleted (or a Phase-
1206        // 4a-vintage file predating Phase 4c) must still be openable
1207        // read-only. The Pager serves reads straight from on_disk with
1208        // an empty wal_cache.
1209        let path = tmp_path("ro_no_wal");
1210        {
1211            let mut p = Pager::create(&path).unwrap();
1212            p.stage_page(2, make_page(0x44));
1213            p.commit(DbHeader {
1214                page_count: 3,
1215                schema_root_page: 1,
1216                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1217                freelist_head: 0,
1218            })
1219            .unwrap();
1220            // Force the WAL into the main file before we nuke it.
1221            p.checkpoint().unwrap();
1222        }
1223        // Nuke the sidecar.
1224        std::fs::remove_file(wal_path_for(&path)).unwrap();
1225
1226        let ro = Pager::open_read_only(&path).unwrap();
1227        assert_eq!(ro.read_page(2).unwrap()[0], 0x44);
1228        // No WAL materialized by a read-only open.
1229        assert!(!wal_path_for(&path).exists());
1230        cleanup(&path);
1231    }
1232
1233    #[test]
1234    fn reopen_after_crash_between_data_write_and_header_write_recovers_via_wal() {
1235        // Simulates a crash between step 2 (data-page fsync) and step 3
1236        // (header write) of `checkpoint`: the main file has new data
1237        // pages but still carries the old header, AND the WAL still
1238        // holds every committed frame. Next open must reconstruct the
1239        // post-commit view via the WAL (wal_cache[0] overrides the stale
1240        // main-file header).
1241        use std::io::{Seek, SeekFrom, Write};
1242
1243        let path = tmp_path("ckpt_crash_mid_flush");
1244        {
1245            let mut p = Pager::create(&path).unwrap();
1246            p.stage_page(2, make_page(0xEE));
1247            p.commit(DbHeader {
1248                page_count: 3,
1249                schema_root_page: 1,
1250                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1251                freelist_head: 0,
1252            })
1253            .unwrap();
1254            // Manually write the committed page 2 into the main file at
1255            // offset 2*PAGE_SIZE to simulate the first half of a
1256            // checkpoint that only got as far as step 2. The header
1257            // stays at the pre-commit state (page_count=2 from create).
1258            // Drop the pager first so its exclusive lock releases.
1259        }
1260        {
1261            let mut f = std::fs::OpenOptions::new().write(true).open(&path).unwrap();
1262            f.seek(SeekFrom::Start(2 * PAGE_SIZE as u64)).unwrap();
1263            f.write_all(&make_page(0xEE)).unwrap();
1264            f.sync_all().unwrap();
1265            // NB: we didn't extend the file past its original length in
1266            // the create-only state; the write_all grew it implicitly.
1267            // The header at offset 0 is still the original "page_count=2".
1268        }
1269
1270        // Reopen. Main-file header says 2 pages; WAL replay should
1271        // override that to 3, and wal_cache[2] should shadow whatever
1272        // the main file now holds for page 2 (which happens to be the
1273        // same byte here — the point is the Pager doesn't depend on
1274        // that coincidence).
1275        let p2 = Pager::open(&path).unwrap();
1276        assert_eq!(p2.header().page_count, 3);
1277        assert_eq!(p2.read_page(2).unwrap()[0], 0xEE);
1278        cleanup(&path);
1279    }
1280
1281    #[test]
1282    fn auto_checkpoint_crosses_threshold_mid_loop() {
1283        // Pins the exact-threshold semantics: `commit` must trigger a
1284        // checkpoint as soon as the WAL's frame count hits the threshold,
1285        // not later. Catches a regression where someone accidentally
1286        // lowers it to `>` or bumps it into a different accounting.
1287        let path = tmp_path("ckpt_threshold_crossing");
1288        let mut p = Pager::create(&path).unwrap();
1289        let commits_to_cross = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
1290        for i in 0..commits_to_cross - 1 {
1291            p.stage_page(2, make_page((i & 0xff) as u8));
1292            p.commit(DbHeader {
1293                page_count: 3,
1294                schema_root_page: 1,
1295                format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1296                freelist_head: 0,
1297            })
1298            .unwrap();
1299        }
1300        // One short of the threshold — WAL must not yet have been flushed.
1301        let pre = std::fs::metadata(wal_path_for(&path)).unwrap().len();
1302        assert!(
1303            pre > crate::sql::pager::wal::WAL_HEADER_SIZE as u64,
1304            "WAL should still carry frames right before the crossing commit"
1305        );
1306
1307        // The crossing commit: this one's the trigger.
1308        p.stage_page(2, make_page(0xff));
1309        p.commit(DbHeader {
1310            page_count: 3,
1311            schema_root_page: 1,
1312            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
1313            freelist_head: 0,
1314        })
1315        .unwrap();
1316        let post = std::fs::metadata(wal_path_for(&path)).unwrap().len();
1317        assert_eq!(
1318            post,
1319            crate::sql::pager::wal::WAL_HEADER_SIZE as u64,
1320            "WAL must be header-only right after the threshold-crossing commit"
1321        );
1322
1323        cleanup(&path);
1324    }
1325}