sqlrite/sql/pager/pager.rs
1//! Long-lived page cache + WAL-backed commits.
2//!
3//! A `Pager` wraps an open `.sqlrite` file plus its `-wal` sidecar. It owns
4//! three maps of page bytes:
5//!
6//! - `on_disk`: snapshot of the main file as last checkpointed. Frozen
7//! across regular commits — the main file is only rewritten
8//! when the checkpointer (Phase 4d) runs.
9//! - `wal_cache`: latest committed body for each page that has been
10//! appended to the WAL since the last checkpoint. Populated
11//! at open by replaying the WAL, and kept in lockstep with
12//! each successful `commit`.
13//! - `staged`: pages queued for the next commit, not yet in the WAL.
14//!
15//! **Read precedence.** `read_page` consults `staged → wal_cache → on_disk`,
16//! so both uncommitted writes and WAL-resident committed writes shadow the
17//! frozen main file. A bounds check against `current_header.page_count`
18//! hides pages that have been logically truncated by a shrink-commit even
19//! though their bytes are still present in `on_disk` (the real truncation
20//! waits for the checkpointer).
21//!
22//! **Commit flow.** `commit` compares each staged page against the
23//! effective committed state (wal_cache layered on on_disk) and appends a
24//! WAL frame only for pages whose bytes actually differ. A final "commit"
25//! frame for page 0 carries the new encoded header and the post-commit
26//! page count in its `commit_page_count` field. That frame is fsync'd.
27//! The main file is not touched.
28//!
29//! **Checkpoint flow (Phase 4d).** When the WAL accumulates past
30//! `AUTO_CHECKPOINT_THRESHOLD_FRAMES` frames (tracked on `Wal`), `commit`
31//! opportunistically folds them back into the main file: write every
32//! WAL-resident page at its proper offset, overwrite the main-file
33//! header, truncate the file to `page_count * PAGE_SIZE` bytes, `fsync`,
34//! then `Wal::truncate` the sidecar (which rolls the salt so any stale
35//! tail bytes from the old generation can't be misread as valid). Reads
36//! stay consistent if a crash hits mid-checkpoint — the WAL still holds
37//! the authoritative bytes until its header is rewritten, and the
38//! checkpointer is idempotent, so rerunning is safe.
39//!
//! The commit-time diff matters because higher layers re-serialize the
//! entire database on every auto-save. Without it, even a one-row UPDATE
//! would append a frame for every page of every table. With it, unchanged
//! tables — whose encoded pages are byte-identical across saves — simply
//! stay out of the WAL.
45//!
46//! **Locking (Phase 4a → 4e).** Every `Pager` takes an advisory lock on
47//! its main file and on its WAL sidecar. The mode is driven by
48//! [`AccessMode`]:
49//!
50//! - `ReadWrite` → `flock(LOCK_EX)` — one writer, no other openers.
51//! - `ReadOnly` → `flock(LOCK_SH)` — multiple readers coexist; any writer
52//! is excluded.
53//!
54//! Both locks are tied to their file descriptors and release
55//! automatically when the `Pager` drops. On collision the opener gets
56//! a clean typed error rather than racing silently. POSIX flock is
57//! "multiple readers OR one writer", not both — true concurrent
58//! reader-and-writer access would need a shared-memory coordination
59//! file and read marks, which is not on the roadmap.
60
61use std::collections::HashMap;
62use std::fs::{File, OpenOptions};
63use std::path::{Path, PathBuf};
64
65use crate::error::{Result, SQLRiteError};
66use crate::sql::pager::file::FileStorage;
67use crate::sql::pager::header::{DbHeader, decode_header, encode_header};
68use crate::sql::pager::page::PAGE_SIZE;
69use crate::sql::pager::wal::Wal;
70
/// Derives the WAL sidecar path that pairs with a main database file.
///
/// The `-wal` suffix is appended to the *entire* path (extension
/// included), so `foo.sqlrite` maps to `foo.sqlrite-wal` — the same
/// naming convention SQLite uses.
pub(crate) fn wal_path_for(main: &Path) -> PathBuf {
    let mut sidecar = main.to_path_buf().into_os_string();
    sidecar.push("-wal");
    sidecar.into()
}
79
/// Declares what an opener intends to do with a database file (or its
/// WAL): mutate it, or only read a consistent snapshot. The mode picks
/// the OS-level lock that gets taken, and the `Pager` consults it to
/// refuse mutations on read-only opens.
///
/// These are POSIX-flock semantics — "many readers OR one writer".
/// Simultaneous reader-and-writer access would require a shared-memory
/// coordination file with read marks, which is deferred.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AccessMode {
    /// Exclusive access (`flock(LOCK_EX)`): a single writer and no
    /// other openers.
    ReadWrite,
    /// Shared access (`flock(LOCK_SH)`): any number of readers may
    /// coexist, while a writer is excluded.
    ReadOnly,
}
96
/// Takes a non-blocking advisory lock on `file` and converts a failure
/// into a readable SQLRite error that names `path`.
///
/// `AccessMode::ReadWrite` requests an exclusive lock and
/// `AccessMode::ReadOnly` a shared one. On Unix these correspond to
/// `flock(LOCK_EX | LOCK_NB)` / `flock(LOCK_SH | LOCK_NB)`; on Windows,
/// to `LockFileEx` with the matching flags.
///
/// The `fs2::FileExt` methods are invoked fully qualified so they can't
/// be confused with inherent lock methods of the same name on
/// `std::fs::File`, whose error type differs.
///
/// # Errors
///
/// Any failure of the underlying lock call is reported as
/// `SQLRiteError::General`, with the OS error appended in parentheses.
#[cfg(feature = "file-locks")]
pub(crate) fn acquire_lock(file: &File, path: &Path, mode: AccessMode) -> Result<()> {
    // Pair each lock attempt with the explanation shown if it fails.
    let (outcome, how) = match mode {
        AccessMode::ReadWrite => (
            fs2::FileExt::try_lock_exclusive(file),
            "is in use (another process has it open; readers and writers are exclusive)",
        ),
        AccessMode::ReadOnly => (
            fs2::FileExt::try_lock_shared(file),
            "is locked for writing by another process (read-only open blocked until the writer closes)",
        ),
    };
    match outcome {
        Ok(()) => Ok(()),
        Err(e) => Err(SQLRiteError::General(format!(
            "database '{}' {how} ({e})",
            path.display()
        ))),
    }
}
126
127/// No-op variant for builds without the `file-locks` feature (most
128/// notably the WASM SDK, where `fs2` doesn't compile against
129/// wasm32-unknown-unknown). The Pager still refuses to touch a
130/// read-only open via `AccessMode`, but there's no OS-level
131/// multi-process coordination — the caller is trusted to avoid
132/// conflicting opens. Fine for WASM, where file-backed opens
133/// aren't exposed in the MVP anyway.
134#[cfg(not(feature = "file-locks"))]
135pub(crate) fn acquire_lock(_file: &File, _path: &Path, _mode: AccessMode) -> Result<()> {
136 Ok(())
137}
138
/// WAL frame count at which `commit` opportunistically runs a
/// checkpoint and folds the log back into the main file.
///
/// The value is a trade-off: small enough to keep the WAL bounded under
/// write-heavy load, large enough that short bursts of commits don't
/// keep rewriting the main file. SQLite's default is 1000; the
/// databases targeted here are smaller, so 100 suffices.
const AUTO_CHECKPOINT_THRESHOLD_FRAMES: usize = 100;
145
146pub struct Pager {
147 /// Main-file I/O handle. Regular commits leave it alone; the
148 /// checkpointer writes accumulated WAL pages back here.
149 storage: FileStorage,
150 current_header: DbHeader,
151 /// Byte snapshot of the main file as last checkpointed. The
152 /// checkpointer is the only thing that mutates it.
153 on_disk: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
154 /// Pages queued for the next commit. `commit` drains this.
155 staged: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
156 /// The committed WAL's view of each page. Populated at open by
157 /// replaying the log, and kept in sync with each successful commit.
158 /// Layered on top of `on_disk` for read resolution.
159 wal_cache: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
160 /// Write-ahead log sidecar. Present on a read-write Pager; `None`
161 /// on a read-only Pager that either found no WAL on disk or doesn't
162 /// retain the handle after initial replay. Reads consult
163 /// `wal_cache` (already populated at open) either way.
164 wal: Option<Wal>,
165 /// `ReadWrite` allows `commit` / `checkpoint`; `ReadOnly` rejects
166 /// them with a typed error. `stage_page` stays open on both modes
167 /// (it only touches the in-memory `staged` map) — any staged bytes
168 /// simply never reach disk on a read-only Pager because `commit` is
169 /// the gate.
170 access_mode: AccessMode,
171}
172
173impl Pager {
174 /// Opens an existing database file for read-write access. Shorthand
175 /// for [`Pager::open_with_mode`] with [`AccessMode::ReadWrite`].
176 pub fn open(path: &Path) -> Result<Self> {
177 Self::open_with_mode(path, AccessMode::ReadWrite)
178 }
179
180 /// Opens an existing database file for read-only access — takes
181 /// a shared advisory lock that coexists with other readers but is
182 /// excluded by any writer. `commit` and `checkpoint` return a clean
183 /// error rather than panic; `stage_page` stays a no-op-to-disk
184 /// (bytes sit in the in-memory `staged` map that `commit` would
185 /// have drained).
186 ///
187 /// If the WAL sidecar doesn't exist, the open succeeds with an
188 /// empty `wal_cache` — a read-only caller can't materialize a
189 /// sidecar on its own, and a DB that never had WAL writes is fine
190 /// to read straight from the main file.
191 pub fn open_read_only(path: &Path) -> Result<Self> {
192 Self::open_with_mode(path, AccessMode::ReadOnly)
193 }
194
195 /// Opens an existing database file with the given access mode.
196 /// Loads every main-file page into `on_disk`, then opens the WAL
197 /// sidecar (read-only mode uses a shared lock and skips sidecar
198 /// creation; read-write creates the sidecar if missing) and layers
199 /// committed frames into `wal_cache`.
200 pub fn open_with_mode(path: &Path, mode: AccessMode) -> Result<Self> {
201 let file = match mode {
202 AccessMode::ReadWrite => OpenOptions::new().read(true).write(true).open(path)?,
203 AccessMode::ReadOnly => OpenOptions::new().read(true).open(path)?,
204 };
205 acquire_lock(&file, path, mode)?;
206 let mut storage = FileStorage::new(file);
207 let mut header = storage.read_header()?;
208
209 let mut on_disk = HashMap::with_capacity(header.page_count.saturating_sub(1) as usize);
210 // page 0 is the header itself; regular pages live at 1..page_count.
211 for page_num in 1..header.page_count {
212 let buf = read_raw_page(&mut storage, page_num)?;
213 on_disk.insert(page_num, buf);
214 }
215
216 let wal_path = wal_path_for(path);
217 let (wal_handle, wal_cache) = match mode {
218 AccessMode::ReadWrite => {
219 // Create the sidecar if it's missing — a pre-Phase-4c
220 // file or a DB that was hand-deleted down to just the
221 // main file both need a fresh empty WAL to be writable.
222 let mut wal = if wal_path.exists() {
223 Wal::open_with_mode(&wal_path, mode)?
224 } else {
225 Wal::create(&wal_path)?
226 };
227 let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
228 wal.load_committed_into(&mut cache)?;
229 (Some(wal), cache)
230 }
231 AccessMode::ReadOnly => {
232 // Read-only mustn't create files. If the sidecar is
233 // absent, treat the WAL as empty and serve reads from
234 // the main file alone.
235 if wal_path.exists() {
236 let mut wal = Wal::open_with_mode(&wal_path, mode)?;
237 let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
238 wal.load_committed_into(&mut cache)?;
239 // We don't need to retain the WAL handle in
240 // read-only mode — the cache is all reads need and
241 // dropping the handle releases the shared lock on
242 // the sidecar early. Keep it, though, so the lock
243 // spans the whole Pager lifetime: a checkpointer
244 // process grabbing LOCK_EX on the WAL while our
245 // reader still has wal_cache loaded would be
246 // correct for reads but surprising semantically.
247 (Some(wal), cache)
248 } else {
249 (None, HashMap::new())
250 }
251 }
252 };
253
254 // If the WAL committed a new page 0, that frame's body is the
255 // up-to-date header — decode it and let it override what the
256 // main file's stale header says.
257 if let Some(page0) = wal_cache.get(&0) {
258 header = decode_header(page0.as_ref())?;
259 } else if let Some(w) = wal_handle.as_ref()
260 && let Some(committed_pc) = w.last_commit_page_count()
261 {
262 // Belt-and-suspenders: even if the latest commit frame didn't
263 // land on page 0 (shouldn't happen under the current commit
264 // layout, but keeps us correct if that ever changes), trust
265 // its page count.
266 header.page_count = committed_pc;
267 }
268
269 Ok(Self {
270 storage,
271 current_header: header,
272 on_disk,
273 staged: HashMap::new(),
274 wal_cache,
275 wal: wal_handle,
276 access_mode: mode,
277 })
278 }
279
280 /// Creates a fresh database file. Page 0 is the header; page 1 is an
281 /// empty `TableLeaf` that serves as the initial `sqlrite_master` root
282 /// (zero rows, no user tables yet). A matching empty WAL sidecar is
283 /// created alongside it — any pre-existing WAL at the target path is
284 /// truncated.
285 pub fn create(path: &Path) -> Result<Self> {
286 use crate::sql::pager::page::{PAGE_HEADER_SIZE, PageType};
287 use crate::sql::pager::table_page::TablePage;
288
289 let file = OpenOptions::new()
290 .read(true)
291 .write(true)
292 .create(true)
293 .truncate(true)
294 .open(path)?;
295 acquire_lock(&file, path, AccessMode::ReadWrite)?;
296 let mut storage = FileStorage::new(file);
297
298 let empty_master = TablePage::empty();
299 let mut page1 = Box::new([0u8; PAGE_SIZE]);
300 page1[0] = PageType::TableLeaf as u8;
301 page1[1..5].copy_from_slice(&0u32.to_le_bytes());
302 page1[5..7].copy_from_slice(&0u16.to_le_bytes());
303 page1[PAGE_HEADER_SIZE..].copy_from_slice(empty_master.as_bytes());
304
305 let header = DbHeader {
306 page_count: 2,
307 schema_root_page: 1,
308 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
309 freelist_head: 0,
310 };
311
312 // Write the file synchronously so the initial create is durable and
313 // subsequent `Pager::open` calls see a valid header + page 1.
314 storage.seek_to(0)?;
315 storage.write_all(&encode_header(&header))?;
316 storage.write_all(page1.as_ref())?;
317 storage.flush()?;
318
319 // Sidecar WAL — fresh, no frames yet.
320 let wal = Wal::create(&wal_path_for(path))?;
321
322 let mut on_disk = HashMap::new();
323 on_disk.insert(1, page1);
324
325 Ok(Self {
326 storage,
327 current_header: header,
328 on_disk,
329 staged: HashMap::new(),
330 wal_cache: HashMap::new(),
331 wal: Some(wal),
332 access_mode: AccessMode::ReadWrite,
333 })
334 }
335
336 pub fn header(&self) -> DbHeader {
337 self.current_header
338 }
339
340 /// Returns the mode this Pager was opened in. Callers can use this
341 /// to bail out of a write path earlier than the Pager itself would.
342 pub fn access_mode(&self) -> AccessMode {
343 self.access_mode
344 }
345
346 fn require_writable(&self, op: &'static str) -> Result<()> {
347 if self.access_mode == AccessMode::ReadOnly {
348 return Err(SQLRiteError::General(format!(
349 "cannot {op}: database is opened read-only"
350 )));
351 }
352 Ok(())
353 }
354
355 /// Reads a page, preferring staged content, then the WAL-committed
356 /// overlay, then the frozen main-file snapshot. Returns `None` for
357 /// pages beyond the current page count (pages that have been logically
358 /// truncated by a shrink-commit stay in `on_disk` until checkpoint,
359 /// but a bounds check hides them from readers).
360 pub fn read_page(&self, page_num: u32) -> Option<&[u8; PAGE_SIZE]> {
361 // Staged pages are "the future" and should always shadow everything
362 // else, even pages we're about to extend beyond the old page count.
363 if let Some(b) = self.staged.get(&page_num) {
364 return Some(b);
365 }
366 // A page that's been logically dropped shouldn't be readable even
367 // if its bytes linger in on_disk until the next checkpoint.
368 if page_num >= self.current_header.page_count {
369 return None;
370 }
371 if let Some(b) = self.wal_cache.get(&page_num) {
372 return Some(b.as_ref());
373 }
374 self.on_disk.get(&page_num).map(|b| b.as_ref())
375 }
376
377 /// Queues `bytes` as the new content of page `page_num`. The write only
378 /// reaches disk when `commit` is called.
379 pub fn stage_page(&mut self, page_num: u32, bytes: [u8; PAGE_SIZE]) {
380 self.staged.insert(page_num, Box::new(bytes));
381 }
382
383 /// Discards all staged pages. Useful when beginning a new full re-save
384 /// from scratch; the higher layer can also just overwrite pages without
385 /// clearing since `stage_page` replaces.
386 pub fn clear_staged(&mut self) {
387 self.staged.clear();
388 }
389
390 /// Commits all staged pages into the WAL. Only pages whose bytes differ
391 /// from the effective committed state (wal_cache layered on on_disk)
392 /// produce frames. A final commit frame carries the new page 0 (encoded
393 /// header) and is fsync'd; that seals the transaction. The main file is
394 /// left untouched — it only changes when the checkpointer (Phase 4d)
395 /// runs.
396 ///
397 /// Returns the number of dirty *data* frames appended (excluding the
398 /// implicit page-0 commit frame that's always written).
399 pub fn commit(&mut self, new_header: DbHeader) -> Result<usize> {
400 self.require_writable("commit")?;
401 let wal = self
402 .wal
403 .as_mut()
404 .expect("read-write Pager must carry a WAL handle");
405
406 // Decide which staged pages carry bytes that aren't already live.
407 // Effective committed state = wal_cache overlaid on on_disk.
408 let staged = std::mem::take(&mut self.staged);
409 let mut dirty: Vec<(u32, Box<[u8; PAGE_SIZE]>)> = staged
410 .into_iter()
411 .filter(|(n, bytes)| {
412 let existing = self.wal_cache.get(n).or_else(|| self.on_disk.get(n));
413 match existing {
414 Some(e) => e.as_ref() != bytes.as_ref(),
415 None => true,
416 }
417 })
418 .collect();
419 // Append in ascending page order so the log replays deterministically
420 // and sequential reads during checkpoint stay sequential.
421 dirty.sort_by_key(|(n, _)| *n);
422 let writes = dirty.len();
423
424 for (n, bytes) in &dirty {
425 wal.append_frame(*n, bytes.as_ref(), None)?;
426 }
427
428 // Seal the transaction. The commit frame carries the new page 0
429 // (encoded header) in its body and the new page count in its
430 // commit_page_count field — together they're the single atomic
431 // record that says "this is the new committed state".
432 let page0 = encode_header(&new_header);
433 wal.append_frame(0, &page0, Some(new_header.page_count))?;
434 let frame_count_after_commit = wal.frame_count();
435
436 // Promote every frame we just wrote into wal_cache so subsequent
437 // reads see the latest committed bytes without touching the WAL.
438 for (n, bytes) in dirty {
439 self.wal_cache.insert(n, bytes);
440 }
441 self.wal_cache.insert(0, Box::new(page0));
442
443 self.current_header = new_header;
444
445 // Keep the WAL bounded. Under write-heavy load, un-flushed frames
446 // accumulate; past the threshold we fold them back into the main
447 // file opportunistically so open doesn't have to replay an
448 // arbitrarily long log on the next start.
449 if frame_count_after_commit >= AUTO_CHECKPOINT_THRESHOLD_FRAMES {
450 self.checkpoint()?;
451 }
452
453 Ok(writes)
454 }
455
456 /// Folds all WAL-resident pages back into the main file and truncates
457 /// the WAL. Returns the number of data pages written to the main
458 /// file (excludes the header).
459 ///
460 /// **Crash safety — two fsync barriers.** The main-file writes happen
461 /// in two phases separated by a barrier, matching SQLite's checkpoint
462 /// ordering:
463 ///
464 /// 1. Write every `wal_cache` data page at its `page_num * PAGE_SIZE`
465 /// offset in the main file.
466 /// 2. **`fsync`** — force those data pages to stable storage *before*
467 /// the header publishes the new state. Without this barrier, a
468 /// filesystem or disk-cache reordering could land the header first,
469 /// leaving a main file that claims "N pages" over stale data.
470 /// 3. Rewrite the main-file header at offset 0. This is the
471 /// checkpoint's "commit point" — after it hits disk the main file
472 /// alone tells the truth.
473 /// 4. `set_len` shrinks the tail if `page_count` dropped.
474 /// 5. **`fsync`** — force the header + set_len durable.
475 /// 6. `Wal::truncate` resets the sidecar (rolls salt, writes new
476 /// header, fsync). Running this *after* the main file is fully
477 /// durable means a crash between 5 and 6 leaves a stale WAL over a
478 /// current main file; readers still see the right bytes because
479 /// wal_cache (replayed from the stale WAL on next open) would be
480 /// byte-identical to what's in the main file. A retry of
481 /// `checkpoint` then truncates cleanly.
482 ///
483 /// A crash between 1 and 2 can leave partial data-page writes, but
484 /// since the header hasn't moved yet, the main file still reads as
485 /// its pre-checkpoint self — the WAL is intact and authoritative,
486 /// and a retry rewrites the same bytes.
487 pub fn checkpoint(&mut self) -> Result<usize> {
488 self.require_writable("checkpoint")?;
489 // `require_writable` already guaranteed we're ReadWrite; in
490 // ReadWrite mode `wal` is always `Some` (it's only `None` for
491 // ReadOnly opens of a DB that had no sidecar on disk).
492 let wal_frame_count = self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0);
493
494 // Nothing to flush? Skip the fsyncs and get out.
495 if wal_frame_count == 0 && self.wal_cache.is_empty() {
496 return Ok(0);
497 }
498
499 // Step 1 — write every WAL-resident data page to the main file.
500 // Page 0 (header) is handled separately via write_header, and any
501 // pages past the new page count are skipped here (set_len will
502 // drop them when the file shrinks).
503 let page_count = self.current_header.page_count;
504 let mut pages: Vec<u32> = self
505 .wal_cache
506 .keys()
507 .copied()
508 .filter(|&n| n != 0 && n < page_count)
509 .collect();
510 pages.sort_unstable();
511 let written = pages.len();
512 for page_num in &pages {
513 let bytes = self
514 .wal_cache
515 .get(page_num)
516 .expect("iterated key must resolve");
517 self.storage
518 .seek_to((*page_num as u64) * (PAGE_SIZE as u64))?;
519 self.storage.write_all(bytes.as_ref())?;
520 }
521
522 // Step 2 — first durability barrier. Data pages must hit stable
523 // storage before the header publishes the new page count /
524 // schema root, or a reordered writeback could expose a
525 // half-migrated file on crash.
526 if written > 0 {
527 self.storage.flush()?;
528 }
529
530 // Step 3 — rewrite the main-file header. This is the checkpoint's
531 // atomic record.
532 self.storage.write_header(&self.current_header)?;
533
534 // Step 4 — shrink the main file if the committed page count is
535 // smaller than what the file physically holds.
536 self.storage.truncate_to_pages(page_count)?;
537
538 // Step 5 — second durability barrier. Makes header + set_len
539 // durable together before we touch the WAL.
540 self.storage.flush()?;
541
542 // Step 6 — reset the WAL sidecar. Runs before the in-memory
543 // cache swap so that if `wal.truncate` fails (disk full, EIO)
544 // we leave the in-memory state untouched rather than having
545 // wal_cache empty + on_disk updated + WAL un-truncated, which
546 // the Pager can't easily recover from on its own. Here a
547 // failure means the main file is already consistent on disk
548 // (steps 2 + 5 fsynced it); we just leave the stale WAL in
549 // place for the next checkpoint attempt.
550 self.wal
551 .as_mut()
552 .expect("read-write Pager must carry a WAL handle")
553 .truncate()?;
554
555 // Promote wal_cache into on_disk and drop everything that's no
556 // longer live. Page 0 is special — it's never materialized in
557 // on_disk (we read it lazily via storage.read_header on open).
558 for (n, bytes) in self.wal_cache.drain().filter(|(n, _)| *n != 0) {
559 if n < page_count {
560 self.on_disk.insert(n, bytes);
561 }
562 }
563 self.on_disk.retain(|&n, _| n < page_count);
564
565 Ok(written)
566 }
567}
568
569fn read_raw_page(storage: &mut FileStorage, page_num: u32) -> Result<Box<[u8; PAGE_SIZE]>> {
570 storage.seek_to((page_num as u64) * (PAGE_SIZE as u64))?;
571 let mut buf = Box::new([0u8; PAGE_SIZE]);
572 storage.read_exact(buf.as_mut())?;
573 Ok(buf)
574}
575
576impl std::fmt::Debug for Pager {
577 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
578 f.debug_struct("Pager")
579 .field("access_mode", &self.access_mode)
580 .field("page_count", &self.current_header.page_count)
581 .field("schema_root_page", &self.current_header.schema_root_page)
582 .field("cached_pages", &self.on_disk.len())
583 .field("staged_pages", &self.staged.len())
584 .field("wal_pages", &self.wal_cache.len())
585 .field(
586 "wal_frames",
587 &self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0),
588 )
589 .finish()
590 }
591}
592
593#[cfg(test)]
594mod tests {
595 use super::*;
596
597 fn tmp_path(name: &str) -> std::path::PathBuf {
598 let mut p = std::env::temp_dir();
599 let pid = std::process::id();
600 let nanos = std::time::SystemTime::now()
601 .duration_since(std::time::UNIX_EPOCH)
602 .map(|d| d.as_nanos())
603 .unwrap_or(0);
604 p.push(format!("sqlrite-pager-{pid}-{nanos}-{name}.sqlrite"));
605 p
606 }
607
608 /// Remove both the main file and its `-wal` sidecar — leaving either
609 /// behind can destabilize later test runs on the same tmp dir.
610 fn cleanup(path: &Path) {
611 let _ = std::fs::remove_file(path);
612 let _ = std::fs::remove_file(wal_path_for(path));
613 }
614
615 fn make_page(first_byte: u8) -> [u8; PAGE_SIZE] {
616 let mut buf = [0u8; PAGE_SIZE];
617 buf[0] = first_byte;
618 buf
619 }
620
621 #[test]
622 fn create_then_open_round_trips() {
623 let path = tmp_path("create_open");
624 {
625 let p = Pager::create(&path).unwrap();
626 assert_eq!(p.header().page_count, 2);
627 assert_eq!(p.header().schema_root_page, 1);
628 }
629 let p2 = Pager::open(&path).unwrap();
630 assert_eq!(p2.header().page_count, 2);
631 cleanup(&path);
632 }
633
634 #[test]
635 fn create_spawns_wal_sidecar() {
636 // Phase 4c: `Pager::create` must produce an empty WAL sidecar
637 // alongside the main file so the first commit has somewhere to
638 // append frames.
639 use crate::sql::pager::wal::WAL_HEADER_SIZE;
640 let path = tmp_path("wal_sidecar");
641 let _p = Pager::create(&path).unwrap();
642 let wal = wal_path_for(&path);
643 assert!(wal.exists(), "WAL sidecar should exist after create");
644 // An empty WAL is just its header.
645 let len = std::fs::metadata(&wal).unwrap().len();
646 assert_eq!(
647 len, WAL_HEADER_SIZE as u64,
648 "fresh WAL should be header-only"
649 );
650 cleanup(&path);
651 }
652
653 #[test]
654 fn commit_writes_only_dirty_pages() {
655 let path = tmp_path("diff");
656 let mut p = Pager::create(&path).unwrap();
657
658 // Initial state: page 1 is the empty-catalog schema page.
659 // Stage three "table-data" pages.
660 p.stage_page(2, make_page(0xAA));
661 p.stage_page(3, make_page(0xBB));
662 p.stage_page(4, make_page(0xCC));
663 let writes = p
664 .commit(DbHeader {
665 page_count: 5,
666 schema_root_page: 1,
667 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
668 freelist_head: 0,
669 })
670 .unwrap();
671 // 3 dirty data pages (pages 2, 3, 4). The page-0 commit frame is
672 // implicit and not counted.
673 assert_eq!(writes, 3);
674
675 // Re-stage the same bytes for pages 2 and 3, and changed bytes for 4.
676 p.stage_page(2, make_page(0xAA));
677 p.stage_page(3, make_page(0xBB));
678 p.stage_page(4, make_page(0xDD));
679 let writes = p
680 .commit(DbHeader {
681 page_count: 5,
682 schema_root_page: 1,
683 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
684 freelist_head: 0,
685 })
686 .unwrap();
687 assert_eq!(writes, 1, "only the changed page should have been written");
688
689 // Reopen and confirm the content is as expected. The bytes live in
690 // the WAL — the main file still has the empty init state — so this
691 // also verifies the WAL-replay path.
692 drop(p);
693 let p2 = Pager::open(&path).unwrap();
694 assert_eq!(p2.read_page(2).unwrap()[0], 0xAA);
695 assert_eq!(p2.read_page(3).unwrap()[0], 0xBB);
696 assert_eq!(p2.read_page(4).unwrap()[0], 0xDD);
697
698 cleanup(&path);
699 }
700
701 #[test]
702 fn second_pager_on_same_file_is_rejected() {
703 // Phase 4a regression: two simultaneous read-write Pagers against
704 // the same file used to silently race. Now the second one must
705 // error out. Phase 4e reworded the lock-contention message; the
706 // stable substring we assert on is "in use".
707 let path = tmp_path("lock_contention");
708 let _first = Pager::create(&path).unwrap();
709
710 let second = Pager::open(&path);
711 assert!(second.is_err(), "expected lock-contention error, got Ok");
712 let msg = format!("{}", second.unwrap_err());
713 assert!(
714 msg.contains("in use"),
715 "error message should signal lock contention; got: {msg}"
716 );
717
718 // After the first Pager drops, both the main-file and WAL locks
719 // release and a fresh open succeeds — confirming the locks are
720 // tied to Pager lifetime, not leaked across instances.
721 drop(_first);
722 let third = Pager::open(&path);
723 assert!(third.is_ok(), "reopen after drop should succeed: {third:?}");
724
725 cleanup(&path);
726 }
727
728 #[test]
729 fn commit_leaves_main_file_untouched_and_shrink_hides_dropped_pages() {
730 // Phase 4c: commits now go to the WAL; the main file stays frozen
731 // until the checkpointer runs. Page-count shrinks still hide the
732 // logically-dropped pages from readers (via a bounds check in
733 // read_page) even though their bytes linger in the main file.
734 let path = tmp_path("shrink");
735 let mut p = Pager::create(&path).unwrap();
736 let main_size_after_create = std::fs::metadata(&path).unwrap().len();
737
738 p.stage_page(2, make_page(1));
739 p.stage_page(3, make_page(2));
740 p.stage_page(4, make_page(3));
741 p.commit(DbHeader {
742 page_count: 5,
743 schema_root_page: 1,
744 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
745 freelist_head: 0,
746 })
747 .unwrap();
748
749 // Main file unchanged: the page-2..4 bytes went into the WAL.
750 assert_eq!(
751 std::fs::metadata(&path).unwrap().len(),
752 main_size_after_create,
753 "main file must stay frozen across commits"
754 );
755 // WAL, however, has grown: 3 dirty frames + 1 commit frame.
756 let wal_size = std::fs::metadata(wal_path_for(&path)).unwrap().len();
757 assert!(
758 wal_size > 32,
759 "WAL should contain frames after a commit, got size {wal_size}"
760 );
761
762 // Shrink to 3 pages.
763 p.commit(DbHeader {
764 page_count: 3,
765 schema_root_page: 1,
766 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
767 freelist_head: 0,
768 })
769 .unwrap();
770
771 // Page 4 is now logically dropped — read_page hides it.
772 assert!(p.read_page(4).is_none());
773 // And page 2 is still visible under the new count.
774 assert_eq!(p.read_page(2).unwrap()[0], 1);
775
776 // Reopen confirms the committed page count survives.
777 drop(p);
778 let p2 = Pager::open(&path).unwrap();
779 assert_eq!(p2.header().page_count, 3);
780 assert!(p2.read_page(4).is_none());
781
782 cleanup(&path);
783 }
784
#[test]
fn wal_replay_on_reopen_restores_committed_state() {
    // Core Phase 4c durability check: pages committed through the WAL must
    // still be readable after the pager is closed and reopened, even though
    // a regular commit never rewrites the main file.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("wal_replay");

    // Writer session: stage two pages, commit them, then close the pager.
    let mut writer = Pager::create(&db_path).unwrap();
    for (page_no, fill) in [(2, 0x11), (3, 0x22)] {
        writer.stage_page(page_no, make_page(fill));
    }
    writer
        .commit(DbHeader {
            freelist_head: 0,
            format_version: FORMAT_VERSION_BASELINE,
            schema_root_page: 1,
            page_count: 4,
        })
        .unwrap();
    drop(writer);

    // Fresh session: the committed page count and both page bodies are back.
    let reopened = Pager::open(&db_path).unwrap();
    assert_eq!(reopened.header().page_count, 4);
    for (page_no, fill) in [(2, 0x11), (3, 0x22)] {
        assert_eq!(reopened.read_page(page_no).unwrap()[0], fill);
    }
    cleanup(&db_path);
}
811
#[test]
fn orphan_dirty_frame_in_wal_is_invisible_on_reopen() {
    // Crash model: a dirty frame reached the WAL, but the commit frame that
    // would have sealed it never did. Replay at open time has to skip such
    // a frame so readers keep seeing the previously committed bytes.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;
    use crate::sql::pager::wal::Wal;

    let db_path = tmp_path("orphan_dirty");

    // Committed baseline: page 2 starts with 0xCC.
    let mut seed = Pager::create(&db_path).unwrap();
    seed.stage_page(2, make_page(0xCC));
    seed.commit(DbHeader {
        freelist_head: 0,
        format_version: FORMAT_VERSION_BASELINE,
        schema_root_page: 1,
        page_count: 3,
    })
    .unwrap();
    drop(seed);

    // Go around the Pager: open the sidecar directly and append a frame for
    // page 2 with *different* bytes. No commit frame is written after it,
    // so the frame is an orphan.
    let sidecar = wal_path_for(&db_path);
    let mut raw_wal = Wal::open(&sidecar).unwrap();
    let mut orphan_body = Box::new([0u8; PAGE_SIZE]);
    orphan_body[0] = 0x99;
    raw_wal.append_frame(2, &orphan_body, None).unwrap();
    drop(raw_wal);

    // The reopened pager must still serve the committed 0xCC, not 0x99.
    let reopened = Pager::open(&db_path).unwrap();
    let first_byte = reopened.read_page(2).unwrap()[0];
    assert_eq!(
        first_byte, 0xCC,
        "orphan dirty frame must not shadow the last committed page"
    );
    cleanup(&db_path);
}
849
#[test]
fn two_commits_only_stage_the_delta() {
    // `commit` diffs each staged page against the effective committed state
    // (wal_cache layered over on_disk), so re-committing identical bytes
    // must report zero data frames. The commit frame is still appended each
    // time; it is not part of the count checked here.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("diff_delta");
    let mut pager = Pager::create(&db_path).unwrap();

    // Both commits use the same header; build it on demand.
    let header = || DbHeader {
        freelist_head: 0,
        format_version: FORMAT_VERSION_BASELINE,
        schema_root_page: 1,
        page_count: 3,
    };

    // First commit: page 2 is new content, so exactly one data frame.
    pager.stage_page(2, make_page(0x77));
    let first = pager.commit(header()).unwrap();
    assert_eq!(first, 1);

    // Second commit: the very same bytes again, so nothing to append.
    pager.stage_page(2, make_page(0x77));
    let second = pager.commit(header()).unwrap();
    assert_eq!(second, 0, "no data frames should be re-appended");

    cleanup(&db_path);
}
882
883 // -------------------------------------------------------------------
884 // Phase 4d — Checkpointer
885 // -------------------------------------------------------------------
886
#[test]
fn explicit_checkpoint_folds_wal_into_main_file_and_truncates_wal() {
    // A manual `checkpoint` is expected to report how many data pages it
    // flushed, leave the WAL at header size, size the main file to exactly
    // `page_count` pages, and keep the data readable after a reopen.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;
    use crate::sql::pager::wal::WAL_HEADER_SIZE;

    let db_path = tmp_path("ckpt_explicit");
    let mut pager = Pager::create(&db_path).unwrap();

    for (page_no, fill) in [(2, 0xA1), (3, 0xB2)] {
        pager.stage_page(page_no, make_page(fill));
    }
    pager
        .commit(DbHeader {
            freelist_head: 0,
            format_version: FORMAT_VERSION_BASELINE,
            schema_root_page: 1,
            page_count: 4,
        })
        .unwrap();

    // Before the checkpoint the WAL holds more than just its header.
    let wal_path = wal_path_for(&db_path);
    let header_only = WAL_HEADER_SIZE as u64;
    assert!(std::fs::metadata(&wal_path).unwrap().len() > header_only);

    let written = pager.checkpoint().unwrap();
    assert_eq!(written, 2, "both data pages should flush to main file");

    // After the checkpoint the WAL is back to a bare header ...
    assert_eq!(std::fs::metadata(&wal_path).unwrap().len(), header_only);

    // ... and the main file is exactly page_count pages long.
    assert_eq!(
        std::fs::metadata(&db_path).unwrap().len(),
        4 * PAGE_SIZE as u64
    );

    // With the WAL empty, a reopen can only get the content from the main
    // file, so these reads prove the fold actually happened.
    drop(pager);
    let reopened = Pager::open(&db_path).unwrap();
    assert_eq!(reopened.header().page_count, 4);
    for (page_no, fill) in [(2, 0xA1), (3, 0xB2)] {
        assert_eq!(reopened.read_page(page_no).unwrap()[0], fill);
    }

    cleanup(&db_path);
}
928
#[test]
fn checkpoint_is_idempotent() {
    // Running `checkpoint` twice in a row must be harmless: the first call
    // flushes the one committed page, the second finds nothing left to do
    // and returns 0 instead of erroring.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("ckpt_idempotent");
    let mut pager = Pager::create(&db_path).unwrap();

    pager.stage_page(2, make_page(0x42));
    pager
        .commit(DbHeader {
            freelist_head: 0,
            format_version: FORMAT_VERSION_BASELINE,
            schema_root_page: 1,
            page_count: 3,
        })
        .unwrap();

    let first = pager.checkpoint().unwrap();
    assert_eq!(first, 1);

    let second = pager.checkpoint().unwrap();
    assert_eq!(second, 0, "second checkpoint should be a no-op");

    cleanup(&db_path);
}
952
#[test]
fn checkpoint_with_shrink_truncates_main_file() {
    // Two rounds of commit + checkpoint: first grow the database to 5
    // pages, then shrink it to 3. After the second checkpoint the main file
    // must physically be 3 * PAGE_SIZE bytes, with the old tail pages gone.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("ckpt_shrink");
    let mut pager = Pager::create(&db_path).unwrap();

    // Only the page count differs between the two commits.
    let header_with = |page_count| DbHeader {
        page_count,
        schema_root_page: 1,
        format_version: FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    let main_len = || std::fs::metadata(&db_path).unwrap().len();

    // Grow: pages 2..=4 get content, header says 5 pages.
    for (page_no, fill) in [(2, 1), (3, 2), (4, 3)] {
        pager.stage_page(page_no, make_page(fill));
    }
    pager.commit(header_with(5)).unwrap();
    pager.checkpoint().unwrap();
    assert_eq!(main_len(), 5 * PAGE_SIZE as u64);

    // Shrink: nothing staged, only the page count drops to 3.
    pager.commit(header_with(3)).unwrap();
    pager.checkpoint().unwrap();
    assert_eq!(
        main_len(),
        3 * PAGE_SIZE as u64,
        "main file should shrink to new page_count after checkpoint"
    );

    // The dropped page is no longer readable through the pager either.
    assert!(pager.read_page(4).is_none());

    cleanup(&db_path);
}
995
#[test]
fn auto_checkpoint_fires_past_frame_threshold() {
    // Commit often enough for the WAL to reach
    // AUTO_CHECKPOINT_THRESHOLD_FRAMES. Once the crossing commit returns,
    // the WAL must be header-only again (the auto-checkpoint ran) and the
    // pager must still serve the most recently committed page body.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;
    use crate::sql::pager::wal::WAL_HEADER_SIZE;

    let db_path = tmp_path("ckpt_auto");
    let mut pager = Pager::create(&db_path).unwrap();

    // Frame budget: every commit below costs one dirty data frame plus the
    // page-0 commit frame, i.e. 2 frames, so ceil(THRESHOLD / 2) commits
    // are enough to reach the trigger.
    let commits_needed = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
    for fill in (0..commits_needed).map(|i| (i & 0xff) as u8) {
        pager.stage_page(2, make_page(fill));
        pager
            .commit(DbHeader {
                freelist_head: 0,
                format_version: FORMAT_VERSION_BASELINE,
                schema_root_page: 1,
                page_count: 3,
            })
            .unwrap();
    }

    // The auto-checkpoint must have run at some point during the loop.
    let wal_len = std::fs::metadata(wal_path_for(&db_path)).unwrap().len();
    assert_eq!(
        wal_len, WAL_HEADER_SIZE as u64,
        "auto-checkpoint should have truncated the WAL"
    );

    // Page 2 carries the fill byte of the final iteration.
    let expected = ((commits_needed - 1) & 0xff) as u8;
    assert_eq!(pager.read_page(2).unwrap()[0], expected);

    cleanup(&db_path);
}
1034
1035 // -------------------------------------------------------------------
1036 // Phase 4e — shared/exclusive lock modes
1037 // -------------------------------------------------------------------
1038
#[test]
fn two_read_only_openers_coexist() {
    // Phase 4e: read-only opens take shared locks, so two of them must be
    // able to hold the same database at once and both serve reads.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("ro_coexist");

    // Seed one committed page, then release the writer before any reader
    // is opened.
    let mut seed = Pager::create(&db_path).unwrap();
    seed.stage_page(2, make_page(0x55));
    seed.commit(DbHeader {
        freelist_head: 0,
        format_version: FORMAT_VERSION_BASELINE,
        schema_root_page: 1,
        page_count: 3,
    })
    .unwrap();
    drop(seed);

    let first_reader = Pager::open_read_only(&db_path).unwrap();
    let second_reader = Pager::open_read_only(&db_path).unwrap();

    // Each reader sees the committed content.
    for reader in [&first_reader, &second_reader] {
        assert_eq!(reader.read_page(2).unwrap()[0], 0x55);
    }
    assert_eq!(first_reader.access_mode(), AccessMode::ReadOnly);

    cleanup(&db_path);
}
1065
#[test]
fn read_write_blocks_read_only_and_vice_versa() {
    // Lock exclusion in both directions: a live read-write pager must make
    // a read-only open fail, and a live read-only pager must make a
    // read-write open fail. Each error's text is checked for the phrase
    // this test expects in that direction.
    let db_path = tmp_path("rw_vs_ro");

    // Direction 1: the writer holds the exclusive lock, so a reader cannot
    // take its shared lock.
    let writer = Pager::create(&db_path).unwrap();
    let reader_attempt = Pager::open_read_only(&db_path);
    assert!(reader_attempt.is_err());
    let msg = format!("{}", reader_attempt.unwrap_err());
    assert!(
        msg.contains("locked for writing"),
        "read-only open while writer holds lock should mention writer; got: {msg}"
    );

    // Release the writer so a reader can get in.
    drop(writer);

    // Direction 2: with a reader alive, a read-write open is rejected.
    let _live_reader = Pager::open_read_only(&db_path).unwrap();
    let writer_attempt = Pager::open(&db_path);
    assert!(writer_attempt.is_err());
    let msg = format!("{}", writer_attempt.unwrap_err());
    assert!(
        msg.contains("in use"),
        "read-write open while reader holds lock should mention contention; got: {msg}"
    );

    cleanup(&db_path);
}
1097
#[test]
fn read_only_pager_rejects_mutations() {
    // A pager opened read-only must refuse both `commit` and `checkpoint`
    // with an error mentioning "read-only", while reads keep working.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("ro_rejects");
    let three_page_header = || DbHeader {
        freelist_head: 0,
        format_version: FORMAT_VERSION_BASELINE,
        schema_root_page: 1,
        page_count: 3,
    };

    // Seed some committed content so the read-only open has data to serve,
    // then close the writer.
    let mut seed = Pager::create(&db_path).unwrap();
    seed.stage_page(2, make_page(0x33));
    seed.commit(three_page_header()).unwrap();
    drop(seed);

    let mut ro = Pager::open_read_only(&db_path).unwrap();

    // Mutation attempt 1: commit.
    let commit_err = ro.commit(three_page_header()).unwrap_err();
    let commit_text = format!("{commit_err}");
    assert!(
        commit_text.contains("read-only"),
        "commit on RO pager should surface 'read-only'; got: {commit_err}"
    );

    // Mutation attempt 2: checkpoint.
    let ckpt_err = ro.checkpoint().unwrap_err();
    let ckpt_text = format!("{ckpt_err}");
    assert!(
        ckpt_text.contains("read-only"),
        "checkpoint on RO pager should surface 'read-only'; got: {ckpt_err}"
    );

    // The rejected mutations did not break reading.
    assert_eq!(ro.read_page(2).unwrap()[0], 0x33);

    cleanup(&db_path);
}
1138
#[test]
fn read_only_open_without_wal_sidecar_succeeds() {
    // A database file with no -wal sidecar next to it (deleted, or written
    // by a Phase-4a build that predates Phase 4c) must still open
    // read-only. Reads are then answered from the main file alone.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;

    let db_path = tmp_path("ro_no_wal");

    let mut seed = Pager::create(&db_path).unwrap();
    seed.stage_page(2, make_page(0x44));
    seed.commit(DbHeader {
        freelist_head: 0,
        format_version: FORMAT_VERSION_BASELINE,
        schema_root_page: 1,
        page_count: 3,
    })
    .unwrap();
    // Fold the WAL into the main file first, otherwise deleting the
    // sidecar would throw the committed page away.
    seed.checkpoint().unwrap();
    drop(seed);

    // Remove the sidecar.
    std::fs::remove_file(wal_path_for(&db_path)).unwrap();

    let ro = Pager::open_read_only(&db_path).unwrap();
    assert_eq!(ro.read_page(2).unwrap()[0], 0x44);

    // A read-only open must not bring the sidecar back into existence.
    let sidecar_exists = wal_path_for(&db_path).exists();
    assert!(!sidecar_exists);
    cleanup(&db_path);
}
1168
#[test]
fn reopen_after_crash_between_data_write_and_header_write_recovers_via_wal() {
    // Models a `checkpoint` interrupted between step 2 (data-page fsync)
    // and step 3 (header write): the main file already carries the new
    // page 2 but still has the old header, and the WAL still holds every
    // committed frame. The next open must rebuild the post-commit view from
    // the WAL instead of trusting the stale main-file header.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;
    use std::io::{Seek, SeekFrom, Write};

    let db_path = tmp_path("ckpt_crash_mid_flush");

    // Commit page 2 through the WAL, then close the pager so its exclusive
    // lock is released before the file is modified by hand.
    let mut pager = Pager::create(&db_path).unwrap();
    pager.stage_page(2, make_page(0xEE));
    pager
        .commit(DbHeader {
            freelist_head: 0,
            format_version: FORMAT_VERSION_BASELINE,
            schema_root_page: 1,
            page_count: 3,
        })
        .unwrap();
    drop(pager);

    // Hand-apply the first half of a checkpoint: write the committed page 2
    // at offset 2 * PAGE_SIZE and sync. The write grows the file as needed.
    // The header at offset 0 is deliberately left alone, so it stays at the
    // pre-commit state (page_count=2 from create).
    let mut main_file = std::fs::OpenOptions::new()
        .write(true)
        .open(&db_path)
        .unwrap();
    main_file
        .seek(SeekFrom::Start(2 * PAGE_SIZE as u64))
        .unwrap();
    main_file.write_all(&make_page(0xEE)).unwrap();
    main_file.sync_all().unwrap();
    drop(main_file);

    // Reopen. WAL replay must lift the page count from the stale 2 to 3 and
    // serve page 2. The main file happens to hold the same byte for page 2
    // here, but the pager is not supposed to depend on that coincidence.
    let reopened = Pager::open(&db_path).unwrap();
    assert_eq!(reopened.header().page_count, 3);
    assert_eq!(reopened.read_page(2).unwrap()[0], 0xEE);
    cleanup(&db_path);
}
1216
#[test]
fn auto_checkpoint_crosses_threshold_mid_loop() {
    // Pins the threshold semantics: `commit` must trigger a checkpoint as
    // soon as the WAL's frame count hits AUTO_CHECKPOINT_THRESHOLD_FRAMES,
    // not one commit later. Catches a regression where the trigger
    // comparison is loosened (e.g. to a strict `>`) or the frame accounting
    // changes. NOTE(review): the "exactly at the threshold" reading assumes
    // an even threshold; with an odd one the crossing commit overshoots by
    // one frame.
    use crate::sql::pager::header::FORMAT_VERSION_BASELINE;
    use crate::sql::pager::wal::WAL_HEADER_SIZE;

    let path = tmp_path("ckpt_threshold_crossing");
    let mut p = Pager::create(&path).unwrap();

    // Every commit in this test uses the same header.
    let header = || DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };

    // Each commit appends 2 frames (one data frame for page 2 plus the
    // page-0 commit frame), so ceil(THRESHOLD / 2) commits reach the
    // trigger. Run all but the last one here.
    let commits_to_cross = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
    for i in 0..commits_to_cross - 1 {
        p.stage_page(2, make_page((i & 0xff) as u8));
        p.commit(header()).unwrap();
    }

    // One commit short of the threshold: the WAL must not have been
    // flushed yet.
    let pre = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert!(
        pre > WAL_HEADER_SIZE as u64,
        "WAL should still carry frames right before the crossing commit"
    );

    // The crossing commit. Its fill byte continues the loop's sequence
    // rather than using a fixed value: `commit` only appends a data frame
    // when the page bytes differ from the committed state, so a fixed byte
    // that happened to equal the previous iteration's fill would add just
    // one frame and could miss the threshold. Consecutive sequence values
    // always differ.
    let crossing_fill = ((commits_to_cross - 1) & 0xff) as u8;
    p.stage_page(2, make_page(crossing_fill));
    p.commit(header()).unwrap();

    let post = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert_eq!(
        post,
        WAL_HEADER_SIZE as u64,
        "WAL must be header-only right after the threshold-crossing commit"
    );

    cleanup(&path);
}
1261}