sqlrite/sql/pager/pager.rs
1//! Long-lived page cache + WAL-backed commits.
2//!
3//! A `Pager` wraps an open `.sqlrite` file plus its `-wal` sidecar. It owns
4//! three maps of page bytes:
5//!
6//! - `on_disk`: snapshot of the main file as last checkpointed. Frozen
7//! across regular commits — the main file is only rewritten
8//! when the checkpointer (Phase 4d) runs.
9//! - `wal_cache`: latest committed body for each page that has been
10//! appended to the WAL since the last checkpoint. Populated
11//! at open by replaying the WAL, and kept in lockstep with
12//! each successful `commit`.
13//! - `staged`: pages queued for the next commit, not yet in the WAL.
14//!
15//! **Read precedence.** `read_page` consults `staged → wal_cache → on_disk`,
16//! so both uncommitted writes and WAL-resident committed writes shadow the
17//! frozen main file. A bounds check against `current_header.page_count`
18//! hides pages that have been logically truncated by a shrink-commit even
19//! though their bytes are still present in `on_disk` (the real truncation
20//! waits for the checkpointer).
21//!
22//! **Commit flow.** `commit` compares each staged page against the
23//! effective committed state (wal_cache layered on on_disk) and appends a
24//! WAL frame only for pages whose bytes actually differ. A final "commit"
25//! frame for page 0 carries the new encoded header and the post-commit
26//! page count in its `commit_page_count` field. That frame is fsync'd.
27//! The main file is not touched.
28//!
29//! **Checkpoint flow (Phase 4d).** When the WAL accumulates past
30//! `AUTO_CHECKPOINT_THRESHOLD_FRAMES` frames (tracked on `Wal`), `commit`
31//! opportunistically folds them back into the main file: write every
32//! WAL-resident page at its proper offset, overwrite the main-file
33//! header, truncate the file to `page_count * PAGE_SIZE` bytes, `fsync`,
34//! then `Wal::truncate` the sidecar (which rolls the salt so any stale
35//! tail bytes from the old generation can't be misread as valid). Reads
36//! stay consistent if a crash hits mid-checkpoint — the WAL still holds
37//! the authoritative bytes until its header is rewritten, and the
38//! checkpointer is idempotent, so rerunning is safe.
39//!
//! The commit-time diff matters because higher layers re-serialize the
//! entire database on every auto-save. Without it, even a one-row UPDATE
//! would append a frame for every page of every table. With it, unchanged
//! tables — whose encoded pages are byte-identical across saves — simply
//! stay out of the WAL.
45//!
46//! **Locking (Phase 4a → 4e).** Every `Pager` takes an advisory lock on
47//! its main file and on its WAL sidecar. The mode is driven by
48//! [`AccessMode`]:
49//!
50//! - `ReadWrite` → `flock(LOCK_EX)` — one writer, no other openers.
51//! - `ReadOnly` → `flock(LOCK_SH)` — multiple readers coexist; any writer
52//! is excluded.
53//!
54//! Both locks are tied to their file descriptors and release
55//! automatically when the `Pager` drops. On collision the opener gets
56//! a clean typed error rather than racing silently. POSIX flock is
57//! "multiple readers OR one writer", not both — true concurrent
58//! reader-and-writer access would need a shared-memory coordination
59//! file and read marks, which is not on the roadmap.
60
61use std::collections::HashMap;
62use std::fs::{File, OpenOptions};
63use std::path::{Path, PathBuf};
64
65use crate::error::{Result, SQLRiteError};
66use crate::sql::pager::file::FileStorage;
67use crate::sql::pager::header::{DbHeader, decode_header, encode_header};
68use crate::sql::pager::page::PAGE_SIZE;
69use crate::sql::pager::wal::Wal;
70
/// Derives the WAL sidecar path that pairs with a main database file.
///
/// The `-wal` suffix is appended to the *whole* path rather than swapped
/// in for the extension, so `foo.sqlrite` pairs with `foo.sqlrite-wal`
/// (the same convention SQLite uses).
pub(crate) fn wal_path_for(main: &Path) -> PathBuf {
    let mut sidecar = main.to_path_buf().into_os_string();
    sidecar.push("-wal");
    sidecar.into()
}
79
/// How a `Pager` (or `Wal`) intends to use the file: mutating writes vs.
/// consistent-snapshot reads. Drives the OS-level lock mode, and the
/// Pager uses it to reject mutation attempts on read-only openers.
///
/// - `ReadWrite` takes `flock(LOCK_EX)` — one writer, no other openers.
/// - `ReadOnly` takes `flock(LOCK_SH)` — multiple readers can coexist;
///   a writer is excluded.
///
/// This is POSIX-flock semantics, so "multiple readers AND one writer"
/// isn't supported yet. True concurrent reader-writer access would need
/// a shared-memory coordination file and read marks — that's deferred.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessMode {
    /// Exclusive access: the opener may `commit` and `checkpoint`.
    ReadWrite,
    /// Shared access: `commit`, `checkpoint` and `append_mvcc_batch` are
    /// rejected with an error on a Pager opened in this mode.
    ReadOnly,
}
96
/// Takes a non-blocking advisory lock on `file` and turns a failure into
/// a readable SQLRite error that names the database at `path`.
///
/// `AccessMode::ReadWrite` requests an exclusive lock and
/// `AccessMode::ReadOnly` a shared one. Per the fs2 crate these are
/// `flock(LOCK_EX | LOCK_NB)` / `flock(LOCK_SH | LOCK_NB)` on Unix and
/// `LockFileEx` with the matching flags on Windows.
///
/// The fs2 trait methods are called fully qualified on purpose: newer
/// standard libraries give `std::fs::File` inherent `try_lock_*` methods
/// with a different error type, and qualifying pins down which one runs.
///
/// # Errors
///
/// Any error from the lock call is reported as `SQLRiteError::General`
/// with a mode-specific explanation and the underlying OS error appended.
#[cfg(feature = "file-locks")]
pub(crate) fn acquire_lock(file: &File, path: &Path, mode: AccessMode) -> Result<()> {
    // Pair each mode with both its lock attempt and the wording used if
    // that attempt fails, so the two can never drift apart.
    let (attempt, how) = match mode {
        AccessMode::ReadWrite => (
            fs2::FileExt::try_lock_exclusive(file),
            "is in use (another process has it open; readers and writers are exclusive)",
        ),
        AccessMode::ReadOnly => (
            fs2::FileExt::try_lock_shared(file),
            "is locked for writing by another process (read-only open blocked until the writer closes)",
        ),
    };
    match attempt {
        Ok(()) => Ok(()),
        Err(e) => Err(SQLRiteError::General(format!(
            "database '{}' {how} ({e})",
            path.display()
        ))),
    }
}
126
/// No-op variant for builds without the `file-locks` feature (most
/// notably the WASM SDK, where `fs2` doesn't compile against
/// wasm32-unknown-unknown). The Pager still refuses to touch a
/// read-only open via `AccessMode`, but there's no OS-level
/// multi-process coordination — the caller is trusted to avoid
/// conflicting opens. Fine for WASM, where file-backed opens
/// aren't exposed in the MVP anyway.
///
/// Always returns `Ok(())`; every argument is ignored. The signature
/// mirrors the locking variant so call sites compile unchanged under
/// either feature configuration.
#[cfg(not(feature = "file-locks"))]
pub(crate) fn acquire_lock(_file: &File, _path: &Path, _mode: AccessMode) -> Result<()> {
    Ok(())
}
138
/// How many WAL frames may accumulate between auto-checkpoints before
/// `commit` opportunistically folds them back into the main file. Kept
/// low enough that the WAL stays bounded on write-heavy workloads;
/// high enough that small bursts don't thrash the main file. SQLite
/// defaults to 1000; our target DBs are smaller so 100 is plenty.
///
/// `commit` compares the WAL's frame count *after* appending its commit
/// frame against this value with `>=`, so the checkpoint fires on the
/// commit that reaches the threshold, not the one after it.
const AUTO_CHECKPOINT_THRESHOLD_FRAMES: usize = 100;
145
/// Page cache over a main database file and its `-wal` sidecar.
///
/// `read_page` is served purely from the in-memory maps below. `commit`
/// appends to the WAL, and `checkpoint` is the only operation that
/// rewrites the main file after creation.
pub struct Pager {
    /// Main-file I/O handle. Regular commits leave it alone; the
    /// checkpointer writes accumulated WAL pages back here.
    storage: FileStorage,
    /// Header of the latest committed state. At open it comes from the
    /// WAL's page-0 frame when one was replayed, otherwise from the main
    /// file; every successful `commit` replaces it.
    current_header: DbHeader,
    /// Byte snapshot of the main file as last checkpointed. The
    /// checkpointer is the only thing that mutates it. Page 0 (the
    /// header) is never stored here.
    on_disk: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// Pages queued for the next commit. `commit` drains this.
    staged: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// The committed WAL's view of each page. Populated at open by
    /// replaying the log, and kept in sync with each successful commit.
    /// Layered on top of `on_disk` for read resolution. Unlike `on_disk`
    /// it may hold an entry for page 0 (the encoded header).
    wal_cache: HashMap<u32, Box<[u8; PAGE_SIZE]>>,
    /// Write-ahead log sidecar. Always `Some` on a read-write Pager. On
    /// a read-only Pager it is `Some` when a sidecar existed at open
    /// (the handle is retained so its lock spans the Pager's lifetime)
    /// and `None` when no sidecar was found. Reads consult `wal_cache`
    /// (already populated at open) either way.
    wal: Option<Wal>,
    /// `ReadWrite` allows `commit` / `checkpoint`; `ReadOnly` rejects
    /// them with a typed error. `stage_page` stays open on both modes
    /// (it only touches the in-memory `staged` map) — any staged bytes
    /// simply never reach disk on a read-only Pager because `commit` is
    /// the gate.
    access_mode: AccessMode,
}
172
173impl Pager {
174 /// Opens an existing database file for read-write access. Shorthand
175 /// for [`Pager::open_with_mode`] with [`AccessMode::ReadWrite`].
176 pub fn open(path: &Path) -> Result<Self> {
177 Self::open_with_mode(path, AccessMode::ReadWrite)
178 }
179
180 /// Opens an existing database file for read-only access — takes
181 /// a shared advisory lock that coexists with other readers but is
182 /// excluded by any writer. `commit` and `checkpoint` return a clean
183 /// error rather than panic; `stage_page` stays a no-op-to-disk
184 /// (bytes sit in the in-memory `staged` map that `commit` would
185 /// have drained).
186 ///
187 /// If the WAL sidecar doesn't exist, the open succeeds with an
188 /// empty `wal_cache` — a read-only caller can't materialize a
189 /// sidecar on its own, and a DB that never had WAL writes is fine
190 /// to read straight from the main file.
191 pub fn open_read_only(path: &Path) -> Result<Self> {
192 Self::open_with_mode(path, AccessMode::ReadOnly)
193 }
194
195 /// Opens an existing database file with the given access mode.
196 /// Loads every main-file page into `on_disk`, then opens the WAL
197 /// sidecar (read-only mode uses a shared lock and skips sidecar
198 /// creation; read-write creates the sidecar if missing) and layers
199 /// committed frames into `wal_cache`.
200 pub fn open_with_mode(path: &Path, mode: AccessMode) -> Result<Self> {
201 let file = match mode {
202 AccessMode::ReadWrite => OpenOptions::new().read(true).write(true).open(path)?,
203 AccessMode::ReadOnly => OpenOptions::new().read(true).open(path)?,
204 };
205 acquire_lock(&file, path, mode)?;
206 let mut storage = FileStorage::new(file);
207 let mut header = storage.read_header()?;
208
209 let mut on_disk = HashMap::with_capacity(header.page_count.saturating_sub(1) as usize);
210 // page 0 is the header itself; regular pages live at 1..page_count.
211 for page_num in 1..header.page_count {
212 let buf = read_raw_page(&mut storage, page_num)?;
213 on_disk.insert(page_num, buf);
214 }
215
216 let wal_path = wal_path_for(path);
217 let (wal_handle, wal_cache) = match mode {
218 AccessMode::ReadWrite => {
219 // Create the sidecar if it's missing — a pre-Phase-4c
220 // file or a DB that was hand-deleted down to just the
221 // main file both need a fresh empty WAL to be writable.
222 let mut wal = if wal_path.exists() {
223 Wal::open_with_mode(&wal_path, mode)?
224 } else {
225 Wal::create(&wal_path)?
226 };
227 let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
228 wal.load_committed_into(&mut cache)?;
229 (Some(wal), cache)
230 }
231 AccessMode::ReadOnly => {
232 // Read-only mustn't create files. If the sidecar is
233 // absent, treat the WAL as empty and serve reads from
234 // the main file alone.
235 if wal_path.exists() {
236 let mut wal = Wal::open_with_mode(&wal_path, mode)?;
237 let mut cache: HashMap<u32, Box<[u8; PAGE_SIZE]>> = HashMap::new();
238 wal.load_committed_into(&mut cache)?;
239 // We don't need to retain the WAL handle in
240 // read-only mode — the cache is all reads need and
241 // dropping the handle releases the shared lock on
242 // the sidecar early. Keep it, though, so the lock
243 // spans the whole Pager lifetime: a checkpointer
244 // process grabbing LOCK_EX on the WAL while our
245 // reader still has wal_cache loaded would be
246 // correct for reads but surprising semantically.
247 (Some(wal), cache)
248 } else {
249 (None, HashMap::new())
250 }
251 }
252 };
253
254 // If the WAL committed a new page 0, that frame's body is the
255 // up-to-date header — decode it and let it override what the
256 // main file's stale header says.
257 if let Some(page0) = wal_cache.get(&0) {
258 header = decode_header(page0.as_ref())?;
259 } else if let Some(w) = wal_handle.as_ref()
260 && let Some(committed_pc) = w.last_commit_page_count()
261 {
262 // Belt-and-suspenders: even if the latest commit frame didn't
263 // land on page 0 (shouldn't happen under the current commit
264 // layout, but keeps us correct if that ever changes), trust
265 // its page count.
266 header.page_count = committed_pc;
267 }
268
269 Ok(Self {
270 storage,
271 current_header: header,
272 on_disk,
273 staged: HashMap::new(),
274 wal_cache,
275 wal: wal_handle,
276 access_mode: mode,
277 })
278 }
279
280 /// Creates a fresh database file. Page 0 is the header; page 1 is an
281 /// empty `TableLeaf` that serves as the initial `sqlrite_master` root
282 /// (zero rows, no user tables yet). A matching empty WAL sidecar is
283 /// created alongside it — any pre-existing WAL at the target path is
284 /// truncated.
285 pub fn create(path: &Path) -> Result<Self> {
286 use crate::sql::pager::page::{PAGE_HEADER_SIZE, PageType};
287 use crate::sql::pager::table_page::TablePage;
288
289 let file = OpenOptions::new()
290 .read(true)
291 .write(true)
292 .create(true)
293 .truncate(true)
294 .open(path)?;
295 acquire_lock(&file, path, AccessMode::ReadWrite)?;
296 let mut storage = FileStorage::new(file);
297
298 let empty_master = TablePage::empty();
299 let mut page1 = Box::new([0u8; PAGE_SIZE]);
300 page1[0] = PageType::TableLeaf as u8;
301 page1[1..5].copy_from_slice(&0u32.to_le_bytes());
302 page1[5..7].copy_from_slice(&0u16.to_le_bytes());
303 page1[PAGE_HEADER_SIZE..].copy_from_slice(empty_master.as_bytes());
304
305 let header = DbHeader {
306 page_count: 2,
307 schema_root_page: 1,
308 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
309 freelist_head: 0,
310 };
311
312 // Write the file synchronously so the initial create is durable and
313 // subsequent `Pager::open` calls see a valid header + page 1.
314 storage.seek_to(0)?;
315 storage.write_all(&encode_header(&header))?;
316 storage.write_all(page1.as_ref())?;
317 storage.flush()?;
318
319 // Sidecar WAL — fresh, no frames yet.
320 let wal = Wal::create(&wal_path_for(path))?;
321
322 let mut on_disk = HashMap::new();
323 on_disk.insert(1, page1);
324
325 Ok(Self {
326 storage,
327 current_header: header,
328 on_disk,
329 staged: HashMap::new(),
330 wal_cache: HashMap::new(),
331 wal: Some(wal),
332 access_mode: AccessMode::ReadWrite,
333 })
334 }
335
336 pub fn header(&self) -> DbHeader {
337 self.current_header
338 }
339
340 /// Returns the mode this Pager was opened in. Callers can use this
341 /// to bail out of a write path earlier than the Pager itself would.
342 pub fn access_mode(&self) -> AccessMode {
343 self.access_mode
344 }
345
346 fn require_writable(&self, op: &'static str) -> Result<()> {
347 if self.access_mode == AccessMode::ReadOnly {
348 return Err(SQLRiteError::General(format!(
349 "cannot {op}: database is opened read-only"
350 )));
351 }
352 Ok(())
353 }
354
355 /// Reads a page, preferring staged content, then the WAL-committed
356 /// overlay, then the frozen main-file snapshot. Returns `None` for
357 /// pages beyond the current page count (pages that have been logically
358 /// truncated by a shrink-commit stay in `on_disk` until checkpoint,
359 /// but a bounds check hides them from readers).
360 pub fn read_page(&self, page_num: u32) -> Option<&[u8; PAGE_SIZE]> {
361 // Staged pages are "the future" and should always shadow everything
362 // else, even pages we're about to extend beyond the old page count.
363 if let Some(b) = self.staged.get(&page_num) {
364 return Some(b);
365 }
366 // A page that's been logically dropped shouldn't be readable even
367 // if its bytes linger in on_disk until the next checkpoint.
368 if page_num >= self.current_header.page_count {
369 return None;
370 }
371 if let Some(b) = self.wal_cache.get(&page_num) {
372 return Some(b.as_ref());
373 }
374 self.on_disk.get(&page_num).map(|b| b.as_ref())
375 }
376
377 /// Queues `bytes` as the new content of page `page_num`. The write only
378 /// reaches disk when `commit` is called.
379 pub fn stage_page(&mut self, page_num: u32, bytes: [u8; PAGE_SIZE]) {
380 self.staged.insert(page_num, Box::new(bytes));
381 }
382
    /// Discards all staged pages. Useful when beginning a new full re-save
    /// from scratch; the higher layer can also just overwrite pages without
    /// clearing since `stage_page` replaces.
    ///
    /// Only the in-memory `staged` map is touched; committed state
    /// (`wal_cache`, `on_disk`, the WAL file) is unaffected.
    pub fn clear_staged(&mut self) {
        self.staged.clear();
    }
389
390 /// Phase 11.9 — appends an MVCC commit-batch frame to the WAL
391 /// without fsync. The next legacy page-commit's fsync covers it,
392 /// so callers should follow this with `Pager::commit` (or
393 /// `pager::save_database`, which calls into it) to seal the
394 /// transaction. See [`crate::sql::pager::wal::Wal::append_mvcc_batch`]
395 /// for the durability story.
396 ///
397 /// `Connection::commit_concurrent` is the only caller today; it
398 /// invokes this after validation passes but before the legacy
399 /// save so a single fsync covers both the MVCC log record and
400 /// the page-level updates.
401 pub fn append_mvcc_batch(&mut self, batch: &crate::mvcc::MvccCommitBatch) -> Result<()> {
402 self.require_writable("append_mvcc_batch")?;
403 let wal = self
404 .wal
405 .as_mut()
406 .expect("read-write Pager must carry a WAL handle");
407 wal.append_mvcc_batch(batch)
408 }
409
410 /// Phase 11.9 — MVCC commit batches recovered from the WAL at
411 /// open time, in commit order. Empty for fresh databases, for
412 /// pre-11.9 (v1 / v2) WALs that carry no MVCC frames, and for
413 /// read-only opens that didn't replay (those still get the
414 /// batches if the WAL had any — replay is unconditional in
415 /// `Wal::open_with_mode`).
416 ///
417 /// The caller (`pager::open_database`) drains this into
418 /// `Database::mv_store` so the conflict-detection window
419 /// survives a process restart.
420 pub fn recovered_mvcc_commits(&self) -> &[crate::mvcc::MvccCommitBatch] {
421 match self.wal.as_ref() {
422 Some(wal) => wal.recovered_mvcc_commits(),
423 None => &[],
424 }
425 }
426
427 /// Phase 11.9 — returns the persisted MVCC clock high-water
428 /// from the WAL header, or 0 for in-memory / no-WAL opens. The
429 /// open path uses this to seed [`crate::mvcc::MvccClock`] so
430 /// post-reopen transactions don't hand out timestamps below
431 /// `max(committed_ts)`.
432 pub fn clock_high_water(&self) -> u64 {
433 self.wal.as_ref().map(|w| w.clock_high_water()).unwrap_or(0)
434 }
435
436 /// Phase 11.9 — promotes the WAL header's `clock_high_water` to
437 /// `value` if it would advance. No-op if `value` is at or below
438 /// the current high-water mark. Persists the new value into
439 /// the WAL header (which an fsync at checkpoint will flush).
440 ///
441 /// Called by `Connection::commit_concurrent` after each MVCC
442 /// commit so a crash between commits and the next checkpoint
443 /// leaves enough of the clock persisted that replay seeds
444 /// `MvccClock` correctly.
445 pub fn observe_clock_high_water(&mut self, value: u64) -> Result<()> {
446 if let Some(wal) = self.wal.as_mut() {
447 if value > wal.clock_high_water() {
448 wal.set_clock_high_water(value)?;
449 }
450 }
451 Ok(())
452 }
453
454 /// Commits all staged pages into the WAL. Only pages whose bytes differ
455 /// from the effective committed state (wal_cache layered on on_disk)
456 /// produce frames. A final commit frame carries the new page 0 (encoded
457 /// header) and is fsync'd; that seals the transaction. The main file is
458 /// left untouched — it only changes when the checkpointer (Phase 4d)
459 /// runs.
460 ///
461 /// Returns the number of dirty *data* frames appended (excluding the
462 /// implicit page-0 commit frame that's always written).
463 pub fn commit(&mut self, new_header: DbHeader) -> Result<usize> {
464 self.require_writable("commit")?;
465 let wal = self
466 .wal
467 .as_mut()
468 .expect("read-write Pager must carry a WAL handle");
469
470 // Decide which staged pages carry bytes that aren't already live.
471 // Effective committed state = wal_cache overlaid on on_disk.
472 let staged = std::mem::take(&mut self.staged);
473 let mut dirty: Vec<(u32, Box<[u8; PAGE_SIZE]>)> = staged
474 .into_iter()
475 .filter(|(n, bytes)| {
476 let existing = self.wal_cache.get(n).or_else(|| self.on_disk.get(n));
477 match existing {
478 Some(e) => e.as_ref() != bytes.as_ref(),
479 None => true,
480 }
481 })
482 .collect();
483 // Append in ascending page order so the log replays deterministically
484 // and sequential reads during checkpoint stay sequential.
485 dirty.sort_by_key(|(n, _)| *n);
486 let writes = dirty.len();
487
488 for (n, bytes) in &dirty {
489 wal.append_frame(*n, bytes.as_ref(), None)?;
490 }
491
492 // Seal the transaction. The commit frame carries the new page 0
493 // (encoded header) in its body and the new page count in its
494 // commit_page_count field — together they're the single atomic
495 // record that says "this is the new committed state".
496 let page0 = encode_header(&new_header);
497 wal.append_frame(0, &page0, Some(new_header.page_count))?;
498 let frame_count_after_commit = wal.frame_count();
499
500 // Promote every frame we just wrote into wal_cache so subsequent
501 // reads see the latest committed bytes without touching the WAL.
502 for (n, bytes) in dirty {
503 self.wal_cache.insert(n, bytes);
504 }
505 self.wal_cache.insert(0, Box::new(page0));
506
507 self.current_header = new_header;
508
509 // Keep the WAL bounded. Under write-heavy load, un-flushed frames
510 // accumulate; past the threshold we fold them back into the main
511 // file opportunistically so open doesn't have to replay an
512 // arbitrarily long log on the next start.
513 if frame_count_after_commit >= AUTO_CHECKPOINT_THRESHOLD_FRAMES {
514 self.checkpoint()?;
515 }
516
517 Ok(writes)
518 }
519
    /// Folds all WAL-resident pages back into the main file and truncates
    /// the WAL. Returns the number of data pages written to the main
    /// file (excludes the header).
    ///
    /// **Crash safety — two fsync barriers.** The main-file writes happen
    /// in two phases separated by a barrier, matching SQLite's checkpoint
    /// ordering:
    ///
    /// 1. Write every `wal_cache` data page at its `page_num * PAGE_SIZE`
    ///    offset in the main file.
    /// 2. **`fsync`** — force those data pages to stable storage *before*
    ///    the header publishes the new state. Without this barrier, a
    ///    filesystem or disk-cache reordering could land the header first,
    ///    leaving a main file that claims "N pages" over stale data.
    ///    (Skipped when step 1 wrote no data pages.)
    /// 3. Rewrite the main-file header at offset 0. This is the
    ///    checkpoint's "commit point" — after it hits disk the main file
    ///    alone tells the truth.
    /// 4. `set_len` shrinks the tail if `page_count` dropped.
    /// 5. **`fsync`** — force the header + set_len durable.
    /// 6. `Wal::truncate` resets the sidecar (rolls salt, writes new
    ///    header, fsync). Running this *after* the main file is fully
    ///    durable means a crash between 5 and 6 leaves a stale WAL over a
    ///    current main file; readers still see the right bytes because
    ///    wal_cache (replayed from the stale WAL on next open) would be
    ///    byte-identical to what's in the main file. A retry of
    ///    `checkpoint` then truncates cleanly.
    ///
    /// A crash between 1 and 2 can leave partial data-page writes, but
    /// since the header hasn't moved yet, the main file still reads as
    /// its pre-checkpoint self — the WAL is intact and authoritative,
    /// and a retry rewrites the same bytes.
    ///
    /// Returns `Ok(0)` without touching either file when the WAL has no
    /// frames and `wal_cache` is empty.
    ///
    /// # Errors
    ///
    /// Fails on a read-only Pager, and propagates any I/O error from the
    /// main-file writes/flushes or from `Wal::truncate`. The in-memory
    /// maps are only updated after every on-disk step has succeeded.
    ///
    /// # Panics
    ///
    /// Panics if a read-write Pager has no WAL handle, which would be an
    /// internal invariant violation.
    pub fn checkpoint(&mut self) -> Result<usize> {
        self.require_writable("checkpoint")?;
        // `require_writable` already guaranteed we're ReadWrite; in
        // ReadWrite mode `wal` is always `Some` (it's only `None` for
        // ReadOnly opens of a DB that had no sidecar on disk).
        let wal_frame_count = self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0);

        // Nothing to flush? Skip the fsyncs and get out.
        if wal_frame_count == 0 && self.wal_cache.is_empty() {
            return Ok(0);
        }

        // Step 1 — write every WAL-resident data page to the main file.
        // Page 0 (header) is handled separately via write_header, and any
        // pages past the new page count are skipped here (set_len will
        // drop them when the file shrinks).
        let page_count = self.current_header.page_count;
        let mut pages: Vec<u32> = self
            .wal_cache
            .keys()
            .copied()
            .filter(|&n| n != 0 && n < page_count)
            .collect();
        // Ascending order so the main-file writes proceed front to back.
        pages.sort_unstable();
        let written = pages.len();
        for page_num in &pages {
            let bytes = self
                .wal_cache
                .get(page_num)
                .expect("iterated key must resolve");
            self.storage
                .seek_to((*page_num as u64) * (PAGE_SIZE as u64))?;
            self.storage.write_all(bytes.as_ref())?;
        }

        // Step 2 — first durability barrier. Data pages must hit stable
        // storage before the header publishes the new page count /
        // schema root, or a reordered writeback could expose a
        // half-migrated file on crash.
        if written > 0 {
            self.storage.flush()?;
        }

        // Step 3 — rewrite the main-file header. This is the checkpoint's
        // atomic record.
        self.storage.write_header(&self.current_header)?;

        // Step 4 — shrink the main file if the committed page count is
        // smaller than what the file physically holds.
        self.storage.truncate_to_pages(page_count)?;

        // Step 5 — second durability barrier. Makes header + set_len
        // durable together before we touch the WAL.
        self.storage.flush()?;

        // Step 6 — reset the WAL sidecar. Runs before the in-memory
        // cache swap so that if `wal.truncate` fails (disk full, EIO)
        // we leave the in-memory state untouched rather than having
        // wal_cache empty + on_disk updated + WAL un-truncated, which
        // the Pager can't easily recover from on its own. Here a
        // failure means the main file is already consistent on disk
        // (steps 2 + 5 fsynced it); we just leave the stale WAL in
        // place for the next checkpoint attempt.
        self.wal
            .as_mut()
            .expect("read-write Pager must carry a WAL handle")
            .truncate()?;

        // Promote wal_cache into on_disk and drop everything that's no
        // longer live. Page 0 is special — it's never materialized in
        // on_disk (we read it lazily via storage.read_header on open).
        for (n, bytes) in self.wal_cache.drain().filter(|(n, _)| *n != 0) {
            if n < page_count {
                self.on_disk.insert(n, bytes);
            }
        }
        // Also evict snapshot pages that predate a shrink and now lie
        // past the committed page count.
        self.on_disk.retain(|&n, _| n < page_count);

        Ok(written)
    }
631}
632
633fn read_raw_page(storage: &mut FileStorage, page_num: u32) -> Result<Box<[u8; PAGE_SIZE]>> {
634 storage.seek_to((page_num as u64) * (PAGE_SIZE as u64))?;
635 let mut buf = Box::new([0u8; PAGE_SIZE]);
636 storage.read_exact(buf.as_mut())?;
637 Ok(buf)
638}
639
640impl std::fmt::Debug for Pager {
641 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
642 f.debug_struct("Pager")
643 .field("access_mode", &self.access_mode)
644 .field("page_count", &self.current_header.page_count)
645 .field("schema_root_page", &self.current_header.schema_root_page)
646 .field("cached_pages", &self.on_disk.len())
647 .field("staged_pages", &self.staged.len())
648 .field("wal_pages", &self.wal_cache.len())
649 .field(
650 "wal_frames",
651 &self.wal.as_ref().map(|w| w.frame_count()).unwrap_or(0),
652 )
653 .finish()
654 }
655}
656
657#[cfg(test)]
658mod tests {
659 use super::*;
660
661 fn tmp_path(name: &str) -> std::path::PathBuf {
662 let mut p = std::env::temp_dir();
663 let pid = std::process::id();
664 let nanos = std::time::SystemTime::now()
665 .duration_since(std::time::UNIX_EPOCH)
666 .map(|d| d.as_nanos())
667 .unwrap_or(0);
668 p.push(format!("sqlrite-pager-{pid}-{nanos}-{name}.sqlrite"));
669 p
670 }
671
672 /// Remove both the main file and its `-wal` sidecar — leaving either
673 /// behind can destabilize later test runs on the same tmp dir.
674 fn cleanup(path: &Path) {
675 let _ = std::fs::remove_file(path);
676 let _ = std::fs::remove_file(wal_path_for(path));
677 }
678
679 fn make_page(first_byte: u8) -> [u8; PAGE_SIZE] {
680 let mut buf = [0u8; PAGE_SIZE];
681 buf[0] = first_byte;
682 buf
683 }
684
685 #[test]
686 fn create_then_open_round_trips() {
687 let path = tmp_path("create_open");
688 {
689 let p = Pager::create(&path).unwrap();
690 assert_eq!(p.header().page_count, 2);
691 assert_eq!(p.header().schema_root_page, 1);
692 }
693 let p2 = Pager::open(&path).unwrap();
694 assert_eq!(p2.header().page_count, 2);
695 cleanup(&path);
696 }
697
698 #[test]
699 fn create_spawns_wal_sidecar() {
700 // Phase 4c: `Pager::create` must produce an empty WAL sidecar
701 // alongside the main file so the first commit has somewhere to
702 // append frames.
703 use crate::sql::pager::wal::WAL_HEADER_SIZE;
704 let path = tmp_path("wal_sidecar");
705 let _p = Pager::create(&path).unwrap();
706 let wal = wal_path_for(&path);
707 assert!(wal.exists(), "WAL sidecar should exist after create");
708 // An empty WAL is just its header.
709 let len = std::fs::metadata(&wal).unwrap().len();
710 assert_eq!(
711 len, WAL_HEADER_SIZE as u64,
712 "fresh WAL should be header-only"
713 );
714 cleanup(&path);
715 }
716
717 #[test]
718 fn commit_writes_only_dirty_pages() {
719 let path = tmp_path("diff");
720 let mut p = Pager::create(&path).unwrap();
721
722 // Initial state: page 1 is the empty-catalog schema page.
723 // Stage three "table-data" pages.
724 p.stage_page(2, make_page(0xAA));
725 p.stage_page(3, make_page(0xBB));
726 p.stage_page(4, make_page(0xCC));
727 let writes = p
728 .commit(DbHeader {
729 page_count: 5,
730 schema_root_page: 1,
731 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
732 freelist_head: 0,
733 })
734 .unwrap();
735 // 3 dirty data pages (pages 2, 3, 4). The page-0 commit frame is
736 // implicit and not counted.
737 assert_eq!(writes, 3);
738
739 // Re-stage the same bytes for pages 2 and 3, and changed bytes for 4.
740 p.stage_page(2, make_page(0xAA));
741 p.stage_page(3, make_page(0xBB));
742 p.stage_page(4, make_page(0xDD));
743 let writes = p
744 .commit(DbHeader {
745 page_count: 5,
746 schema_root_page: 1,
747 format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
748 freelist_head: 0,
749 })
750 .unwrap();
751 assert_eq!(writes, 1, "only the changed page should have been written");
752
753 // Reopen and confirm the content is as expected. The bytes live in
754 // the WAL — the main file still has the empty init state — so this
755 // also verifies the WAL-replay path.
756 drop(p);
757 let p2 = Pager::open(&path).unwrap();
758 assert_eq!(p2.read_page(2).unwrap()[0], 0xAA);
759 assert_eq!(p2.read_page(3).unwrap()[0], 0xBB);
760 assert_eq!(p2.read_page(4).unwrap()[0], 0xDD);
761
762 cleanup(&path);
763 }
764
#[test]
fn second_pager_on_same_file_is_rejected() {
    // Regression guard from Phase 4a: a second read-write Pager on a file
    // that already has one open must fail instead of racing with it.
    // The wording of the error changed in Phase 4e, so only the stable
    // substring "in use" is checked.
    let path = tmp_path("lock_contention");
    let holder = Pager::create(&path).unwrap();

    let msg = match Pager::open(&path) {
        Ok(_) => panic!("expected lock-contention error, got Ok"),
        Err(err) => err.to_string(),
    };
    assert!(
        msg.contains("in use"),
        "error message should signal lock contention; got: {msg}"
    );

    // Dropping the first Pager must release its locks so that a fresh
    // open goes through; this shows the locks follow the Pager's lifetime.
    drop(holder);
    let third = Pager::open(&path);
    assert!(third.is_ok(), "reopen after drop should succeed: {third:?}");

    cleanup(&path);
}
791
#[test]
fn commit_leaves_main_file_untouched_and_shrink_hides_dropped_pages() {
    // Phase 4c: commits now go to the WAL; the main file stays frozen
    // until the checkpointer runs. Page-count shrinks still hide the
    // logically-dropped pages from readers (via a bounds check in
    // read_page) even though their bytes linger in the main file.
    use crate::sql::pager::wal::WAL_HEADER_SIZE;

    let path = tmp_path("shrink");
    let mut p = Pager::create(&path).unwrap();
    let main_size_after_create = std::fs::metadata(&path).unwrap().len();

    p.stage_page(2, make_page(1));
    p.stage_page(3, make_page(2));
    p.stage_page(4, make_page(3));
    p.commit(DbHeader {
        page_count: 5,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    })
    .unwrap();

    // Main file unchanged: the page-2..4 bytes went into the WAL.
    assert_eq!(
        std::fs::metadata(&path).unwrap().len(),
        main_size_after_create,
        "main file must stay frozen across commits"
    );
    // WAL, however, has grown: 3 dirty frames + 1 commit frame. Compare
    // against the real header size rather than a hard-coded byte count so
    // this check tracks the WAL format, like the checkpoint tests do.
    let wal_size = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert!(
        wal_size > WAL_HEADER_SIZE as u64,
        "WAL should contain frames after a commit, got size {wal_size}"
    );

    // Shrink to 3 pages. Nothing is staged; only the header changes.
    p.commit(DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    })
    .unwrap();

    // Page 4 is now logically dropped — read_page hides it.
    assert!(p.read_page(4).is_none());
    // And page 2 is still visible under the new count.
    assert_eq!(p.read_page(2).unwrap()[0], 1);

    // Reopen confirms the committed page count survives.
    drop(p);
    let p2 = Pager::open(&path).unwrap();
    assert_eq!(p2.header().page_count, 3);
    assert!(p2.read_page(4).is_none());

    cleanup(&path);
}
848
#[test]
fn wal_replay_on_reopen_restores_committed_state() {
    // Round trip for the Phase 4c guarantee: commit, close, reopen, and
    // check that each staged page is readable again. The commit went to
    // the WAL rather than the main file, so the reopened Pager has to
    // get the pages back from the WAL.
    let path = tmp_path("wal_replay");

    let mut writer = Pager::create(&path).unwrap();
    writer.stage_page(2, make_page(0x11));
    writer.stage_page(3, make_page(0x22));
    let hdr = DbHeader {
        page_count: 4,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    writer.commit(hdr).unwrap();
    // Close the writer before reopening the same file.
    drop(writer);

    let reopened = Pager::open(&path).unwrap();
    assert_eq!(reopened.header().page_count, 4);
    for (page, first_byte) in [(2, 0x11), (3, 0x22)] {
        assert_eq!(reopened.read_page(page).unwrap()[0], first_byte);
    }
    cleanup(&path);
}
875
#[test]
fn orphan_dirty_frame_in_wal_is_invisible_on_reopen() {
    // Models a crash that lands after a dirty frame was appended but
    // before its commit frame: on the next open, WAL replay must leave
    // the previously committed bytes in place and never expose the
    // uncommitted ones.
    let path = tmp_path("orphan_dirty");

    // Step 1: a normal commit that puts 0xCC into page 2.
    let mut seeded = Pager::create(&path).unwrap();
    seeded.stage_page(2, make_page(0xCC));
    let hdr = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    seeded.commit(hdr).unwrap();
    drop(seeded);

    // Step 2: go behind the Pager's back and append a page-2 frame with
    // different content straight to the WAL. No commit frame follows, so
    // the frame is an orphan.
    {
        let mut orphan = Box::new([0u8; PAGE_SIZE]);
        orphan[0] = 0x99;
        let mut wal = crate::sql::pager::wal::Wal::open(&wal_path_for(&path)).unwrap();
        wal.append_frame(2, &orphan, None).unwrap();
    }

    // Step 3: reopening must still report the committed byte.
    let reopened = Pager::open(&path).unwrap();
    let first_byte = reopened.read_page(2).unwrap()[0];
    assert_eq!(
        first_byte, 0xCC,
        "orphan dirty frame must not shadow the last committed page"
    );
    cleanup(&path);
}
913
#[test]
fn two_commits_only_stage_the_delta() {
    // `commit` diffs staged pages against the effective committed state
    // (wal_cache over on_disk), so committing identical bytes a second
    // time must report zero dirty data frames. The commit frame itself
    // is still appended but is not part of the returned count.
    let path = tmp_path("diff_delta");
    let mut pager = Pager::create(&path).unwrap();

    // Both commits use the same header; build it on demand.
    let header_for_three_pages = || DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };

    pager.stage_page(2, make_page(0x77));
    let initial_frames = pager.commit(header_for_three_pages()).unwrap();
    assert_eq!(initial_frames, 1);

    // Same page, same byte: nothing new for the WAL to carry.
    pager.stage_page(2, make_page(0x77));
    let repeat_frames = pager.commit(header_for_three_pages()).unwrap();
    assert_eq!(repeat_frames, 0, "no data frames should be re-appended");

    cleanup(&path);
}
946
947 // -------------------------------------------------------------------
948 // Phase 4d — Checkpointer
949 // -------------------------------------------------------------------
950
#[test]
fn explicit_checkpoint_folds_wal_into_main_file_and_truncates_wal() {
    // An explicit `checkpoint` must move every WAL-resident page into the
    // main file, leave the WAL at header-only size, and leave the main
    // file exactly `page_count` pages long.
    use crate::sql::pager::wal::WAL_HEADER_SIZE;
    let path = tmp_path("ckpt_explicit");
    let mut p = Pager::create(&path).unwrap();
    // Baseline for the "main file untouched before checkpoint" check below.
    let main_len_after_create = std::fs::metadata(&path).unwrap().len();

    p.stage_page(2, make_page(0xA1));
    p.stage_page(3, make_page(0xB2));
    p.commit(DbHeader {
        page_count: 4,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    })
    .unwrap();

    // Pre-checkpoint: WAL has frames, main file is still the initial size.
    let wal = wal_path_for(&path);
    assert!(std::fs::metadata(&wal).unwrap().len() > WAL_HEADER_SIZE as u64);
    assert_eq!(
        std::fs::metadata(&path).unwrap().len(),
        main_len_after_create,
        "main file must not change until the checkpoint runs"
    );

    let written = p.checkpoint().unwrap();
    assert_eq!(written, 2, "both data pages should flush to main file");

    // WAL is now empty: only the header remains on disk.
    let wal_len = std::fs::metadata(&wal).unwrap().len();
    assert_eq!(wal_len, WAL_HEADER_SIZE as u64);

    // Main file is exactly page_count pages long.
    let main_len = std::fs::metadata(&path).unwrap().len();
    assert_eq!(main_len, 4 * PAGE_SIZE as u64);

    // Drop + reopen: main file alone must carry the latest content.
    // (The WAL is empty, so any surviving correctness is on the main file.)
    drop(p);
    let p2 = Pager::open(&path).unwrap();
    assert_eq!(p2.header().page_count, 4);
    assert_eq!(p2.read_page(2).unwrap()[0], 0xA1);
    assert_eq!(p2.read_page(3).unwrap()[0], 0xB2);

    cleanup(&path);
}
992
#[test]
fn checkpoint_is_idempotent() {
    // Running `checkpoint` twice in a row: the first call flushes the one
    // committed page, the second has nothing left to flush and must
    // return 0 without an error.
    let path = tmp_path("ckpt_idempotent");
    let mut pager = Pager::create(&path).unwrap();

    pager.stage_page(2, make_page(0x42));
    let hdr = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    pager.commit(hdr).unwrap();

    let flushed_first = pager.checkpoint().unwrap();
    assert_eq!(flushed_first, 1);

    let flushed_second = pager.checkpoint().unwrap();
    assert_eq!(flushed_second, 0, "second checkpoint should be a no-op");

    cleanup(&path);
}
1016
#[test]
fn checkpoint_with_shrink_truncates_main_file() {
    // Two rounds of commit + checkpoint: first grow the database to five
    // pages, then shrink it to three. Once the second checkpoint has run
    // the main file must be exactly 3 * PAGE_SIZE bytes on disk, i.e. the
    // old tail pages are physically removed.
    let path = tmp_path("ckpt_shrink");
    let mut pager = Pager::create(&path).unwrap();

    // Header differing only in page count between the two commits.
    let header_with = |page_count| DbHeader {
        page_count,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    let main_file_len = || std::fs::metadata(&path).unwrap().len();

    // Round 1: grow.
    pager.stage_page(2, make_page(1));
    pager.stage_page(3, make_page(2));
    pager.stage_page(4, make_page(3));
    pager.commit(header_with(5)).unwrap();
    pager.checkpoint().unwrap();
    assert_eq!(main_file_len(), 5 * PAGE_SIZE as u64);

    // Round 2: shrink. No pages are staged; only the count changes.
    pager.commit(header_with(3)).unwrap();
    pager.checkpoint().unwrap();
    assert_eq!(
        main_file_len(),
        3 * PAGE_SIZE as u64,
        "main file should shrink to new page_count after checkpoint"
    );
    // Readers can no longer see page 4 either.
    assert!(pager.read_page(4).is_none());

    cleanup(&path);
}
1059
#[test]
fn auto_checkpoint_fires_past_frame_threshold() {
    // Commit often enough to reach AUTO_CHECKPOINT_THRESHOLD_FRAMES and
    // then check two things: the WAL has been cut back to its header
    // (so the automatic checkpoint ran), and the most recent committed
    // byte is still what a read returns.
    use crate::sql::pager::wal::WAL_HEADER_SIZE;
    let path = tmp_path("ckpt_auto");
    let mut pager = Pager::create(&path).unwrap();

    // A single commit here costs two frames (one dirty data frame and
    // the page-0 commit frame), hence ceil(THRESHOLD / 2) commits.
    let commits_needed = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
    for round in 0..commits_needed {
        let fill = (round & 0xff) as u8;
        pager.stage_page(2, make_page(fill));
        let hdr = DbHeader {
            page_count: 3,
            schema_root_page: 1,
            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
            freelist_head: 0,
        };
        pager.commit(hdr).unwrap();
    }

    // A header-only WAL means the checkpoint ran on the final commit.
    let wal_len = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert_eq!(
        wal_len, WAL_HEADER_SIZE as u64,
        "auto-checkpoint should have truncated the WAL"
    );

    // The byte written by the final loop iteration must be the visible one.
    let last_fill = ((commits_needed - 1) & 0xff) as u8;
    let visible = pager.read_page(2).unwrap()[0];
    assert_eq!(visible, last_fill);

    cleanup(&path);
}
1098
1099 // -------------------------------------------------------------------
1100 // Phase 4e — shared/exclusive lock modes
1101 // -------------------------------------------------------------------
1102
#[test]
fn two_read_only_openers_coexist() {
    // Phase 4e: opening the same database read-only twice at the same
    // time must succeed, and both handles must see the committed data.
    let path = tmp_path("ro_coexist");

    // Seed one committed page, then close the writer.
    let mut seeder = Pager::create(&path).unwrap();
    seeder.stage_page(2, make_page(0x55));
    let hdr = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    seeder.commit(hdr).unwrap();
    drop(seeder);

    let reader1 = Pager::open_read_only(&path).unwrap();
    let reader2 = Pager::open_read_only(&path).unwrap();

    // Each reader returns the committed byte.
    let seen_by_first = reader1.read_page(2).unwrap()[0];
    assert_eq!(seen_by_first, 0x55);
    let seen_by_second = reader2.read_page(2).unwrap()[0];
    assert_eq!(seen_by_second, 0x55);
    assert_eq!(reader1.access_mode(), AccessMode::ReadOnly);

    cleanup(&path);
}
1129
#[test]
fn read_write_blocks_read_only_and_vice_versa() {
    // Lock exclusion in both directions: while a read-write Pager is
    // open a read-only open must fail (message contains "locked for
    // writing"), and while a read-only Pager is open a read-write open
    // must fail (message contains "in use").
    let path = tmp_path("rw_vs_ro");
    let writer = Pager::create(&path).unwrap();

    // Direction 1: writer is live, reader is refused.
    let reader_attempt = Pager::open_read_only(&path);
    assert!(reader_attempt.is_err());
    let reader_msg = reader_attempt.unwrap_err().to_string();
    assert!(
        reader_msg.contains("locked for writing"),
        "read-only open while writer holds lock should mention writer; got: {reader_msg}"
    );

    drop(writer);

    // Direction 2: with the writer gone a reader gets in, and it in turn
    // keeps a new read-write open out.
    let _reader = Pager::open_read_only(&path).unwrap();
    let writer_attempt = Pager::open(&path);
    assert!(writer_attempt.is_err());
    let writer_msg = writer_attempt.unwrap_err().to_string();
    assert!(
        writer_msg.contains("in use"),
        "read-write open while reader holds lock should mention contention; got: {writer_msg}"
    );

    cleanup(&path);
}
1161
#[test]
fn read_only_pager_rejects_mutations() {
    // A Pager opened read-only must refuse both `commit` and
    // `checkpoint` with an error mentioning "read-only", while reads
    // keep working.
    let path = tmp_path("ro_rejects");

    // Seed one committed page so the read-only handle has data to serve.
    let mut seeder = Pager::create(&path).unwrap();
    seeder.stage_page(2, make_page(0x33));
    seeder
        .commit(DbHeader {
            page_count: 3,
            schema_root_page: 1,
            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
            freelist_head: 0,
        })
        .unwrap();
    drop(seeder);

    let mut ro = Pager::open_read_only(&path).unwrap();

    let attempted_header = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    let commit_msg = ro.commit(attempted_header).unwrap_err().to_string();
    assert!(
        commit_msg.contains("read-only"),
        "commit on RO pager should surface 'read-only'; got: {commit_msg}"
    );

    let ckpt_msg = ro.checkpoint().unwrap_err().to_string();
    assert!(
        ckpt_msg.contains("read-only"),
        "checkpoint on RO pager should surface 'read-only'; got: {ckpt_msg}"
    );

    // The rejected calls must not have broken reading.
    assert_eq!(ro.read_page(2).unwrap()[0], 0x33);

    cleanup(&path);
}
1202
#[test]
fn read_only_open_without_wal_sidecar_succeeds() {
    // A database file with no -wal sidecar next to it (deleted, or
    // written before Phase 4c introduced the WAL) must still open
    // read-only. Reads then come from the main file, and the open must
    // not create a sidecar.
    let path = tmp_path("ro_no_wal");
    let sidecar = wal_path_for(&path);

    let mut seeder = Pager::create(&path).unwrap();
    seeder.stage_page(2, make_page(0x44));
    let hdr = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    seeder.commit(hdr).unwrap();
    // Checkpoint first so the main file holds the committed page before
    // the WAL is removed.
    seeder.checkpoint().unwrap();
    drop(seeder);

    // Delete the sidecar.
    std::fs::remove_file(&sidecar).unwrap();

    let ro = Pager::open_read_only(&path).unwrap();
    assert_eq!(ro.read_page(2).unwrap()[0], 0x44);
    // The read-only open left no WAL file behind.
    assert!(!sidecar.exists());
    cleanup(&path);
}
1232
#[test]
fn reopen_after_crash_between_data_write_and_header_write_recovers_via_wal() {
    // Models a `checkpoint` that crashed after its data pages reached the
    // main file (step 2) but before the header was rewritten (step 3).
    // The main file then has the new page bytes under the old header,
    // and the WAL still holds every committed frame. Opening the
    // database must rebuild the post-commit view from the WAL.
    use std::io::{Seek, SeekFrom, Write};

    let path = tmp_path("ckpt_crash_mid_flush");

    // Commit page 2 through the normal path, then close the Pager so its
    // exclusive lock is released before the file is edited by hand.
    let mut committed = Pager::create(&path).unwrap();
    committed.stage_page(2, make_page(0xEE));
    let hdr = DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    };
    committed.commit(hdr).unwrap();
    drop(committed);

    // Hand-write page 2 at offset 2 * PAGE_SIZE, standing in for the half
    // of the checkpoint that completed. Writing at that offset grows the
    // file; the header at offset 0 is not touched and still carries the
    // page count from `create`.
    {
        let mut main_file = std::fs::OpenOptions::new().write(true).open(&path).unwrap();
        let page_two_offset = 2 * PAGE_SIZE as u64;
        main_file.seek(SeekFrom::Start(page_two_offset)).unwrap();
        main_file.write_all(&make_page(0xEE)).unwrap();
        main_file.sync_all().unwrap();
    }

    // On reopen the WAL must win: the page count comes from the commit
    // frame (3) rather than the stale main-file header, and page 2 is
    // served with the committed byte. The hand-written bytes are the same
    // value here, but the result must not depend on that.
    let reopened = Pager::open(&path).unwrap();
    assert_eq!(reopened.header().page_count, 3);
    assert_eq!(reopened.read_page(2).unwrap()[0], 0xEE);
    cleanup(&path);
}
1280
#[test]
fn auto_checkpoint_crosses_threshold_mid_loop() {
    // Pins the exact-threshold semantics: `commit` must trigger a
    // checkpoint as soon as the WAL's frame count hits the threshold,
    // not later. One commit short of the threshold the WAL must still
    // hold frames; right after the crossing commit it must be header-only.
    use crate::sql::pager::wal::WAL_HEADER_SIZE;

    let path = tmp_path("ckpt_threshold_crossing");
    let mut p = Pager::create(&path).unwrap();
    // Each commit below appends one dirty data frame plus one commit
    // frame, so ceil(THRESHOLD / 2) commits reach the threshold.
    let commits_to_cross = AUTO_CHECKPOINT_THRESHOLD_FRAMES.div_ceil(2);
    for i in 0..commits_to_cross - 1 {
        p.stage_page(2, make_page((i & 0xff) as u8));
        p.commit(DbHeader {
            page_count: 3,
            schema_root_page: 1,
            format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
            freelist_head: 0,
        })
        .unwrap();
    }
    // One short of the threshold — WAL must not yet have been flushed.
    let pre = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert!(
        pre > WAL_HEADER_SIZE as u64,
        "WAL should still carry frames right before the crossing commit"
    );

    // The crossing commit: this one's the trigger. Its fill byte continues
    // the loop's sequence instead of using a fixed value, so it always
    // differs from the previous commit's byte. A fixed byte could equal
    // the last loop value for some thresholds, and then this commit would
    // write no dirty data frame and the frame arithmetic would not hold.
    let crossing_fill = ((commits_to_cross - 1) & 0xff) as u8;
    p.stage_page(2, make_page(crossing_fill));
    p.commit(DbHeader {
        page_count: 3,
        schema_root_page: 1,
        format_version: crate::sql::pager::header::FORMAT_VERSION_BASELINE,
        freelist_head: 0,
    })
    .unwrap();
    let post = std::fs::metadata(wal_path_for(&path)).unwrap().len();
    assert_eq!(
        post,
        WAL_HEADER_SIZE as u64,
        "WAL must be header-only right after the threshold-crossing commit"
    );

    cleanup(&path);
}
1325}