obj_core/wal/frame.rs
1//! WAL frame encode / decode helpers.
2//!
3//! Byte layout is documented in `docs/format.md` § Write-ahead log.
4//! This module is the single place that knows it; the parent
5//! [`super::Wal`] only calls into the helpers exposed here.
6
7#![forbid(unsafe_code)]
8
9use crate::pager::checksum::crc32c_append;
10use crate::pager::page::PAGE_SIZE;
11
/// File magic for the WAL header. ASCII `OBJW`.
pub const WAL_MAGIC: [u8; 4] = *b"OBJW";

/// Fixed size, in bytes, of the WAL file header. Frame 0 starts
/// immediately after it (see [`frame_offset`]). See `docs/format.md`
/// § Write-ahead log → WAL header.
pub const WAL_HEADER_SIZE: usize = 64;

/// Fixed size, in bytes, of a per-frame header (preceding the page
/// body).
pub const FRAME_HEADER_SIZE: usize = 64;

/// Per-frame size on disk for a **plaintext** WAL: header + page body.
/// Equal to 4160 bytes.
pub const FRAME_SIZE: usize = FRAME_HEADER_SIZE + PAGE_SIZE;

/// Phase 4 (issue #9): per-frame AEAD suffix length for encrypted
/// WALs (24-byte XChaCha20-Poly1305 nonce + 16-byte Poly1305 tag = 40
/// bytes). Frames in encrypted WALs are
/// `FRAME_SIZE + FRAME_AEAD_SUFFIX_SIZE = 4200` bytes on disk; the
/// extra bytes sit AFTER the ciphertext body.
pub const FRAME_AEAD_SUFFIX_SIZE: usize = 24 + 16;

/// Phase 4 (issue #9): per-frame size on disk for an **encrypted**
/// WAL: the plaintext [`FRAME_SIZE`] plus [`FRAME_AEAD_SUFFIX_SIZE`].
pub const FRAME_SIZE_ENCRYPTED: usize = FRAME_SIZE + FRAME_AEAD_SUFFIX_SIZE;
36
37/// Phase 4 (issue #9): pick the right per-frame size given the
38/// `encrypted` flag.
39#[must_use]
40pub const fn frame_size_for(encrypted: bool) -> usize {
41 if encrypted {
42 FRAME_SIZE_ENCRYPTED
43 } else {
44 FRAME_SIZE
45 }
46}
47
// --- Field offsets within a frame. See docs/format.md. ----------------
// Multi-byte fields are stored little-endian. Header bytes not named
// below (offsets 21..60) are reserved; the encoder writes them as zero.

/// Offset of the 8-byte page id.
const OFF_PAGE_ID: usize = 0;
/// Offset of the 8-byte log sequence number.
const OFF_LSN: usize = 8;
/// Offset of the 4-byte WAL generation salt.
const OFF_SALT: usize = 16;
/// Offset of the single flags byte.
const OFF_FLAGS: usize = 20;
/// Offset of the 4-byte CRC32C; it occupies the last four bytes of the
/// 64-byte frame header.
const OFF_CRC: usize = 60;

/// Flags-byte bit marking a commit frame. It is the only flag bit the
/// decoder accepts; any other set bit makes the frame `Malformed`.
const FLAG_COMMIT: u8 = 0x01;
56
/// In-memory representation of a WAL frame header.
///
/// Holds only the decoded fields; the on-disk CRC and reserved bytes
/// are handled by the encode / decode helpers and are not stored here.
/// Two headers compare equal when all four fields are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FrameHeader {
    /// Page-id whose payload this frame replaces.
    pub page_id: u64,
    /// Monotonic per-WAL-generation log sequence number.
    pub lsn: u64,
    /// WAL generation salt; must match the WAL header.
    pub salt: u32,
    /// `true` iff this frame is the last in its transaction (commit
    /// marker).
    pub commit: bool,
}
70
71/// Encode `header` into the first [`FRAME_HEADER_SIZE`] bytes of
72/// `buf` AND compute the per-frame CRC32C covering the frame header
73/// (with the CRC field zeroed) plus the page body that follows.
74///
75/// `buf.len()` must equal [`FRAME_SIZE`]; the page body must already
76/// have been written into `buf[FRAME_HEADER_SIZE..]`.
77///
78/// **Phase 4 (issue #9):** on encrypted WALs the CRC is computed
79/// over (`header_sans_crc` + PLAINTEXT body); the caller MUST pass
80/// a buffer whose `[FRAME_HEADER_SIZE..FRAME_SIZE]` slice carries
81/// the plaintext body before invoking this helper. Encryption of
82/// the body happens AFTER `encode_frame_header` returns (see
83/// `Wal::write_frame`).
84pub fn encode_frame_header(header: &FrameHeader, buf: &mut [u8]) {
85 debug_assert_eq!(buf.len(), FRAME_SIZE, "frame buffer must be FRAME_SIZE");
86 // Zero the header region first (reserved bytes stay zero).
87 for b in buf.iter_mut().take(FRAME_HEADER_SIZE) {
88 *b = 0;
89 }
90 buf[OFF_PAGE_ID..OFF_PAGE_ID + 8].copy_from_slice(&header.page_id.to_le_bytes());
91 buf[OFF_LSN..OFF_LSN + 8].copy_from_slice(&header.lsn.to_le_bytes());
92 buf[OFF_SALT..OFF_SALT + 4].copy_from_slice(&header.salt.to_le_bytes());
93 buf[OFF_FLAGS] = if header.commit { FLAG_COMMIT } else { 0 };
94 // CRC covers bytes [0, OFF_CRC) and bytes [FRAME_HEADER_SIZE,
95 // FRAME_SIZE). The CRC field itself ([OFF_CRC, OFF_CRC+4)) is
96 // excluded; reserved bytes after the flag are zero.
97 let crc = compute_frame_crc(buf);
98 buf[OFF_CRC..OFF_CRC + 4].copy_from_slice(&crc.to_le_bytes());
99}
100
101/// Decode and validate a frame from `buf`. Returns `None` if the
102/// CRC does not validate, if the salt does not match
103/// `expected_salt`, or if reserved bytes are non-zero.
104///
105/// Caller-side: `None` means "tail" — recovery stops here.
106#[must_use]
107pub fn decode_frame_header(buf: &[u8], expected_salt: u32) -> Option<FrameHeader> {
108 match decode_frame_header_classified(buf, expected_salt) {
109 FrameDecode::Ok(header) => Some(header),
110 FrameDecode::SaltMismatch | FrameDecode::CrcInvalid | FrameDecode::Malformed => None,
111 }
112}
113
/// Classified outcome of decoding a single WAL frame. Pass-2 of recovery
/// distinguishes "torn tail / stale generation" (silently discarded)
/// from "CRC mismatch in a frame that should have been valid" (which
/// surfaces as `Error::WalCorruption`). See `docs/format.md`
/// § Recovery semantics.
///
/// [`decode_frame_header_classified`] decides the outcome in a fixed
/// order — buffer length, then salt, then CRC, then flag bits — and
/// reports the first check that fails.
#[derive(Debug)]
pub enum FrameDecode {
    /// Length, salt, CRC and flag bits all check out — a usable frame.
    Ok(FrameHeader),
    /// The frame's salt does not match `expected_salt`. The frame may
    /// be torn-tail bytes from a previous generation or an in-progress
    /// torn write; in either case it is **not** corruption. The CRC is
    /// not examined for such frames.
    SaltMismatch,
    /// Salt matches but the CRC32C does not validate. In pass 2 (frames
    /// before the last commit marker) this is `Error::WalCorruption`;
    /// in pass 1 / past the last commit it is torn tail.
    CrcInvalid,
    /// The buffer is not exactly one frame long, or the frame passed
    /// the salt and CRC checks but sets flag bits other than the commit
    /// bit. Treated as torn tail by recovery (forward-compat boundary).
    Malformed,
}
135
136/// Classify a single frame buffer. The buffer length must equal
137/// [`FRAME_SIZE`]; otherwise [`FrameDecode::Malformed`] is returned.
138#[must_use]
139pub fn decode_frame_header_classified(buf: &[u8], expected_salt: u32) -> FrameDecode {
140 if buf.len() != FRAME_SIZE {
141 return FrameDecode::Malformed;
142 }
143 let salt = u32::from_le_bytes([
144 buf[OFF_SALT],
145 buf[OFF_SALT + 1],
146 buf[OFF_SALT + 2],
147 buf[OFF_SALT + 3],
148 ]);
149 if salt != expected_salt {
150 return FrameDecode::SaltMismatch;
151 }
152 let stored_crc = u32::from_le_bytes([
153 buf[OFF_CRC],
154 buf[OFF_CRC + 1],
155 buf[OFF_CRC + 2],
156 buf[OFF_CRC + 3],
157 ]);
158 let computed = compute_frame_crc(buf);
159 if stored_crc != computed {
160 return FrameDecode::CrcInvalid;
161 }
162 let flags = buf[OFF_FLAGS];
163 if flags & !FLAG_COMMIT != 0 {
164 // Unknown flag bits — treat as tail (forward-compat boundary).
165 return FrameDecode::Malformed;
166 }
167 let page_id = u64::from_le_bytes([
168 buf[OFF_PAGE_ID],
169 buf[OFF_PAGE_ID + 1],
170 buf[OFF_PAGE_ID + 2],
171 buf[OFF_PAGE_ID + 3],
172 buf[OFF_PAGE_ID + 4],
173 buf[OFF_PAGE_ID + 5],
174 buf[OFF_PAGE_ID + 6],
175 buf[OFF_PAGE_ID + 7],
176 ]);
177 let lsn = u64::from_le_bytes([
178 buf[OFF_LSN],
179 buf[OFF_LSN + 1],
180 buf[OFF_LSN + 2],
181 buf[OFF_LSN + 3],
182 buf[OFF_LSN + 4],
183 buf[OFF_LSN + 5],
184 buf[OFF_LSN + 6],
185 buf[OFF_LSN + 7],
186 ]);
187 FrameDecode::Ok(FrameHeader {
188 page_id,
189 lsn,
190 salt,
191 commit: flags & FLAG_COMMIT != 0,
192 })
193}
194
195/// Byte offset of the frame at index `frame_index` (0-based, where
196/// 0 is the first frame after the WAL header).
197///
198/// Power-of-ten Rule 7: `frame_index` is caller-controlled (this is
199/// a `pub` helper used by tests and by external WAL inspection
200/// utilities). Saturate at `u64::MAX` on overflow rather than
201/// panicking — every production caller passes an index bounded by
202/// `committed_frames`, but a fuzz / forensic caller could pass an
203/// arbitrarily-large index and `overflow-checks = true` (Rule 10)
204/// would otherwise turn the multiply into a panic.
205///
206/// Phase 4 (issue #9): `frame_size` is the on-disk per-frame stride
207/// (4160 for unencrypted WALs, 4188 for encrypted ones). Use
208/// [`frame_size_for`] to pick the right value.
209#[must_use]
210pub fn frame_offset(frame_index: u64, frame_size: usize) -> u64 {
211 frame_index
212 .checked_mul(frame_size as u64)
213 .and_then(|product| product.checked_add(WAL_HEADER_SIZE as u64))
214 .unwrap_or(u64::MAX)
215}
216
217/// Compute the CRC32C over (frame header with its CRC field zeroed)
218/// ++ (page body), folding the three contiguous segments directly
219/// into the running CRC via [`crc32c_append`] — no scratch buffer,
220/// no memcpy.
221///
222/// The segments, in order, are exactly the bytes the previous
223/// memcpy-based implementation laid out in its linear scratch:
224/// 1. `buf[0..OFF_CRC]` — header bytes before the CRC field
225/// 2. four zero bytes — the zeroed CRC field (`[OFF_CRC, 64)`)
226/// 3. `buf[OFF_CRC + 4..]` — the full ~4096-byte page body
227///
228/// Because `OFF_CRC + 4 == FRAME_HEADER_SIZE` (the CRC field is the
229/// final 4 bytes of the 64-byte header), segment 3 is precisely the
230/// page body that followed the header in the old linear scratch —
231/// there are no reserved header bytes between the CRC field and the
232/// body. The `debug_assert!` below pins that invariant.
233///
234/// `crc32c` is `crc32c_append(0, ..)` and CRC32C is computed strictly
235/// left-to-right over the byte stream, so folding the segments
236/// incrementally is byte-identical to one `crc32c` call over their
237/// concatenation. The output therefore matches the prior
238/// scratch-buffer implementation bit-for-bit, which is mandatory:
239/// this function is shared by writer encode AND reader/recovery
240/// decode, and any divergence would silently corrupt the on-disk
241/// format.
242///
243/// (`crc32c 0.6.8` DOES expose `crc32c_append`; the earlier comment
244/// claiming otherwise was stale.)
245fn compute_frame_crc(buf: &[u8]) -> u32 {
246 debug_assert_eq!(buf.len(), FRAME_SIZE);
247 debug_assert_eq!(OFF_CRC + 4, FRAME_HEADER_SIZE, "CRC field ends the header");
248 // Segment 1: header bytes preceding the CRC field.
249 let crc = crc32c_append(0, &buf[..OFF_CRC]);
250 // Segment 2: the CRC field itself, folded as four zero bytes so
251 // the result is independent of any stale CRC bytes in `buf`.
252 let crc = crc32c_append(crc, &[0u8; 4]);
253 // Segment 3: the full page body (the bytes after the header).
254 crc32c_append(crc, &buf[OFF_CRC + 4..])
255}
256
#[cfg(test)]
mod tests {
    use super::{
        compute_frame_crc, decode_frame_header, decode_frame_header_classified,
        encode_frame_header, FrameDecode, FrameHeader, FRAME_HEADER_SIZE, FRAME_SIZE, OFF_CRC,
        OFF_FLAGS,
    };
    use crate::pager::checksum::crc32c;

    /// Reference implementation: the original 3-step, single-`crc32c`
    /// algorithm (build a contiguous `[u8; FRAME_SIZE]` scratch with
    /// the full header, zero the CRC field, copy in the body, hash
    /// once). The production `compute_frame_crc` MUST agree with this
    /// for every input — it is the on-disk format contract shared by
    /// the writer and by recovery.
    fn compute_frame_crc_reference(buf: &[u8]) -> u32 {
        assert_eq!(buf.len(), FRAME_SIZE);
        let mut linear = [0u8; FRAME_SIZE];
        linear[..FRAME_HEADER_SIZE].copy_from_slice(&buf[..FRAME_HEADER_SIZE]);
        for b in &mut linear[OFF_CRC..OFF_CRC + 4] {
            *b = 0;
        }
        linear[FRAME_HEADER_SIZE..].copy_from_slice(&buf[FRAME_HEADER_SIZE..]);
        crc32c(&linear)
    }

    /// THE load-bearing test (#85): the `crc32c_append`-based
    /// `compute_frame_crc` must be byte-identical to the original
    /// memcpy reference for a spread of pseudo-random `(header, body)`
    /// inputs, INCLUDING non-zero CRC-field bytes (which both
    /// implementations must ignore by treating that region as zero).
    #[test]
    fn crc_byte_identical_to_memcpy_reference() {
        // Deterministic xorshift PRNG — no external dep, reproducible.
        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
        let mut next = || {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            state
        };
        for case in 0..256u32 {
            let mut buf = [0u8; FRAME_SIZE];
            for b in &mut buf {
                *b = u8::try_from(next() & 0xFF).expect("masked to a byte");
            }
            let got = compute_frame_crc(&buf);
            let want = compute_frame_crc_reference(&buf);
            assert_eq!(
                got, want,
                "case {case}: crc32c_append result diverged from memcpy reference"
            );
        }
        // Edge cases: all-zero and all-ones frames.
        let zero = [0u8; FRAME_SIZE];
        assert_eq!(compute_frame_crc(&zero), compute_frame_crc_reference(&zero));
        let ones = [0xFFu8; FRAME_SIZE];
        assert_eq!(compute_frame_crc(&ones), compute_frame_crc_reference(&ones));
    }

    /// The CRC must be independent of whatever bytes already sit in
    /// the CRC field — both the new impl and the reference treat that
    /// 4-byte region as zero.
    #[test]
    fn crc_ignores_stale_crc_field_bytes() {
        let mut stale = [0u8; FRAME_SIZE];
        for (i, byte) in stale.iter_mut().enumerate() {
            *byte = u8::try_from(i & 0xFF).expect("masked");
        }
        let mut zeroed = stale;
        stale[OFF_CRC..OFF_CRC + 4].copy_from_slice(&0xDEAD_BEEFu32.to_le_bytes());
        zeroed[OFF_CRC..OFF_CRC + 4].copy_from_slice(&0u32.to_le_bytes());
        assert_eq!(compute_frame_crc(&stale), compute_frame_crc(&zeroed));
        assert_eq!(
            compute_frame_crc(&stale),
            compute_frame_crc_reference(&stale)
        );
    }

    /// Encoding then decoding with the matching salt returns every
    /// header field unchanged.
    #[test]
    fn round_trip_basic_frame() {
        let header = FrameHeader {
            page_id: 7,
            lsn: 42,
            salt: 0xDEAD_BEEF,
            commit: true,
        };
        let mut buf = vec![0u8; FRAME_SIZE];
        // Some payload pattern at the start of the page body.
        for (i, b) in buf.iter_mut().enumerate().skip(FRAME_HEADER_SIZE).take(128) {
            *b = u8::try_from(i & 0xFF).expect("masked");
        }
        encode_frame_header(&header, &mut buf);
        let decoded = decode_frame_header(&buf, 0xDEAD_BEEF).expect("decode");
        assert_eq!(decoded.page_id, 7);
        assert_eq!(decoded.lsn, 42);
        assert_eq!(decoded.salt, 0xDEAD_BEEF);
        assert!(decoded.commit);
    }

    /// A frame whose salt differs from the expected one is tail, and is
    /// classified as a salt mismatch rather than a CRC failure.
    #[test]
    fn salt_mismatch_yields_tail() {
        let header = FrameHeader {
            page_id: 1,
            lsn: 1,
            salt: 1,
            commit: true,
        };
        let mut buf = vec![0u8; FRAME_SIZE];
        encode_frame_header(&header, &mut buf);
        assert!(decode_frame_header(&buf, 2).is_none());
        assert!(matches!(
            decode_frame_header_classified(&buf, 2),
            FrameDecode::SaltMismatch
        ));
    }

    /// Flipping a single body bit after encoding must fail the CRC.
    #[test]
    fn flipped_body_invalidates_crc() {
        let header = FrameHeader {
            page_id: 1,
            lsn: 1,
            salt: 1,
            commit: false,
        };
        let mut buf = vec![0u8; FRAME_SIZE];
        buf[FRAME_HEADER_SIZE + 50] = 0xAA;
        encode_frame_header(&header, &mut buf);
        assert!(decode_frame_header(&buf, 1).is_some());
        buf[FRAME_HEADER_SIZE + 50] ^= 0x01;
        assert!(decode_frame_header(&buf, 1).is_none());
        assert!(matches!(
            decode_frame_header_classified(&buf, 1),
            FrameDecode::CrcInvalid
        ));
    }

    /// A frame with a valid CRC but an unknown flag bit is tail, and
    /// must be rejected for the flag bit — not for a bad CRC.
    #[test]
    fn unknown_flag_bits_are_tail() {
        let header = FrameHeader {
            page_id: 1,
            lsn: 1,
            salt: 1,
            commit: true,
        };
        let mut buf = vec![0u8; FRAME_SIZE];
        encode_frame_header(&header, &mut buf);
        // Forge an unknown flag bit.
        buf[OFF_FLAGS] = 0x80;
        // Need to recompute CRC for the forge to look honest.
        let crc = compute_frame_crc(&buf);
        buf[OFF_CRC..OFF_CRC + 4].copy_from_slice(&crc.to_le_bytes());
        assert!(decode_frame_header(&buf, 1).is_none());
        // `Malformed` (not `CrcInvalid`) proves the forged CRC was
        // accepted and the flag check is what rejected the frame.
        assert!(matches!(
            decode_frame_header_classified(&buf, 1),
            FrameDecode::Malformed
        ));
    }

    /// A buffer that is not exactly one frame long is rejected before
    /// any field is read.
    #[test]
    fn wrong_length_is_malformed() {
        let header = FrameHeader {
            page_id: 1,
            lsn: 1,
            salt: 1,
            commit: true,
        };
        let mut buf = vec![0u8; FRAME_SIZE];
        encode_frame_header(&header, &mut buf);
        let truncated = &buf[..FRAME_SIZE - 1];
        assert!(decode_frame_header(truncated, 1).is_none());
        assert!(matches!(
            decode_frame_header_classified(truncated, 1),
            FrameDecode::Malformed
        ));
        assert!(matches!(
            decode_frame_header_classified(&[], 1),
            FrameDecode::Malformed
        ));
    }
}
399}