obj_core/codec/header.rs
1//! Per-document record header — encode / decode.
2//!
3//! See `docs/format.md` § Document records for the byte layout. The
4//! header is 16 bytes laid out as four little-endian `u32` fields,
5//! immediately followed by the `postcard` payload. The page-level
6//! CRC32C trailer (on the B+tree leaf containing this record) covers
7//! everything around the record; the header's own `payload_crc32c`
8//! covers only the payload bytes so that a forensic tool can verify
9//! a record in isolation.
10
11#![forbid(unsafe_code)]
12
13use crate::btree::{max_inline_value, max_key_len};
14use crate::error::{Error, Result};
15
/// Total size, in bytes, of the fixed per-document header: four
/// consecutive `u32` fields.
pub const DOC_HEADER_SIZE: usize = 4 * std::mem::size_of::<u32>();

// Byte offset of each field within the header. Every offset is the
// previous one plus the width of a `u32`, so the values work out to
// 0, 4, 8 and 12 — the layout given in `docs/format.md` § Document
// records, for which these constants are the single source of truth
// in code.
const OFF_COLLECTION_ID: usize = 0;
const OFF_TYPE_VERSION: usize = OFF_COLLECTION_ID + std::mem::size_of::<u32>();
const OFF_PAYLOAD_LEN: usize = OFF_TYPE_VERSION + std::mem::size_of::<u32>();
const OFF_PAYLOAD_CRC32C: usize = OFF_PAYLOAD_LEN + std::mem::size_of::<u32>();
25
26/// Maximum on-disk record length that still fits inline in a B+tree
27/// leaf alongside at least one slot.
28///
29/// Equals the codec's slice of the leaf's `max_inline_value` budget
30/// at the worst-case key length (`max_key_len()`). Records exceeding
31/// this bound return [`Error::DocumentTooLarge`].
32///
33/// The value is computed at compile time from
34/// [`crate::btree::max_inline_value`] so the codec and the B+tree
35/// agree on the bound: any record the codec accepts will also fit
36/// in a leaf — there is no second runtime check at insert time.
37pub const MAX_INLINE_DOC: usize = max_inline_value(max_key_len());
38
/// In-memory representation of the per-document record header.
///
/// Constructed by [`DocumentHeader::read_from`] (on decode) or by
/// the codec on encode. All four fields are stored on disk as
/// little-endian `u32`, in the order they are declared here, for a
/// total of [`DOC_HEADER_SIZE`] bytes.
///
/// Decoding a header performs no semantic validation of these
/// values; checking the CRC, the collection id and the version range
/// is left to the caller of [`DocumentHeader::read_from`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct DocumentHeader {
    /// The catalog-assigned id of the collection this record belongs
    /// to. Decode rejects mismatches with
    /// [`Error::CollectionIdMismatch`].
    pub collection_id: u32,
    /// `Document::VERSION` of the type that wrote this record.
    pub type_version: u32,
    /// Number of payload bytes that follow this header.
    pub payload_len: u32,
    /// CRC32C of the payload bytes only — the page-trailer CRC32C
    /// on the containing leaf covers everything else.
    pub payload_crc32c: u32,
}
58
59impl DocumentHeader {
60 /// Write the header into `dst`. Appends exactly
61 /// [`DOC_HEADER_SIZE`] bytes.
62 ///
63 /// Used by [`crate::codec::encode`]; the format is the
64 /// canonical disk shape so the buffer can be handed straight to
65 /// `BTree::insert` without further wrapping.
66 pub fn write_to(&self, dst: &mut Vec<u8>) {
67 debug_assert_eq!(
68 OFF_PAYLOAD_CRC32C + 4,
69 DOC_HEADER_SIZE,
70 "header offsets must cover exactly DOC_HEADER_SIZE bytes"
71 );
72 // Reserve capacity for the header in one allocation; the
73 // caller's outer buffer typically already pre-allocated for
74 // the full record.
75 dst.reserve(DOC_HEADER_SIZE);
76 dst.extend_from_slice(&self.collection_id.to_le_bytes());
77 dst.extend_from_slice(&self.type_version.to_le_bytes());
78 dst.extend_from_slice(&self.payload_len.to_le_bytes());
79 dst.extend_from_slice(&self.payload_crc32c.to_le_bytes());
80 }
81
82 /// Decode a header from `bytes`. Validates only the layout
83 /// (length >= [`DOC_HEADER_SIZE`]); semantic checks (CRC,
84 /// collection-id, version range) are the caller's responsibility.
85 ///
86 /// # Errors
87 ///
88 /// Returns [`Error::Corruption`] with `page_id = 0` if `bytes`
89 /// is shorter than [`DOC_HEADER_SIZE`].
90 pub fn read_from(bytes: &[u8]) -> Result<Self> {
91 if bytes.len() < DOC_HEADER_SIZE {
92 return Err(Error::Corruption { page_id: 0 });
93 }
94 let collection_id = u32::from_le_bytes(read_array(bytes, OFF_COLLECTION_ID));
95 let type_version = u32::from_le_bytes(read_array(bytes, OFF_TYPE_VERSION));
96 let payload_len = u32::from_le_bytes(read_array(bytes, OFF_PAYLOAD_LEN));
97 let payload_crc32c = u32::from_le_bytes(read_array(bytes, OFF_PAYLOAD_CRC32C));
98 Ok(Self {
99 collection_id,
100 type_version,
101 payload_len,
102 payload_crc32c,
103 })
104 }
105}
106
/// Copy the `N` bytes starting at `off` out of `bytes` into a
/// fixed-size array.
///
/// Mirrors the helper in `pager::header` so the codec does not need
/// to import pager internals. The caller must guarantee
/// `off + N <= bytes.len()` (every call-site reads a field whose
/// offset is checked against `DOC_HEADER_SIZE` upstream); a
/// violation panics.
fn read_array<const N: usize>(bytes: &[u8], off: usize) -> [u8; N] {
    let end = off + N;
    debug_assert!(end <= bytes.len(), "header field out of bounds");
    // Slice first so an out-of-range request fails on the range as a
    // whole; `field` is then exactly `N` bytes long.
    let field = &bytes[off..end];
    std::array::from_fn(|i| field[i])
}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize `header` into a fresh, empty buffer.
    fn encode(header: &DocumentHeader) -> Vec<u8> {
        let mut buf = Vec::new();
        header.write_to(&mut buf);
        buf
    }

    /// Encoding then decoding yields the original header, and the
    /// encoded form is exactly `DOC_HEADER_SIZE` bytes long.
    #[test]
    fn header_round_trip() {
        let original = DocumentHeader {
            collection_id: 0x1122_3344,
            type_version: 5,
            payload_len: 99,
            payload_crc32c: 0xDEAD_BEEF,
        };
        let encoded = encode(&original);
        assert_eq!(encoded.len(), DOC_HEADER_SIZE);
        let decoded = DocumentHeader::read_from(&encoded).expect("decode");
        assert_eq!(decoded, original);
    }

    /// Field values are chosen so that a correct little-endian layout
    /// produces the byte sequence 0x01, 0x02, ..., 0x10.
    #[test]
    fn header_layout_little_endian() {
        let header = DocumentHeader {
            collection_id: 0x0403_0201,
            type_version: 0x0807_0605,
            payload_len: 0x0C0B_0A09,
            payload_crc32c: 0x100F_0E0D,
        };
        let expected: Vec<u8> = (0x01..=0x10).collect();
        assert_eq!(encode(&header), expected);
    }

    /// Input one byte short of a full header is reported as
    /// corruption with `page_id` 0.
    #[test]
    fn header_short_input_errors() {
        let truncated = [0u8; DOC_HEADER_SIZE - 1];
        let err = DocumentHeader::read_from(&truncated).expect_err("short input rejected");
        assert!(matches!(err, Error::Corruption { page_id: 0 }));
    }

    #[test]
    fn max_inline_doc_is_positive() {
        // Evaluated in a const block: the bound is verified when the
        // test build is compiled, not each time the test runs.
        const {
            assert!(
                MAX_INLINE_DOC > DOC_HEADER_SIZE,
                "MAX_INLINE_DOC must leave room for at least one payload byte",
            );
        }
    }
}