// obj_core/codec/header.rs

1//! Per-document record header — encode / decode.
2//!
3//! See `docs/format.md` § Document records for the byte layout. The
4//! header is 16 bytes laid out as four little-endian `u32` fields,
5//! immediately followed by the `postcard` payload. The page-level
6//! CRC32C trailer (on the B+tree leaf containing this record) covers
7//! everything around the record; the header's own `payload_crc32c`
8//! covers only the payload bytes so that a forensic tool can verify
9//! a record in isolation.
10
11#![forbid(unsafe_code)]
12
13use crate::btree::{max_inline_value, max_key_len};
14use crate::error::{Error, Result};
15
/// Width in bytes of a single header field; every field is a `u32`.
const U32_FIELD_WIDTH: usize = core::mem::size_of::<u32>();

/// Total encoded length of the per-document header, in bytes
/// (four `u32` fields, i.e. 16).
pub const DOC_HEADER_SIZE: usize = 4 * U32_FIELD_WIDTH;

// Byte offsets of the header fields, listed in on-disk order. Each
// field starts where the previous one ends, which yields 0, 4, 8 and
// 12 — the layout given in `docs/format.md` § Document records.
const OFF_COLLECTION_ID: usize = 0;
const OFF_TYPE_VERSION: usize = OFF_COLLECTION_ID + U32_FIELD_WIDTH;
const OFF_PAYLOAD_LEN: usize = OFF_TYPE_VERSION + U32_FIELD_WIDTH;
const OFF_PAYLOAD_CRC32C: usize = OFF_PAYLOAD_LEN + U32_FIELD_WIDTH;
25
26/// Maximum on-disk record length that still fits inline in a B+tree
27/// leaf alongside at least one slot.
28///
29/// Equals the codec's slice of the leaf's `max_inline_value` budget
30/// at the worst-case key length (`max_key_len()`). Records exceeding
31/// this bound return [`Error::DocumentTooLarge`].
32///
33/// The value is computed at compile time from
34/// [`crate::btree::max_inline_value`] so the codec and the B+tree
35/// agree on the bound: any record the codec accepts will also fit
36/// in a leaf — there is no second runtime check at insert time.
37pub const MAX_INLINE_DOC: usize = max_inline_value(max_key_len());
38
39/// In-memory representation of the per-document record header.
40///
41/// Constructed by [`DocumentHeader::read_from`] (on decode) or by
42/// the codec on encode. All four fields are stored on disk as
43/// little-endian `u32`.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
45pub struct DocumentHeader {
46    /// The catalog-assigned id of the collection this record belongs
47    /// to. Decode rejects mismatches with
48    /// [`Error::CollectionIdMismatch`].
49    pub collection_id: u32,
50    /// `Document::VERSION` of the type that wrote this record.
51    pub type_version: u32,
52    /// Number of payload bytes that follow this header.
53    pub payload_len: u32,
54    /// CRC32C of the payload bytes only — the page-trailer CRC32C
55    /// on the containing leaf covers everything else.
56    pub payload_crc32c: u32,
57}
58
59impl DocumentHeader {
60    /// Write the header into `dst`. Appends exactly
61    /// [`DOC_HEADER_SIZE`] bytes.
62    ///
63    /// Used by [`crate::codec::encode`]; the format is the
64    /// canonical disk shape so the buffer can be handed straight to
65    /// `BTree::insert` without further wrapping.
66    pub fn write_to(&self, dst: &mut Vec<u8>) {
67        debug_assert_eq!(
68            OFF_PAYLOAD_CRC32C + 4,
69            DOC_HEADER_SIZE,
70            "header offsets must cover exactly DOC_HEADER_SIZE bytes"
71        );
72        // Reserve capacity for the header in one allocation; the
73        // caller's outer buffer typically already pre-allocated for
74        // the full record.
75        dst.reserve(DOC_HEADER_SIZE);
76        dst.extend_from_slice(&self.collection_id.to_le_bytes());
77        dst.extend_from_slice(&self.type_version.to_le_bytes());
78        dst.extend_from_slice(&self.payload_len.to_le_bytes());
79        dst.extend_from_slice(&self.payload_crc32c.to_le_bytes());
80    }
81
82    /// Decode a header from `bytes`. Validates only the layout
83    /// (length >= [`DOC_HEADER_SIZE`]); semantic checks (CRC,
84    /// collection-id, version range) are the caller's responsibility.
85    ///
86    /// # Errors
87    ///
88    /// Returns [`Error::Corruption`] with `page_id = 0` if `bytes`
89    /// is shorter than [`DOC_HEADER_SIZE`].
90    pub fn read_from(bytes: &[u8]) -> Result<Self> {
91        if bytes.len() < DOC_HEADER_SIZE {
92            return Err(Error::Corruption { page_id: 0 });
93        }
94        let collection_id = u32::from_le_bytes(read_array(bytes, OFF_COLLECTION_ID));
95        let type_version = u32::from_le_bytes(read_array(bytes, OFF_TYPE_VERSION));
96        let payload_len = u32::from_le_bytes(read_array(bytes, OFF_PAYLOAD_LEN));
97        let payload_crc32c = u32::from_le_bytes(read_array(bytes, OFF_PAYLOAD_CRC32C));
98        Ok(Self {
99            collection_id,
100            type_version,
101            payload_len,
102            payload_crc32c,
103        })
104    }
105}
106
/// Copy the `N` bytes that start at `off` in `bytes` into an owned
/// array.
///
/// Mirrors the helper in `pager::header` so the codec does not need
/// to import pager internals. The caller must guarantee
/// `off + N <= bytes.len()`; every call-site reads a field whose
/// offset is checked against `DOC_HEADER_SIZE` upstream.
///
/// # Panics
///
/// Panics if the requested range lies outside `bytes`.
fn read_array<const N: usize>(bytes: &[u8], off: usize) -> [u8; N] {
    debug_assert!(off + N <= bytes.len(), "header field out of bounds");
    // Slice first so the bounds are checked once, then build the
    // array element by element from that window.
    let window = &bytes[off..off + N];
    std::array::from_fn(|i| window[i])
}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// Header whose four fields are all distinct, so a swapped or
    /// dropped field shows up in an equality check.
    fn sample_header() -> DocumentHeader {
        DocumentHeader {
            collection_id: 0x1122_3344,
            type_version: 5,
            payload_len: 99,
            payload_crc32c: 0xDEAD_BEEF,
        }
    }

    /// Encode `header` into a fresh buffer.
    fn encode(header: &DocumentHeader) -> Vec<u8> {
        let mut buf = Vec::new();
        header.write_to(&mut buf);
        buf
    }

    #[test]
    fn header_round_trip() {
        let h = sample_header();
        let buf = encode(&h);
        assert_eq!(buf.len(), DOC_HEADER_SIZE);
        let decoded = DocumentHeader::read_from(&buf).expect("decode");
        assert_eq!(decoded, h);
    }

    #[test]
    fn header_layout_little_endian() {
        // Field values chosen so the encoded header is the byte
        // sequence 0x01..=0x10 when every field is little-endian.
        let h = DocumentHeader {
            collection_id: 0x0403_0201,
            type_version: 0x0807_0605,
            payload_len: 0x0C0B_0A09,
            payload_crc32c: 0x100F_0E0D,
        };
        let expected: Vec<u8> = (0x01..=0x10).collect();
        assert_eq!(encode(&h), expected);
    }

    #[test]
    fn header_short_input_errors() {
        let err = DocumentHeader::read_from(&[0u8; DOC_HEADER_SIZE - 1])
            .expect_err("short input rejected");
        assert!(matches!(err, Error::Corruption { page_id: 0 }));
    }

    #[test]
    fn header_decodes_with_trailing_payload_bytes() {
        // Real records are header + payload; decode must read only
        // the leading header bytes and ignore what follows.
        let h = sample_header();
        let mut record = encode(&h);
        record.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
        let decoded = DocumentHeader::read_from(&record).expect("decode");
        assert_eq!(decoded, h);
    }

    #[test]
    fn write_to_appends_after_existing_bytes() {
        let prefix = [0x7Fu8, 0x80];
        let mut buf = prefix.to_vec();
        let h = sample_header();
        h.write_to(&mut buf);
        assert_eq!(buf.len(), prefix.len() + DOC_HEADER_SIZE);
        assert_eq!(&buf[..prefix.len()], &prefix);
        let decoded = DocumentHeader::read_from(&buf[prefix.len()..]).expect("decode");
        assert_eq!(decoded, h);
    }

    #[test]
    fn max_inline_doc_is_positive() {
        // const-time assertion so the bound is checked once at
        // compile time rather than per test run.
        const {
            assert!(
                MAX_INLINE_DOC > DOC_HEADER_SIZE,
                "MAX_INLINE_DOC must leave room for at least one payload byte",
            );
        }
    }
}