Skip to main content

reddb_server/storage/engine/btree/
value_layout.rs

1//! Large-value layout for B-tree leaf cells (slice E of PRD #662).
2//!
3//! Wires the standalone slice modules together at the write/read boundary:
4//!
5//! - [`value_codec`] (slice A, #698) decides whether the bytes are worth
6//!   compressing.
7//! - [`OverflowChain`] (slice B, #699) owns the chain of dedicated overflow
8//!   pages.
9//! - `reddb-file` pins the persisted two-bit `(pointer, compressed)` cell
10//!   shape and pointer payload encoding.
11//!
12//! The function entry points are [`encode`] and [`decode`]. They persist
13//! opaque cell bytes whose on-disk shape is owned by `reddb-file`.
14//!
15//! Decision ladder applied by [`encode`]:
16//!
17//! 1. `value.len() > MAX_VALUE_SIZE` → [`ValueLayoutError::ValueTooLarge`]
18//!    (no allocation, no LZ4 work).
19//! 2. `value.len() <= OVERFLOW_THRESHOLD` → inline raw.
20//! 3. Else try LZ4. If the compressed bytes fit inline → inline compressed.
21//! 4. Else spill via [`OverflowChain`], storing only the pointer in the
22//!    leaf cell.
23//!
24//! Per ADR 0025 (overflow chain MVCC) the chain identity is anchored at
25//! the leaf cell, so this slice does not add any WAL records of its own
26//! — the overflow page writes go through the existing pager path.
27
28use crate::storage::engine::overflow::{OverflowChain, OverflowError};
29use crate::storage::engine::pager::Pager;
30use crate::storage::engine::vector_btree::value_codec;
31use reddb_file::BTreeValueCell;
32
33/// At or below this length a value stores inline in the leaf, raw or
34/// compressed. Above it, the value spills via [`OverflowChain`]. Set per
35/// ADR 0023 — preserves leaf fanout regardless of page size.
36pub const OVERFLOW_THRESHOLD: usize = reddb_file::BTREE_VALUE_OVERFLOW_THRESHOLD;
37
38/// Hard upper bound on logical value size. Values above this are
39/// rejected before any LZ4 or overflow work runs.
40/// 2^28 = 256 MiB per ADR 0023.
41pub const MAX_VALUE_SIZE: usize = reddb_file::BTREE_VALUE_MAX_SIZE;
42
43/// Total stored bytes for a pointer cell — flag byte + payload.
44pub const POINTER_CELL_LEN: usize = reddb_file::BTREE_VALUE_POINTER_CELL_LEN;
45
46/// Errors returned by [`encode`] and [`decode`].
47#[derive(Debug)]
48pub enum ValueLayoutError {
49    /// Logical value length exceeds [`MAX_VALUE_SIZE`].
50    ValueTooLarge(usize),
51    /// Stored cell flag byte sets a reserved bit.
52    UnknownFlag(u8),
53    /// Stored bytes ended before the expected pointer payload.
54    TruncatedPointer { got: usize },
55    /// LZ4 decode failed.
56    Codec(value_codec::ValueCodecError),
57    /// Overflow chain operation failed.
58    Overflow(OverflowError),
59}
60
61impl std::fmt::Display for ValueLayoutError {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Self::ValueTooLarge(n) => {
65                write!(f, "value too large: {} bytes (max {})", n, MAX_VALUE_SIZE)
66            }
67            Self::UnknownFlag(b) => write!(f, "unknown leaf-cell flag byte: 0b{:08b}", b),
68            Self::TruncatedPointer { got } => {
69                write!(
70                    f,
71                    "pointer cell truncated: need {} bytes after flag, got {}",
72                    POINTER_CELL_LEN - 1,
73                    got
74                )
75            }
76            Self::Codec(e) => write!(f, "value codec: {}", e),
77            Self::Overflow(e) => write!(f, "overflow chain: {}", e),
78        }
79    }
80}
81
82impl std::error::Error for ValueLayoutError {}
83
84impl From<value_codec::ValueCodecError> for ValueLayoutError {
85    fn from(e: value_codec::ValueCodecError) -> Self {
86        Self::Codec(e)
87    }
88}
89
90impl From<reddb_file::BTreeValueCellError> for ValueLayoutError {
91    fn from(e: reddb_file::BTreeValueCellError) -> Self {
92        match e {
93            reddb_file::BTreeValueCellError::UnknownFlag(flag) => Self::UnknownFlag(flag),
94            reddb_file::BTreeValueCellError::TruncatedPointer { got } => {
95                Self::TruncatedPointer { got }
96            }
97        }
98    }
99}
100
101impl From<OverflowError> for ValueLayoutError {
102    fn from(e: OverflowError) -> Self {
103        Self::Overflow(e)
104    }
105}
106
107/// Apply the decision ladder and return the bytes the B-tree should
108/// persist in the leaf cell value slot. May allocate overflow pages
109/// through `pager`.
110pub fn encode(pager: &Pager, value: &[u8]) -> Result<Vec<u8>, ValueLayoutError> {
111    if value.len() > MAX_VALUE_SIZE {
112        return Err(ValueLayoutError::ValueTooLarge(value.len()));
113    }
114
115    // Step 1: small values short-circuit to inline raw. No LZ4 work, no
116    // overflow allocation — preserves the legacy hot path for tiny
117    // values like entity IDs or short JSON.
118    if value.len() <= OVERFLOW_THRESHOLD {
119        return Ok(reddb_file::encode_btree_inline_raw(value));
120    }
121
122    // Step 2: above the threshold, try LZ4. The codec returns Raw when
123    // compression would not shrink the input.
124    let (codec_flag, codec_bytes) = value_codec::encode(value);
125
126    // Step 3: if compressed bytes (including their length header) fit
127    // inline, keep them in the leaf. Note this only fires when codec
128    // actually produced Lz4 bytes — Raw bytes ≥ original length > threshold
129    // by construction, so they cannot inline.
130    if codec_flag == value_codec::ValueFlag::Lz4 && codec_bytes.len() <= OVERFLOW_THRESHOLD {
131        return Ok(reddb_file::encode_btree_inline_compressed(&codec_bytes));
132    }
133
134    // Step 4: spill. We always store the bytes the codec produced —
135    // either Lz4 (with its 4-byte length header) so the reader can
136    // round-trip via the same codec, or Raw bytes when compression
137    // failed to shrink.
138    let is_compressed = codec_flag == value_codec::ValueFlag::Lz4;
139    let chain = OverflowChain::new(pager);
140    let (head, total_len) = chain.store(&codec_bytes)?;
141
142    Ok(reddb_file::encode_btree_pointer(
143        head,
144        total_len,
145        is_compressed,
146    ))
147}
148
149/// Inspect leaf-cell flags, follow the pointer if any, concatenate the
150/// chain, decode if compressed, return the materialised value.
151pub fn decode(pager: &Pager, stored: &[u8]) -> Result<Vec<u8>, ValueLayoutError> {
152    match reddb_file::decode_btree_value_cell(stored)? {
153        BTreeValueCell::Pointer {
154            is_compressed,
155            head_page_id,
156            total_len,
157        } => {
158            let chain = OverflowChain::new(pager);
159            let chain_bytes = chain.read(head_page_id, total_len)?;
160            if is_compressed {
161                Ok(value_codec::decode(
162                    value_codec::ValueFlag::Lz4,
163                    &chain_bytes,
164                )?)
165            } else {
166                Ok(chain_bytes)
167            }
168        }
169        BTreeValueCell::Inline {
170            is_compressed,
171            payload,
172        } => Ok(reddb_file::decode_btree_inline_payload(
173            is_compressed,
174            payload,
175        )?),
176    }
177}
178
179/// Return the head overflow page id when `stored` represents a pointer
180/// cell, else `None`. Returns `None` for inline (raw or compressed)
181/// cells and for empty/malformed cells. Callers use this from the
182/// B-tree delete and shrinking-update paths (slice F of PRD #662) to
183/// free the chain before the leaf cell goes away.
184pub fn pointer_head(stored: &[u8]) -> Option<u32> {
185    reddb_file::btree_value_pointer_head(stored)
186}
187
188/// `true` when [`encode`] would emit a spill pointer for a value of
189/// this length (without actually spilling). Used by the leaf-fit check
190/// in `bulk_insert_sorted` to size cells before allocation.
191#[inline]
192#[allow(dead_code)]
193pub fn projected_cell_len(input: &[u8]) -> usize {
194    let codec_len = value_codec::would_encode_to(input);
195    reddb_file::btree_projected_cell_len(input.len(), codec_len)
196}
197
198#[cfg(test)]
199mod tests {
200    use super::*;
201    use crate::storage::engine::pager::Pager;
202    use std::path::PathBuf;
203    use std::sync::atomic::{AtomicU64, Ordering};
204
205    fn temp_db_path() -> PathBuf {
206        static COUNTER: AtomicU64 = AtomicU64::new(0);
207        let id = COUNTER.fetch_add(1, Ordering::Relaxed);
208        let mut path = std::env::temp_dir();
209        path.push(format!(
210            "reddb_value_layout_test_{}_{}.db",
211            std::process::id(),
212            id
213        ));
214        path
215    }
216
217    fn cleanup(path: &std::path::Path) {
218        let _ = std::fs::remove_file(path);
219        for sidecar in reddb_file::layout::pager_shadow_sidecar_paths(path) {
220            let _ = std::fs::remove_file(sidecar);
221        }
222    }
223
224    fn fresh_pager() -> (Pager, PathBuf) {
225        let path = temp_db_path();
226        cleanup(&path);
227        let pager = Pager::open_default(&path).unwrap();
228        (pager, path)
229    }
230
231    #[test]
232    fn inline_raw_round_trip_below_threshold() {
233        let (pager, path) = fresh_pager();
234        let value = vec![0xABu8; OVERFLOW_THRESHOLD - 1];
235        let stored = encode(&pager, &value).unwrap();
236        assert_eq!(
237            reddb_file::decode_btree_value_cell(&stored).unwrap(),
238            reddb_file::BTreeValueCell::Inline {
239                is_compressed: false,
240                payload: value.as_slice(),
241            }
242        );
243        assert_eq!(stored.len(), 1 + value.len());
244        let decoded = decode(&pager, &stored).unwrap();
245        assert_eq!(decoded, value);
246        cleanup(&path);
247    }
248
249    #[test]
250    fn inline_raw_at_exact_threshold() {
251        let (pager, path) = fresh_pager();
252        let value = vec![0x7Eu8; OVERFLOW_THRESHOLD];
253        let stored = encode(&pager, &value).unwrap();
254        assert!(matches!(
255            reddb_file::decode_btree_value_cell(&stored).unwrap(),
256            reddb_file::BTreeValueCell::Inline {
257                is_compressed: false,
258                ..
259            }
260        ));
261        assert_eq!(decode(&pager, &stored).unwrap(), value);
262        cleanup(&path);
263    }
264
265    #[test]
266    fn compressible_above_threshold_inlines_compressed() {
267        let (pager, path) = fresh_pager();
268        // ~44 KB of repeating text — compresses into a few hundred bytes
269        // which fit inline.
270        let value = "the quick brown fox jumps over the lazy dog\n"
271            .repeat(1024)
272            .into_bytes();
273        assert!(value.len() > OVERFLOW_THRESHOLD);
274        let stored = encode(&pager, &value).unwrap();
275        assert!(matches!(
276            reddb_file::decode_btree_value_cell(&stored).unwrap(),
277            reddb_file::BTreeValueCell::Inline {
278                is_compressed: true,
279                ..
280            }
281        ));
282        assert!(
283            stored.len() <= 1 + OVERFLOW_THRESHOLD,
284            "compressed cell must fit inline budget"
285        );
286        let decoded = decode(&pager, &stored).unwrap();
287        assert_eq!(decoded, value);
288        cleanup(&path);
289    }
290
291    #[test]
292    fn incompressible_above_threshold_spills_raw() {
293        let (pager, path) = fresh_pager();
294        // Pseudo-random bytes — incompressible.
295        let mut state: u64 = 0x1234_5678_9ABC_DEF0;
296        let value: Vec<u8> = (0..5 * 1024 * 1024)
297            .map(|_| {
298                state ^= state << 13;
299                state ^= state >> 7;
300                state ^= state << 17;
301                state as u8
302            })
303            .collect();
304        let stored = encode(&pager, &value).unwrap();
305        assert!(matches!(
306            reddb_file::decode_btree_value_cell(&stored).unwrap(),
307            reddb_file::BTreeValueCell::Pointer {
308                is_compressed: false,
309                ..
310            }
311        ));
312        assert_eq!(stored.len(), POINTER_CELL_LEN);
313        let decoded = decode(&pager, &stored).unwrap();
314        assert_eq!(decoded.len(), value.len());
315        assert_eq!(decoded, value);
316        cleanup(&path);
317    }
318
319    #[test]
320    fn value_above_max_rejected_without_allocation() {
321        let (pager, path) = fresh_pager();
322        let before = pager.page_count().unwrap();
323        let value = vec![0u8; MAX_VALUE_SIZE + 1];
324        let err = encode(&pager, &value).unwrap_err();
325        assert!(matches!(err, ValueLayoutError::ValueTooLarge(_)));
326        let after = pager.page_count().unwrap();
327        assert_eq!(before, after, "rejected value must not allocate pages");
328        cleanup(&path);
329    }
330
331    #[test]
332    fn pointer_head_extracts_head_id_only_for_pointer_cells() {
333        let inline = reddb_file::encode_btree_inline_raw(&[1, 2, 3]);
334        assert_eq!(pointer_head(&inline), None);
335        let inline_compressed = reddb_file::encode_btree_inline_compressed(&[0, 0, 0, 5]);
336        assert_eq!(pointer_head(&inline_compressed), None);
337
338        let cell = reddb_file::encode_btree_pointer(0x0102_0304, 0, false);
339        assert_eq!(pointer_head(&cell), Some(0x0102_0304));
340        let compressed_cell = reddb_file::encode_btree_pointer(0x0102_0304, 0, true);
341        assert_eq!(pointer_head(&compressed_cell), Some(0x0102_0304));
342    }
343
344    #[test]
345    fn empty_value_round_trips() {
346        let (pager, path) = fresh_pager();
347        let stored = encode(&pager, &[]).unwrap();
348        assert_eq!(stored, vec![0u8]);
349        assert!(decode(&pager, &stored).unwrap().is_empty());
350        cleanup(&path);
351    }
352}