Skip to main content

reddb_file/
vector_value_codec.rs

1//! Large-value codec for vector/B-tree persisted payloads.
2//!
3//! This byte-slice codec knows nothing about pages, MVCC, or overflow chains.
4//! It stores either raw bytes or an LZ4 block prefixed with the original length.
5
/// Single-byte discriminator persisted next to an encoded payload.
///
/// The explicit discriminants are the tag values written to storage, so the
/// numeric assignments must stay stable.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueFlag {
    /// The stored bytes are exactly the caller's input, unmodified.
    Raw = 0,
    /// The stored bytes are a 4-byte little-endian `u32` holding the
    /// uncompressed length, followed by an LZ4 block.
    Lz4 = 1,
}
15
16impl ValueFlag {
17    /// Convert from the on-disk tag byte. Unknown tags are rejected.
18    pub fn from_byte(b: u8) -> Result<Self, ValueCodecError> {
19        match b {
20            0 => Ok(ValueFlag::Raw),
21            1 => Ok(ValueFlag::Lz4),
22            other => Err(ValueCodecError::UnknownFlag(other)),
23        }
24    }
25}
26
/// Failures reported while interpreting a stored tag byte or payload.
#[derive(Debug, Eq, PartialEq)]
pub enum ValueCodecError {
    /// The tag byte does not correspond to any known flag; carries that byte.
    UnknownFlag(u8),
    /// A compressed payload is shorter than its 4-byte length header.
    TruncatedHeader,
    /// The LZ4 decoder rejected the payload; carries the decoder's message.
    Lz4Decode(String),
}
33
34impl std::fmt::Display for ValueCodecError {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        match self {
37            ValueCodecError::UnknownFlag(b) => write!(f, "unknown value codec flag: {}", b),
38            ValueCodecError::TruncatedHeader => write!(
39                f,
40                "compressed payload truncated: need at least 4 bytes for length header"
41            ),
42            ValueCodecError::Lz4Decode(msg) => write!(f, "lz4 decode failed: {}", msg),
43        }
44    }
45}
46
47impl std::error::Error for ValueCodecError {}
48
49/// Encode `input` for storage. Returns the flag and stored bytes.
50///
51/// When LZ4 would not shrink the input including the 4-byte raw length header,
52/// the codec returns raw bytes. Equal size is treated as no win.
53pub fn encode(input: &[u8]) -> (ValueFlag, Vec<u8>) {
54    if input.is_empty() {
55        return (ValueFlag::Raw, Vec::new());
56    }
57
58    let compressed = lz4_flex::compress(input);
59    if compressed.len() + 4 < input.len() {
60        let mut out = Vec::with_capacity(compressed.len() + 4);
61        out.extend_from_slice(&(input.len() as u32).to_le_bytes());
62        out.extend_from_slice(&compressed);
63        (ValueFlag::Lz4, out)
64    } else {
65        (ValueFlag::Raw, input.to_vec())
66    }
67}
68
69/// Return the on-disk size [`encode`] would choose without returning the bytes.
70pub fn would_encode_to(input: &[u8]) -> usize {
71    if input.is_empty() {
72        return 0;
73    }
74    let compressed_len = lz4_flex::compress(input).len();
75    let lz4_total = compressed_len + 4;
76    if lz4_total < input.len() {
77        lz4_total
78    } else {
79        input.len()
80    }
81}
82
83/// Decode a `(flag, bytes)` pair produced by [`encode`].
84pub fn decode(flag: ValueFlag, bytes: &[u8]) -> Result<Vec<u8>, ValueCodecError> {
85    match flag {
86        ValueFlag::Raw => Ok(bytes.to_vec()),
87        ValueFlag::Lz4 => {
88            if bytes.len() < 4 {
89                return Err(ValueCodecError::TruncatedHeader);
90            }
91            let raw_len = u32::from_le_bytes(bytes[0..4].try_into().expect("len checked")) as usize;
92            lz4_flex::decompress(&bytes[4..], raw_len)
93                .map_err(|e| ValueCodecError::Lz4Decode(e.to_string()))
94        }
95    }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic xorshift byte stream: each output byte is the low byte of
    /// the state after one 13/7/17 shift round starting from `seed`.
    fn xorshift_bytes(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        let mut out = Vec::with_capacity(len);
        for _ in 0..len {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            out.push(state as u8);
        }
        out
    }

    /// Repetitive text is stored as LZ4, is smaller, and decodes back exactly.
    #[test]
    fn round_trip_compressible_text() {
        let sentence = "the quick brown fox jumps over the lazy dog ";
        let input = sentence.repeat(64).into_bytes();

        let (flag, stored) = encode(&input);
        assert_eq!(flag, ValueFlag::Lz4, "highly repetitive text must compress");
        assert!(
            bytes_len_lt(&stored, &input),
            "stored size {} must be less than input {}",
            stored.len(),
            input.len()
        );

        let restored = decode(flag, &stored).expect("decode");
        assert_eq!(restored, input);
    }

    /// `true` when `stored` is strictly shorter than `input`.
    fn bytes_len_lt(stored: &[u8], input: &[u8]) -> bool {
        stored.len() < input.len()
    }

    /// Pseudo-random bytes are stored raw and survive the round trip.
    #[test]
    fn round_trip_incompressible_random() {
        let input = xorshift_bytes(0x9E37_79B9_7F4A_7C15, 512);

        let (flag, stored) = encode(&input);
        assert_eq!(
            flag,
            ValueFlag::Raw,
            "incompressible input must fall back to raw"
        );
        assert_eq!(stored, input, "raw bytes must be byte-identical");

        let restored = decode(flag, &stored).expect("decode");
        assert_eq!(restored, input);
    }

    /// Empty input encodes to an empty raw payload and decodes to empty.
    #[test]
    fn empty_input_round_trips_as_raw() {
        let (flag, stored) = encode(&[]);
        assert_eq!(flag, ValueFlag::Raw);
        assert!(stored.is_empty());

        let restored = decode(flag, &stored).expect("decode empty");
        assert!(restored.is_empty());
    }

    /// A single byte can never beat the header overhead, so it stays raw.
    #[test]
    fn exact_threshold_falls_back_to_raw() {
        let single = vec![0x42u8];
        let (flag, stored) = encode(&single);
        assert_eq!(flag, ValueFlag::Raw);
        assert_eq!(stored, single);
    }

    /// The returned flag tells compressed and raw payloads apart.
    #[test]
    fn flag_distinguishes_compressed_and_raw() {
        let (lz4_flag, _) = encode(&vec![b'a'; 256]);
        let (raw_flag, _) = encode(&[0xAB, 0xCD, 0xEF]);

        assert_eq!(lz4_flag, ValueFlag::Lz4);
        assert_eq!(raw_flag, ValueFlag::Raw);
        assert_ne!(lz4_flag, raw_flag);
    }

    /// Known tag bytes map to their flags; an unknown byte is reported back.
    #[test]
    fn flag_byte_round_trips() {
        assert_eq!(ValueFlag::from_byte(0), Ok(ValueFlag::Raw));
        assert_eq!(ValueFlag::from_byte(1), Ok(ValueFlag::Lz4));
        assert_eq!(
            ValueFlag::from_byte(255),
            Err(ValueCodecError::UnknownFlag(255))
        );
    }

    /// The projected size equals the length of what `encode` really returns.
    #[test]
    fn would_encode_to_matches_actual_encode() {
        let compressible = vec![b'x'; 1024];
        let (_, stored) = encode(&compressible);
        assert_eq!(would_encode_to(&compressible), stored.len());

        let random = xorshift_bytes(0xDEAD_BEEF_1234_5678, 256);
        let (_, stored) = encode(&random);
        assert_eq!(would_encode_to(&random), stored.len());

        assert_eq!(would_encode_to(&[]), 0);
    }

    /// A caller can decide on the projected size first and encode afterwards.
    #[test]
    fn would_encode_to_decouples_from_spill_decision() {
        let blob = vec![b'z'; 4096];
        let projected = would_encode_to(&blob);
        let fits_in_64 = projected <= 64;

        let (flag, stored) = encode(&blob);
        assert_eq!(stored.len(), projected);
        assert_eq!(decode(flag, &stored).unwrap(), blob);
        assert!(fits_in_64);
    }

    /// A tag byte outside the known set is rejected with that byte attached.
    #[test]
    fn decode_rejects_unknown_flag_byte() {
        assert_eq!(
            ValueFlag::from_byte(7),
            Err(ValueCodecError::UnknownFlag(7))
        );
    }

    /// An LZ4 payload shorter than the length header is a truncation error.
    #[test]
    fn decode_rejects_truncated_lz4_header() {
        let outcome = decode(ValueFlag::Lz4, &[0x01, 0x02]);
        assert_eq!(outcome, Err(ValueCodecError::TruncatedHeader));
    }
}