reddb_file/
/// One-byte tag recording how a stored value payload is laid out.
///
/// The numeric discriminants are the byte values accepted by
/// [`ValueFlag::from_byte`], so they must not be renumbered.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueFlag {
    /// Payload is the caller's bytes, stored verbatim.
    Raw = 0,
    /// Payload is a 4-byte little-endian raw length followed by an LZ4 block.
    Lz4 = 1,
}
15
16impl ValueFlag {
17 pub fn from_byte(b: u8) -> Result<Self, ValueCodecError> {
19 match b {
20 0 => Ok(ValueFlag::Raw),
21 1 => Ok(ValueFlag::Lz4),
22 other => Err(ValueCodecError::UnknownFlag(other)),
23 }
24 }
25}
26
/// Failures reported while interpreting a flag byte or decoding a stored payload.
#[derive(PartialEq, Eq, Debug)]
pub enum ValueCodecError {
    /// The flag byte does not correspond to any [`ValueFlag`]; holds the offending byte.
    UnknownFlag(u8),
    /// An LZ4 payload was shorter than its 4-byte length header.
    TruncatedHeader,
    /// LZ4 decompression failed; holds the decoder's message.
    Lz4Decode(String),
}
33
34impl std::fmt::Display for ValueCodecError {
35 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36 match self {
37 ValueCodecError::UnknownFlag(b) => write!(f, "unknown value codec flag: {}", b),
38 ValueCodecError::TruncatedHeader => write!(
39 f,
40 "compressed payload truncated: need at least 4 bytes for length header"
41 ),
42 ValueCodecError::Lz4Decode(msg) => write!(f, "lz4 decode failed: {}", msg),
43 }
44 }
45}
46
47impl std::error::Error for ValueCodecError {}
48
49pub fn encode(input: &[u8]) -> (ValueFlag, Vec<u8>) {
54 if input.is_empty() {
55 return (ValueFlag::Raw, Vec::new());
56 }
57
58 let compressed = lz4_flex::compress(input);
59 if compressed.len() + 4 < input.len() {
60 let mut out = Vec::with_capacity(compressed.len() + 4);
61 out.extend_from_slice(&(input.len() as u32).to_le_bytes());
62 out.extend_from_slice(&compressed);
63 (ValueFlag::Lz4, out)
64 } else {
65 (ValueFlag::Raw, input.to_vec())
66 }
67}
68
69pub fn would_encode_to(input: &[u8]) -> usize {
71 if input.is_empty() {
72 return 0;
73 }
74 let compressed_len = lz4_flex::compress(input).len();
75 let lz4_total = compressed_len + 4;
76 if lz4_total < input.len() {
77 lz4_total
78 } else {
79 input.len()
80 }
81}
82
83pub fn decode(flag: ValueFlag, bytes: &[u8]) -> Result<Vec<u8>, ValueCodecError> {
85 match flag {
86 ValueFlag::Raw => Ok(bytes.to_vec()),
87 ValueFlag::Lz4 => {
88 if bytes.len() < 4 {
89 return Err(ValueCodecError::TruncatedHeader);
90 }
91 let raw_len = u32::from_le_bytes(bytes[0..4].try_into().expect("len checked")) as usize;
92 lz4_flex::decompress(&bytes[4..], raw_len)
93 .map_err(|e| ValueCodecError::Lz4Decode(e.to_string()))
94 }
95 }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Produces `len` deterministic pseudo-random bytes from `seed` using a
    /// xor/shift generator, so tests get noisy data without an RNG dependency.
    fn xorshift_bytes(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        std::iter::repeat_with(|| {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            state as u8
        })
        .take(len)
        .collect()
    }

    /// Repetitive text must be stored as LZ4, shrink, and decode to the input.
    #[test]
    fn round_trip_compressible_text() {
        let sentence = "the quick brown fox jumps over the lazy dog ";
        let input = sentence.repeat(64).into_bytes();

        let (flag, stored) = encode(&input);
        assert_eq!(flag, ValueFlag::Lz4, "highly repetitive text must compress");
        assert!(
            stored.len() < input.len(),
            "stored size {} must be less than input {}",
            stored.len(),
            input.len()
        );

        let restored = decode(flag, &stored).expect("decode");
        assert_eq!(restored, input);
    }

    /// Noisy data must be stored verbatim and decode to the input.
    #[test]
    fn round_trip_incompressible_random() {
        let input = xorshift_bytes(0x9E37_79B9_7F4A_7C15, 512);

        let (flag, stored) = encode(&input);
        assert_eq!(
            flag,
            ValueFlag::Raw,
            "incompressible input must fall back to raw"
        );
        assert_eq!(stored, input, "raw bytes must be byte-identical");

        let restored = decode(flag, &stored).expect("decode");
        assert_eq!(restored, input);
    }

    /// Empty input encodes to an empty raw payload and decodes back to empty.
    #[test]
    fn empty_input_round_trips_as_raw() {
        let (flag, stored) = encode(&[]);
        assert_eq!(flag, ValueFlag::Raw);
        assert!(stored.is_empty());

        let restored = decode(flag, &stored).expect("decode empty");
        assert!(restored.is_empty());
    }

    /// A single byte cannot beat the header overhead, so it stays raw.
    #[test]
    fn exact_threshold_falls_back_to_raw() {
        let input = vec![0x42u8];
        let (flag, stored) = encode(&input);
        assert_eq!(flag, ValueFlag::Raw);
        assert_eq!(stored, input);
    }

    /// The returned flag differs between a compressible and a tiny input.
    #[test]
    fn flag_distinguishes_compressible_and_raw_inputs_helper_free() {
        // Kept as a thin alias-free body; see `flag_distinguishes_compressed_and_raw`.
        let (flag, _) = encode(&[0xAB, 0xCD, 0xEF]);
        assert_eq!(flag, ValueFlag::Raw);
    }

    /// The returned flag differs between a compressible and a tiny input.
    #[test]
    fn flag_distinguishes_compressed_and_raw() {
        let run_of_a = vec![b'a'; 256];
        let (flag_c, _) = encode(&run_of_a);
        let (flag_r, _) = encode(&[0xAB, 0xCD, 0xEF]);
        assert_eq!(flag_c, ValueFlag::Lz4);
        assert_eq!(flag_r, ValueFlag::Raw);
        assert_ne!(flag_c, flag_r);
    }

    /// Known flag bytes parse to their variants; an unknown byte is reported.
    #[test]
    fn flag_byte_round_trips() {
        assert_eq!(ValueFlag::from_byte(0).unwrap(), ValueFlag::Raw);
        assert_eq!(ValueFlag::from_byte(1).unwrap(), ValueFlag::Lz4);
        assert_eq!(
            ValueFlag::from_byte(255).unwrap_err(),
            ValueCodecError::UnknownFlag(255)
        );
    }

    /// The projected size equals the real encoded size for both outcomes.
    #[test]
    fn would_encode_to_matches_actual_encode() {
        let samples = [
            vec![b'x'; 1024],
            xorshift_bytes(0xDEAD_BEEF_1234_5678, 256),
        ];
        for sample in &samples {
            let (_, stored) = encode(sample);
            assert_eq!(would_encode_to(sample), stored.len());
        }

        assert_eq!(would_encode_to(&[]), 0);
    }

    /// A caller can size-check via the projection before encoding for real.
    #[test]
    fn would_encode_to_decouples_from_spill_decision() {
        let blob = vec![b'z'; 4096];
        let projected = would_encode_to(&blob);
        let fits_in_64 = projected <= 64;

        let (flag, stored) = encode(&blob);
        assert_eq!(stored.len(), projected);
        assert_eq!(decode(flag, &stored).unwrap(), blob);
        assert!(fits_in_64);
    }

    /// An unrecognised flag byte is rejected before any decoding can happen.
    #[test]
    fn decode_rejects_unknown_flag_byte() {
        assert!(matches!(
            ValueFlag::from_byte(7),
            Err(ValueCodecError::UnknownFlag(7))
        ));
    }

    /// An LZ4 payload shorter than the length header is rejected.
    #[test]
    fn decode_rejects_truncated_lz4_header() {
        let outcome = decode(ValueFlag::Lz4, &[0x01, 0x02]);
        assert_eq!(outcome.unwrap_err(), ValueCodecError::TruncatedHeader);
    }
}