1use crate::error::CodecError;
31
32use crate::CODEC_SAMPLE_SIZE;
33
34pub fn encode(values: &[f64]) -> Result<Vec<u8>, CodecError> {
40 let count = values.len() as u32;
41
42 if values.is_empty() {
43 let mut out = Vec::with_capacity(7);
44 out.extend_from_slice(&0u32.to_le_bytes());
45 out.push(0); out.extend_from_slice(&0u16.to_le_bytes());
47 return Ok(out);
48 }
49
50 let cut = find_best_cut(values);
52 let bits: Vec<u64> = values.iter().map(|v| v.to_bits()).collect();
53
54 let front_mask: u64 = if cut == 64 { 0 } else { u64::MAX << cut };
56 let tail_mask: u64 = if cut == 0 { 0 } else { (1u64 << cut) - 1 };
57 let tail_bytes_per_value = (cut as usize).div_ceil(8);
58
59 let fronts: Vec<u64> = bits.iter().map(|&b| (b & front_mask) >> cut).collect();
60
61 let mut dict: Vec<u64> = fronts.clone();
63 dict.sort_unstable();
64 dict.dedup();
65
66 let indices: Vec<u16> = fronts
70 .iter()
71 .map(|f| {
72 dict.binary_search(f)
73 .map(|idx| idx as u16)
74 .map_err(|_| CodecError::Corrupt {
75 detail: "ALP-RD front value missing from dictionary".into(),
76 })
77 })
78 .collect::<Result<_, _>>()?;
79
80 let dict_size = dict.len() as u16;
81 let use_u8_indices = dict.len() <= 256;
82
83 let mut out = Vec::with_capacity(
85 7 + dict.len() * 8
86 + values.len() * if use_u8_indices { 1 } else { 2 }
87 + values.len() * tail_bytes_per_value,
88 );
89
90 out.extend_from_slice(&count.to_le_bytes());
92 out.push(cut);
93 out.extend_from_slice(&dict_size.to_le_bytes());
94
95 for &entry in &dict {
97 out.extend_from_slice(&entry.to_le_bytes());
98 }
99
100 if use_u8_indices {
102 for &idx in &indices {
103 out.push(idx as u8);
104 }
105 } else {
106 for &idx in &indices {
107 out.extend_from_slice(&idx.to_le_bytes());
108 }
109 }
110
111 for &b in &bits {
113 let tail = b & tail_mask;
114 for byte_idx in 0..tail_bytes_per_value {
115 out.push((tail >> (byte_idx * 8)) as u8);
116 }
117 }
118
119 Ok(out)
120}
121
122pub fn decode(data: &[u8]) -> Result<Vec<f64>, CodecError> {
124 if data.len() < 7 {
125 return Err(CodecError::Truncated {
126 expected: 7,
127 actual: data.len(),
128 });
129 }
130
131 let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
132 let cut = data[4];
133 let dict_size = u16::from_le_bytes([data[5], data[6]]) as usize;
134
135 if count == 0 {
136 return Ok(Vec::new());
137 }
138
139 if cut > 64 {
140 return Err(CodecError::Corrupt {
141 detail: format!("invalid ALP-RD cut position: {cut}"),
142 });
143 }
144
145 let tail_bytes_per_value = (cut as usize).div_ceil(8);
146 let tail_mask: u64 = if cut == 0 { 0 } else { (1u64 << cut) - 1 };
147 let use_u8_indices = dict_size <= 256;
148
149 let mut pos = 7;
151 let dict_bytes = dict_size * 8;
152 if pos + dict_bytes > data.len() {
153 return Err(CodecError::Truncated {
154 expected: pos + dict_bytes,
155 actual: data.len(),
156 });
157 }
158 let mut dict = Vec::with_capacity(dict_size);
159 for _ in 0..dict_size {
160 dict.push(u64::from_le_bytes([
161 data[pos],
162 data[pos + 1],
163 data[pos + 2],
164 data[pos + 3],
165 data[pos + 4],
166 data[pos + 5],
167 data[pos + 6],
168 data[pos + 7],
169 ]));
170 pos += 8;
171 }
172
173 let index_bytes = count * if use_u8_indices { 1 } else { 2 };
175 if pos + index_bytes > data.len() {
176 return Err(CodecError::Truncated {
177 expected: pos + index_bytes,
178 actual: data.len(),
179 });
180 }
181 let mut indices = Vec::with_capacity(count);
182 if use_u8_indices {
183 for i in 0..count {
184 indices.push(data[pos + i] as usize);
185 }
186 pos += count;
187 } else {
188 for i in 0..count {
189 let idx_pos = pos + i * 2;
190 indices.push(u16::from_le_bytes([data[idx_pos], data[idx_pos + 1]]) as usize);
191 }
192 pos += count * 2;
193 }
194
195 let tail_total = count * tail_bytes_per_value;
197 if pos + tail_total > data.len() {
198 return Err(CodecError::Truncated {
199 expected: pos + tail_total,
200 actual: data.len(),
201 });
202 }
203
204 let mut values = Vec::with_capacity(count);
205 for (i, &idx) in indices.iter().enumerate() {
206 if idx >= dict.len() {
207 return Err(CodecError::Corrupt {
208 detail: format!("ALP-RD dict index {idx} out of range (max {})", dict.len()),
209 });
210 }
211
212 let front = dict[idx] << cut;
213 let mut tail = 0u64;
214 let tail_pos = pos + i * tail_bytes_per_value;
215 for byte_idx in 0..tail_bytes_per_value {
216 tail |= (data[tail_pos + byte_idx] as u64) << (byte_idx * 8);
217 }
218 tail &= tail_mask;
219
220 values.push(f64::from_bits(front | tail));
221 }
222
223 Ok(values)
224}
225
226fn find_best_cut(values: &[f64]) -> u8 {
235 let sample_end = values.len().min(CODEC_SAMPLE_SIZE);
236 let sample = &values[..sample_end];
237 let bits: Vec<u64> = sample.iter().map(|v| v.to_bits()).collect();
238
239 let mut best_cut = 48u8;
240 let mut best_unique = usize::MAX;
241
242 for cut in 40..=56 {
244 let mut fronts: Vec<u64> = bits.iter().map(|&b| b >> cut).collect();
245 fronts.sort_unstable();
246 fronts.dedup();
247
248 if fronts.len() < best_unique {
249 best_unique = fronts.len();
250 best_cut = cut;
251 }
252 }
253
254 best_cut
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes and decodes `values`, asserting that the result has the same length and is
    /// bit-for-bit identical. Returns the encoded bytes for further size checks.
    ///
    /// The explicit length check matters: comparing through `zip` alone stops at the
    /// shorter side and would accept a decoder that returns too few values.
    fn assert_roundtrip(values: &[f64]) -> Vec<u8> {
        let encoded = encode(values).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_eq!(
            values.len(),
            decoded.len(),
            "decoded length differs from input length"
        );
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}");
        }
        encoded
    }

    #[test]
    fn empty_roundtrip() {
        let encoded = encode(&[]).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn pi_multiples() {
        let values: Vec<f64> = (1..1000).map(|i| std::f64::consts::PI * i as f64).collect();
        assert_roundtrip(&values);
    }

    #[test]
    fn scientific_data() {
        let values: Vec<f64> = (0..1000).map(|i| (i as f64 * 0.001).exp()).collect();
        assert_roundtrip(&values);
    }

    #[test]
    fn compression_ratio() {
        let values: Vec<f64> = (1..10_000)
            .map(|i| std::f64::consts::E * i as f64 + (i as f64).sqrt())
            .collect();
        let encoded = assert_roundtrip(&values);
        let raw_size = values.len() * 8;
        let ratio = raw_size as f64 / encoded.len() as f64;
        assert!(
            ratio >= 0.95,
            "ALP-RD should not expand >5%, got {ratio:.2}x"
        );
    }

    #[test]
    fn special_values() {
        let values = vec![
            0.0,
            -0.0,
            f64::INFINITY,
            f64::NEG_INFINITY,
            f64::NAN,
            f64::MIN,
            f64::MAX,
            f64::MIN_POSITIVE,
        ];
        assert_roundtrip(&values);
    }

    #[test]
    fn identical_values() {
        let values = vec![42.0f64; 1000];
        let encoded = assert_roundtrip(&values);
        // A single repeated value must come out smaller than the raw 8 bytes per value.
        assert!(encoded.len() < values.len() * 8);
    }

    #[test]
    fn truncated_errors() {
        // No bytes at all.
        assert!(decode(&[]).is_err());
        // Header cut short by one byte.
        assert!(decode(&[1, 0, 0, 0, 48, 0]).is_err());
        // Complete header announcing one dictionary entry, but no payload follows.
        assert!(decode(&[1, 0, 0, 0, 48, 1, 0]).is_err());
    }
}