1use crate::error::CodecError;
29
30use crate::CODEC_SAMPLE_SIZE;
31
32pub fn encode(values: &[f64]) -> Result<Vec<u8>, CodecError> {
38 let count = values.len() as u32;
39
40 if values.is_empty() {
41 let mut out = Vec::with_capacity(7);
42 out.extend_from_slice(&0u32.to_le_bytes());
43 out.push(0); out.extend_from_slice(&0u16.to_le_bytes());
45 return Ok(out);
46 }
47
48 let cut = find_best_cut(values);
50 let bits: Vec<u64> = values.iter().map(|v| v.to_bits()).collect();
51
52 let front_mask: u64 = if cut == 64 { 0 } else { u64::MAX << cut };
54 let tail_mask: u64 = if cut == 0 { 0 } else { (1u64 << cut) - 1 };
55 let tail_bytes_per_value = (cut as usize).div_ceil(8);
56
57 let fronts: Vec<u64> = bits.iter().map(|&b| (b & front_mask) >> cut).collect();
58
59 let mut dict: Vec<u64> = fronts.clone();
61 dict.sort_unstable();
62 dict.dedup();
63
64 let indices: Vec<u16> = fronts
68 .iter()
69 .map(|f| {
70 dict.binary_search(f)
71 .map(|idx| idx as u16)
72 .map_err(|_| CodecError::Corrupt {
73 detail: "ALP-RD front value missing from dictionary".into(),
74 })
75 })
76 .collect::<Result<_, _>>()?;
77
78 let dict_size = dict.len() as u16;
79 let use_u8_indices = dict.len() <= 256;
80
81 let mut out = Vec::with_capacity(
83 7 + dict.len() * 8
84 + values.len() * if use_u8_indices { 1 } else { 2 }
85 + values.len() * tail_bytes_per_value,
86 );
87
88 out.extend_from_slice(&count.to_le_bytes());
90 out.push(cut);
91 out.extend_from_slice(&dict_size.to_le_bytes());
92
93 for &entry in &dict {
95 out.extend_from_slice(&entry.to_le_bytes());
96 }
97
98 if use_u8_indices {
100 for &idx in &indices {
101 out.push(idx as u8);
102 }
103 } else {
104 for &idx in &indices {
105 out.extend_from_slice(&idx.to_le_bytes());
106 }
107 }
108
109 for &b in &bits {
111 let tail = b & tail_mask;
112 for byte_idx in 0..tail_bytes_per_value {
113 out.push((tail >> (byte_idx * 8)) as u8);
114 }
115 }
116
117 Ok(out)
118}
119
120pub fn decode(data: &[u8]) -> Result<Vec<f64>, CodecError> {
122 if data.len() < 7 {
123 return Err(CodecError::Truncated {
124 expected: 7,
125 actual: data.len(),
126 });
127 }
128
129 let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
130 let cut = data[4];
131 let dict_size = u16::from_le_bytes([data[5], data[6]]) as usize;
132
133 if count == 0 {
134 return Ok(Vec::new());
135 }
136
137 if cut > 64 {
138 return Err(CodecError::Corrupt {
139 detail: format!("invalid ALP-RD cut position: {cut}"),
140 });
141 }
142
143 let tail_bytes_per_value = (cut as usize).div_ceil(8);
144 let tail_mask: u64 = if cut == 0 { 0 } else { (1u64 << cut) - 1 };
145 let use_u8_indices = dict_size <= 256;
146
147 let mut pos = 7;
149 let dict_bytes = dict_size * 8;
150 if pos + dict_bytes > data.len() {
151 return Err(CodecError::Truncated {
152 expected: pos + dict_bytes,
153 actual: data.len(),
154 });
155 }
156 let mut dict = Vec::with_capacity(dict_size);
157 for _ in 0..dict_size {
158 dict.push(u64::from_le_bytes([
159 data[pos],
160 data[pos + 1],
161 data[pos + 2],
162 data[pos + 3],
163 data[pos + 4],
164 data[pos + 5],
165 data[pos + 6],
166 data[pos + 7],
167 ]));
168 pos += 8;
169 }
170
171 let index_bytes = count * if use_u8_indices { 1 } else { 2 };
173 if pos + index_bytes > data.len() {
174 return Err(CodecError::Truncated {
175 expected: pos + index_bytes,
176 actual: data.len(),
177 });
178 }
179 let mut indices = Vec::with_capacity(count);
180 if use_u8_indices {
181 for i in 0..count {
182 indices.push(data[pos + i] as usize);
183 }
184 pos += count;
185 } else {
186 for i in 0..count {
187 let idx_pos = pos + i * 2;
188 indices.push(u16::from_le_bytes([data[idx_pos], data[idx_pos + 1]]) as usize);
189 }
190 pos += count * 2;
191 }
192
193 let tail_total = count * tail_bytes_per_value;
195 if pos + tail_total > data.len() {
196 return Err(CodecError::Truncated {
197 expected: pos + tail_total,
198 actual: data.len(),
199 });
200 }
201
202 let mut values = Vec::with_capacity(count);
203 for (i, &idx) in indices.iter().enumerate() {
204 if idx >= dict.len() {
205 return Err(CodecError::Corrupt {
206 detail: format!("ALP-RD dict index {idx} out of range (max {})", dict.len()),
207 });
208 }
209
210 let front = dict[idx] << cut;
211 let mut tail = 0u64;
212 let tail_pos = pos + i * tail_bytes_per_value;
213 for byte_idx in 0..tail_bytes_per_value {
214 tail |= (data[tail_pos + byte_idx] as u64) << (byte_idx * 8);
215 }
216 tail &= tail_mask;
217
218 values.push(f64::from_bits(front | tail));
219 }
220
221 Ok(values)
222}
223
224fn find_best_cut(values: &[f64]) -> u8 {
233 let sample_end = values.len().min(CODEC_SAMPLE_SIZE);
234 let sample = &values[..sample_end];
235 let bits: Vec<u64> = sample.iter().map(|v| v.to_bits()).collect();
236
237 let mut best_cut = 48u8;
238 let mut best_unique = usize::MAX;
239
240 for cut in 40..=56 {
242 let mut fronts: Vec<u64> = bits.iter().map(|&b| b >> cut).collect();
243 fronts.sort_unstable();
244 fronts.dedup();
245
246 if fronts.len() < best_unique {
247 best_unique = fronts.len();
248 best_cut = cut;
249 }
250 }
251
252 best_cut
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` reproduces `expected` bit for bit.
    ///
    /// The lengths are compared first: `zip` stops at the shorter side, so a
    /// decoder that returned too few values would otherwise go unnoticed.
    fn assert_same_bits(expected: &[f64], actual: &[f64]) {
        assert_eq!(expected.len(), actual.len(), "decoded length differs");
        for (i, (a, b)) in expected.iter().zip(actual.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}");
        }
    }

    #[test]
    fn empty_roundtrip() {
        let encoded = encode(&[]).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn pi_multiples() {
        let values: Vec<f64> = (1..1000).map(|i| std::f64::consts::PI * i as f64).collect();
        let encoded = encode(&values).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_same_bits(&values, &decoded);
    }

    #[test]
    fn scientific_data() {
        let values: Vec<f64> = (0..1000).map(|i| (i as f64 * 0.001).exp()).collect();
        let encoded = encode(&values).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_same_bits(&values, &decoded);
    }

    #[test]
    fn compression_ratio() {
        let values: Vec<f64> = (1..10_000)
            .map(|i| std::f64::consts::E * i as f64 + (i as f64).sqrt())
            .collect();
        let encoded = encode(&values).unwrap();
        let raw_size = values.len() * 8;
        let ratio = raw_size as f64 / encoded.len() as f64;
        assert!(
            ratio >= 0.95,
            "ALP-RD should not expand >5%, got {ratio:.2}x"
        );

        let decoded = decode(&encoded).unwrap();
        assert_same_bits(&values, &decoded);
    }

    #[test]
    fn special_values() {
        // Compared by bit pattern so that NaN and the sign of zero are checked too.
        let values = vec![
            0.0,
            -0.0,
            f64::INFINITY,
            f64::NEG_INFINITY,
            f64::NAN,
            f64::MIN,
            f64::MAX,
            f64::MIN_POSITIVE,
        ];
        let encoded = encode(&values).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_same_bits(&values, &decoded);
    }

    #[test]
    fn identical_values() {
        let values = vec![42.0f64; 1000];
        let encoded = encode(&values).unwrap();
        let decoded = decode(&encoded).unwrap();
        assert_same_bits(&values, &decoded);
        assert!(encoded.len() < values.len() * 8);
    }

    #[test]
    fn truncated_errors() {
        assert!(decode(&[]).is_err());
        // Six bytes: one short of the seven-byte header.
        assert!(decode(&[1, 0, 0, 0, 48, 0]).is_err());
    }
}