1use crate::error::CodecError;
26
27const BLOCK_SIZE: usize = 1024;
31
32const GLOBAL_HEADER_SIZE: usize = 6;
34
35const BLOCK_HEADER_SIZE: usize = 11;
37
38pub fn encode(values: &[i64]) -> Vec<u8> {
44 let total_count = values.len() as u32;
45 let block_count = if values.is_empty() {
46 0u16
47 } else {
48 values.len().div_ceil(BLOCK_SIZE) as u16
49 };
50
51 let mut out = Vec::with_capacity(GLOBAL_HEADER_SIZE + values.len() * 5);
54
55 out.extend_from_slice(&total_count.to_le_bytes());
57 out.extend_from_slice(&block_count.to_le_bytes());
58
59 for chunk in values.chunks(BLOCK_SIZE) {
61 encode_block(chunk, &mut out);
62 }
63
64 out
65}
66
67pub fn decode(data: &[u8]) -> Result<Vec<i64>, CodecError> {
69 if data.len() < GLOBAL_HEADER_SIZE {
70 return Err(CodecError::Truncated {
71 expected: GLOBAL_HEADER_SIZE,
72 actual: data.len(),
73 });
74 }
75
76 let total_count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
77 let block_count = u16::from_le_bytes([data[4], data[5]]) as usize;
78
79 if total_count == 0 {
80 return Ok(Vec::new());
81 }
82
83 let mut values = Vec::with_capacity(total_count);
84 let mut offset = GLOBAL_HEADER_SIZE;
85
86 for block_idx in 0..block_count {
87 offset = decode_block(data, offset, &mut values, block_idx)?;
88 }
89
90 if values.len() != total_count {
91 return Err(CodecError::Corrupt {
92 detail: format!(
93 "value count mismatch: header says {total_count}, decoded {}",
94 values.len()
95 ),
96 });
97 }
98
99 Ok(values)
100}
101
102pub fn block_byte_offsets(data: &[u8]) -> Result<Vec<usize>, CodecError> {
107 if data.len() < GLOBAL_HEADER_SIZE {
108 return Err(CodecError::Truncated {
109 expected: GLOBAL_HEADER_SIZE,
110 actual: data.len(),
111 });
112 }
113 let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
114 let mut offsets = Vec::with_capacity(num_blocks);
115 let mut pos = GLOBAL_HEADER_SIZE;
116 for i in 0..num_blocks {
117 offsets.push(pos);
118 pos = skip_block(data, pos, i)?;
119 }
120 Ok(offsets)
121}
122
123pub fn decode_block_range(
128 data: &[u8],
129 start_block: usize,
130 end_block: usize,
131) -> Result<Vec<i64>, CodecError> {
132 if data.len() < GLOBAL_HEADER_SIZE {
133 return Err(CodecError::Truncated {
134 expected: GLOBAL_HEADER_SIZE,
135 actual: data.len(),
136 });
137 }
138 let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
139 if start_block >= num_blocks || end_block > num_blocks || start_block >= end_block {
140 return Ok(Vec::new());
141 }
142
143 let mut offset = GLOBAL_HEADER_SIZE;
145 for i in 0..start_block {
146 offset = skip_block(data, offset, i)?;
147 }
148
149 let mut values = Vec::new();
151 for i in start_block..end_block {
152 offset = decode_block(data, offset, &mut values, i)?;
153 }
154 Ok(values)
155}
156
157pub fn block_count(data: &[u8]) -> Result<usize, CodecError> {
159 if data.len() < GLOBAL_HEADER_SIZE {
160 return Err(CodecError::Truncated {
161 expected: GLOBAL_HEADER_SIZE,
162 actual: data.len(),
163 });
164 }
165 Ok(u16::from_le_bytes([data[4], data[5]]) as usize)
166}
167
168pub fn decode_single_block(data: &[u8], block_idx: usize) -> Result<Vec<i64>, CodecError> {
174 if data.len() < GLOBAL_HEADER_SIZE {
175 return Err(CodecError::Truncated {
176 expected: GLOBAL_HEADER_SIZE,
177 actual: data.len(),
178 });
179 }
180 let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
181 if block_idx >= num_blocks {
182 return Err(CodecError::Corrupt {
183 detail: format!("block_idx {block_idx} >= block_count {num_blocks}"),
184 });
185 }
186
187 let mut offset = GLOBAL_HEADER_SIZE;
189 for i in 0..block_idx {
190 offset = skip_block(data, offset, i)?;
191 }
192
193 let mut values = Vec::new();
194 decode_block(data, offset, &mut values, block_idx)?;
195 Ok(values)
196}
197
198pub struct BlockIterator<'a> {
201 data: &'a [u8],
202 offset: usize,
203 blocks_remaining: usize,
204 current_block: usize,
205}
206
207impl<'a> BlockIterator<'a> {
208 pub fn new(data: &'a [u8]) -> Result<Self, CodecError> {
210 if data.len() < GLOBAL_HEADER_SIZE {
211 return Err(CodecError::Truncated {
212 expected: GLOBAL_HEADER_SIZE,
213 actual: data.len(),
214 });
215 }
216 let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
217 Ok(Self {
218 data,
219 offset: GLOBAL_HEADER_SIZE,
220 blocks_remaining: num_blocks,
221 current_block: 0,
222 })
223 }
224
225 pub fn skip_block(&mut self) -> Result<(), CodecError> {
227 if self.blocks_remaining == 0 {
228 return Ok(());
229 }
230 self.offset = skip_block(self.data, self.offset, self.current_block)?;
231 self.current_block += 1;
232 self.blocks_remaining -= 1;
233 Ok(())
234 }
235}
236
237impl Iterator for BlockIterator<'_> {
238 type Item = Result<Vec<i64>, CodecError>;
239
240 fn next(&mut self) -> Option<Self::Item> {
241 if self.blocks_remaining == 0 {
242 return None;
243 }
244 let mut values = Vec::new();
245 match decode_block(self.data, self.offset, &mut values, self.current_block) {
246 Ok(new_offset) => {
247 self.offset = new_offset;
248 self.current_block += 1;
249 self.blocks_remaining -= 1;
250 Some(Ok(values))
251 }
252 Err(e) => Some(Err(e)),
253 }
254 }
255
256 fn size_hint(&self) -> (usize, Option<usize>) {
257 (self.blocks_remaining, Some(self.blocks_remaining))
258 }
259}
260
261fn skip_block(data: &[u8], offset: usize, block_idx: usize) -> Result<usize, CodecError> {
263 if offset + BLOCK_HEADER_SIZE > data.len() {
264 return Err(CodecError::Truncated {
265 expected: offset + BLOCK_HEADER_SIZE,
266 actual: data.len(),
267 });
268 }
269 let count = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
270 let bit_width = data[offset + 2];
271 if bit_width > 64 {
272 return Err(CodecError::Corrupt {
273 detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
274 });
275 }
276 let packed_bytes = if bit_width == 0 {
277 0
278 } else {
279 (count * bit_width as usize).div_ceil(8)
280 };
281 Ok(offset + BLOCK_HEADER_SIZE + packed_bytes)
282}
283
284fn encode_block(values: &[i64], out: &mut Vec<u8>) {
290 let count = values.len() as u16;
291
292 let mut min_val = values[0];
294 let mut max_val = values[0];
295 for &v in &values[1..] {
296 if v < min_val {
297 min_val = v;
298 }
299 if v > max_val {
300 max_val = v;
301 }
302 }
303
304 let range = (max_val as u128).wrapping_sub(min_val as u128) as u64;
306 let bit_width = if range == 0 {
307 0u8
308 } else {
309 64 - range.leading_zeros() as u8
310 };
311
312 out.extend_from_slice(&count.to_le_bytes());
314 out.push(bit_width);
315 out.extend_from_slice(&min_val.to_le_bytes());
316
317 if bit_width == 0 {
318 return;
320 }
321
322 let packed_bytes = (count as usize * bit_width as usize).div_ceil(8);
326 let pack_start = out.len();
327 out.resize(pack_start + packed_bytes, 0);
328 let packed = &mut out[pack_start..];
329
330 let bw = bit_width as u64;
331 let mask = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
332
333 let mut bit_offset: usize = 0;
336 for &val in values {
337 let residual = (val.wrapping_sub(min_val) as u64) & mask;
338 pack_bits(packed, bit_offset, residual, bit_width);
339 bit_offset += bit_width as usize;
340 }
341}
342
343fn decode_block(
347 data: &[u8],
348 offset: usize,
349 values: &mut Vec<i64>,
350 block_idx: usize,
351) -> Result<usize, CodecError> {
352 if offset + BLOCK_HEADER_SIZE > data.len() {
353 return Err(CodecError::Truncated {
354 expected: offset + BLOCK_HEADER_SIZE,
355 actual: data.len(),
356 });
357 }
358
359 let count = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
360 let bit_width = data[offset + 2];
361 let min_val = i64::from_le_bytes([
362 data[offset + 3],
363 data[offset + 4],
364 data[offset + 5],
365 data[offset + 6],
366 data[offset + 7],
367 data[offset + 8],
368 data[offset + 9],
369 data[offset + 10],
370 ]);
371
372 let mut pos = offset + BLOCK_HEADER_SIZE;
373
374 if bit_width == 0 {
375 values.extend(std::iter::repeat_n(min_val, count));
377 return Ok(pos);
378 }
379
380 if bit_width > 64 {
381 return Err(CodecError::Corrupt {
382 detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
383 });
384 }
385
386 let packed_bytes = (count * bit_width as usize).div_ceil(8);
387 if pos + packed_bytes > data.len() {
388 return Err(CodecError::Truncated {
389 expected: pos + packed_bytes,
390 actual: data.len(),
391 });
392 }
393
394 let packed = &data[pos..pos + packed_bytes];
395 let mask: u64 = if bit_width == 64 {
396 u64::MAX
397 } else {
398 (1u64 << bit_width) - 1
399 };
400
401 let mut bit_offset: usize = 0;
403 for _ in 0..count {
404 let residual = unpack_bits(packed, bit_offset, bit_width) & mask;
405 values.push(min_val.wrapping_add(residual as i64));
406 bit_offset += bit_width as usize;
407 }
408
409 pos += packed_bytes;
410 Ok(pos)
411}
412
413#[inline]
419fn low_mask_u8(n: usize) -> u8 {
420 if n >= 8 { 0xFF } else { (1u8 << n) - 1 }
421}
422
423#[inline]
424fn low_mask_u64(n: usize) -> u64 {
425 if n >= 64 { u64::MAX } else { (1u64 << n) - 1 }
426}
427
428#[inline]
432fn pack_bits(packed: &mut [u8], bit_offset: usize, value: u64, bit_width: u8) {
433 let bw = bit_width as usize;
434 if bw == 0 {
435 return;
436 }
437
438 let byte_idx = bit_offset / 8;
439 let bit_idx = bit_offset % 8;
440
441 let first_bits = (8 - bit_idx).min(bw);
443
444 packed[byte_idx] |= ((value & low_mask_u64(first_bits)) as u8) << bit_idx;
446
447 let mut remaining = bw - first_bits;
448 let mut val = value >> first_bits;
449 let mut bi = byte_idx + 1;
450
451 while remaining >= 8 {
453 packed[bi] = (val & 0xFF) as u8;
454 val >>= 8;
455 remaining -= 8;
456 bi += 1;
457 }
458
459 if remaining > 0 {
461 packed[bi] |= (val & low_mask_u64(remaining)) as u8;
462 }
463}
464
465#[inline]
467fn unpack_bits(packed: &[u8], bit_offset: usize, bit_width: u8) -> u64 {
468 let bw = bit_width as usize;
469 if bw == 0 {
470 return 0;
471 }
472
473 let byte_idx = bit_offset / 8;
474 let bit_idx = bit_offset % 8;
475
476 let first_bits = (8 - bit_idx).min(bw);
478 let mut value = ((packed[byte_idx] >> bit_idx) & low_mask_u8(first_bits)) as u64;
479
480 let mut collected = first_bits;
481 let mut bi = byte_idx + 1;
482
483 while collected + 8 <= bw {
485 value |= (packed[bi] as u64) << collected;
486 collected += 8;
487 bi += 1;
488 }
489
490 let remaining = bw - collected;
492 if remaining > 0 {
493 value |= ((packed[bi] & low_mask_u8(remaining)) as u64) << collected;
494 }
495
496 value
497}
498
499pub fn bit_width_for_range(min: i64, max: i64) -> u8 {
503 let range = (max as u128).wrapping_sub(min as u128) as u64;
504 if range == 0 {
505 0
506 } else {
507 64 - range.leading_zeros() as u8
508 }
509}
510
511#[cfg(test)]
512mod tests {
513 use super::*;
514
515 #[test]
516 fn empty_roundtrip() {
517 let encoded = encode(&[]);
518 let decoded = decode(&encoded).unwrap();
519 assert!(decoded.is_empty());
520 }
521
522 #[test]
523 fn single_value() {
524 let encoded = encode(&[42i64]);
525 let decoded = decode(&encoded).unwrap();
526 assert_eq!(decoded, vec![42i64]);
527 }
528
529 #[test]
530 fn identical_values_zero_bits() {
531 let values = vec![999i64; 1024];
532 let encoded = encode(&values);
533 let decoded = decode(&encoded).unwrap();
534 assert_eq!(decoded, values);
535
536 assert_eq!(encoded.len(), 17);
539 }
540
541 #[test]
542 fn small_range_values() {
543 let values: Vec<i64> = (0..1024).map(|i| 100 + (i % 8)).collect();
545 let encoded = encode(&values);
546 let decoded = decode(&encoded).unwrap();
547 assert_eq!(decoded, values);
548
549 let expected_packed = (1024usize * 3).div_ceil(8); let expected_total = GLOBAL_HEADER_SIZE + BLOCK_HEADER_SIZE + expected_packed;
552 assert_eq!(encoded.len(), expected_total);
553 }
554
555 #[test]
556 fn constant_rate_timestamps() {
557 let values: Vec<i64> = (0..10_000)
558 .map(|i| 1_700_000_000_000 + i * 10_000)
559 .collect();
560 let encoded = encode(&values);
561 let decoded = decode(&encoded).unwrap();
562 assert_eq!(decoded, values);
563
564 let bytes_per_sample = encoded.len() as f64 / values.len() as f64;
567 assert!(
568 bytes_per_sample < 4.0,
569 "timestamps should pack to <4 bytes/sample, got {bytes_per_sample:.2}"
570 );
571 }
572
573 #[test]
574 fn pre_delta_timestamps() {
575 let deltas: Vec<i64> = vec![10_000i64; 10_000];
578 let encoded = encode(&deltas);
579 let decoded = decode(&encoded).unwrap();
580 assert_eq!(decoded, deltas);
581
582 let bytes_per_sample = encoded.len() as f64 / deltas.len() as f64;
584 assert!(
585 bytes_per_sample < 0.2,
586 "constant deltas should pack to near-zero, got {bytes_per_sample:.2}"
587 );
588 }
589
590 #[test]
591 fn pre_delta_timestamps_with_jitter() {
592 let mut deltas = Vec::with_capacity(10_000);
594 let mut rng: u64 = 42;
595 for _ in 0..10_000 {
596 rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
597 let jitter = ((rng >> 33) as i64 % 101) - 50;
598 deltas.push(10_000 + jitter);
599 }
600 let encoded = encode(&deltas);
601 let decoded = decode(&encoded).unwrap();
602 assert_eq!(decoded, deltas);
603
604 let bytes_per_sample = encoded.len() as f64 / deltas.len() as f64;
605 assert!(
606 bytes_per_sample < 1.5,
607 "jittered deltas should pack to <1.5 bytes/sample, got {bytes_per_sample:.2}"
608 );
609 }
610
611 #[test]
612 fn negative_values() {
613 let values: Vec<i64> = (-500..500).collect();
614 let encoded = encode(&values);
615 let decoded = decode(&encoded).unwrap();
616 assert_eq!(decoded, values);
617 }
618
619 #[test]
620 fn boundary_values() {
621 let values = vec![i64::MIN, 0, i64::MAX];
622 let encoded = encode(&values);
623 let decoded = decode(&encoded).unwrap();
624 assert_eq!(decoded, values);
625 }
626
627 #[test]
628 fn multiple_blocks() {
629 let values: Vec<i64> = (0..3000).map(|i| i * 7 + 100).collect();
631 let encoded = encode(&values);
632 let decoded = decode(&encoded).unwrap();
633 assert_eq!(decoded, values);
634 }
635
636 #[test]
637 fn partial_last_block() {
638 let values: Vec<i64> = (0..1025).collect(); let encoded = encode(&values);
640 let decoded = decode(&encoded).unwrap();
641 assert_eq!(decoded, values);
642 }
643
644 #[test]
645 fn compression_vs_raw() {
646 let values: Vec<i64> = (0..10_000)
648 .map(|i| 1_700_000_000_000 + i * 10_000)
649 .collect();
650 let encoded = encode(&values);
651 let raw_size = values.len() * 8;
652 let ratio = raw_size as f64 / encoded.len() as f64;
653 assert!(ratio > 2.0, "expected >2x compression, got {ratio:.1}x");
654 }
655
656 #[test]
657 fn bit_width_calculation() {
658 assert_eq!(bit_width_for_range(0, 0), 0);
659 assert_eq!(bit_width_for_range(100, 100), 0);
660 assert_eq!(bit_width_for_range(0, 1), 1);
661 assert_eq!(bit_width_for_range(0, 7), 3);
662 assert_eq!(bit_width_for_range(0, 8), 4);
663 assert_eq!(bit_width_for_range(0, 255), 8);
664 assert_eq!(bit_width_for_range(0, 256), 9);
665 assert_eq!(bit_width_for_range(i64::MIN, i64::MAX), 64);
666 }
667
668 #[test]
669 fn pack_unpack_roundtrip() {
670 for bw in 1..=64u8 {
671 let max_val: u64 = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
672 let test_vals = [0u64, 1, max_val / 2, max_val];
673 for &val in &test_vals {
674 let mut packed = vec![0u8; 16];
675 pack_bits(&mut packed, 0, val, bw);
676 let unpacked = unpack_bits(&packed, 0, bw);
677 let mask = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
678 assert_eq!(
679 unpacked & mask,
680 val & mask,
681 "pack/unpack failed for bw={bw}, val={val}"
682 );
683 }
684 }
685 }
686
687 #[test]
688 fn pack_unpack_at_offsets() {
689 let mut packed = vec![0u8; 32];
691 pack_bits(&mut packed, 0, 0b101, 3); pack_bits(&mut packed, 3, 0b110, 3); pack_bits(&mut packed, 6, 0b011, 3); assert_eq!(unpack_bits(&packed, 0, 3), 0b101);
696 assert_eq!(unpack_bits(&packed, 3, 3), 0b110);
697 assert_eq!(unpack_bits(&packed, 6, 3), 0b011);
698 }
699
700 #[test]
701 fn truncated_input_errors() {
702 assert!(decode(&[]).is_err());
703 assert!(decode(&[1, 0, 0, 0, 1, 0]).is_err()); }
705
706 #[test]
707 fn large_dataset_roundtrip() {
708 let mut values = Vec::with_capacity(100_000);
709 let mut rng: u64 = 12345;
710 for _ in 0..100_000 {
711 rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
712 values.push((rng >> 1) as i64);
713 }
714 let encoded = encode(&values);
715 let decoded = decode(&encoded).unwrap();
716 assert_eq!(decoded, values);
717 }
718
719 #[test]
720 fn decode_single_block_correctness() {
721 let values: Vec<i64> = (0..3000).collect();
723 let encoded = encode(&values);
724 assert_eq!(block_count(&encoded).unwrap(), 3);
725
726 let b0 = decode_single_block(&encoded, 0).unwrap();
727 assert_eq!(b0.len(), 1024);
728 assert_eq!(b0, &values[..1024]);
729
730 let b1 = decode_single_block(&encoded, 1).unwrap();
731 assert_eq!(b1.len(), 1024);
732 assert_eq!(b1, &values[1024..2048]);
733
734 let b2 = decode_single_block(&encoded, 2).unwrap();
735 assert_eq!(b2.len(), 952);
736 assert_eq!(b2, &values[2048..]);
737 }
738
739 #[test]
740 fn block_iterator_matches_full_decode() {
741 let values: Vec<i64> = (0..5000).map(|i| i * 7 - 2000).collect();
742 let encoded = encode(&values);
743
744 let mut all = Vec::new();
745 let iter = BlockIterator::new(&encoded).unwrap();
746 for block in iter {
747 all.extend(block.unwrap());
748 }
749 assert_eq!(all, values);
750 }
751
752 #[test]
753 fn block_iterator_skip() {
754 let values: Vec<i64> = (0..3000).collect();
755 let encoded = encode(&values);
756
757 let mut iter = BlockIterator::new(&encoded).unwrap();
758 iter.skip_block().unwrap(); let b1 = iter.next().unwrap().unwrap();
760 assert_eq!(b1, &values[1024..2048]);
761 }
762}