use crate::error::CodecError;
/// Maximum number of values per block; `encode` splits its input into chunks of this size.
const BLOCK_SIZE: usize = 1024;
/// Size in bytes of the stream header: a little-endian `u32` total value count
/// followed by a little-endian `u16` block count.
const GLOBAL_HEADER_SIZE: usize = 6;
/// Size in bytes of each block header: a little-endian `u16` value count, a `u8`
/// bit width, and the little-endian `i64` minimum value of the block.
const BLOCK_HEADER_SIZE: usize = 11;
/// Encodes `values` into the block-packed byte format.
///
/// The output starts with a header holding the total value count
/// (little-endian `u32`) and the block count (little-endian `u16`), followed
/// by one encoded block per chunk of up to `BLOCK_SIZE` values. An empty input
/// produces only the header, with both counts set to zero.
///
/// # Panics
///
/// Panics if `values` needs more than `u16::MAX` blocks. The header cannot
/// represent such a stream, and letting the counts wrap would silently emit
/// bytes that can never be decoded back to the input.
pub fn encode(values: &[i64]) -> Vec<u8> {
    // `div_ceil` already yields 0 for an empty slice, so no special case is needed.
    let block_count = u16::try_from(values.len().div_ceil(BLOCK_SIZE))
        .expect("encode: input needs more than u16::MAX blocks");
    let total_count = u32::try_from(values.len())
        .expect("encode: value count does not fit in the u32 header field");

    // Rough size guess (about five bytes per value) to limit reallocations.
    let mut out = Vec::with_capacity(GLOBAL_HEADER_SIZE + values.len() * 5);
    out.extend_from_slice(&total_count.to_le_bytes());
    out.extend_from_slice(&block_count.to_le_bytes());
    for chunk in values.chunks(BLOCK_SIZE) {
        encode_block(chunk, &mut out);
    }
    out
}
pub fn decode(data: &[u8]) -> Result<Vec<i64>, CodecError> {
if data.len() < GLOBAL_HEADER_SIZE {
return Err(CodecError::Truncated {
expected: GLOBAL_HEADER_SIZE,
actual: data.len(),
});
}
let total_count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
let block_count = u16::from_le_bytes([data[4], data[5]]) as usize;
if total_count == 0 {
return Ok(Vec::new());
}
let mut values = Vec::with_capacity(total_count);
let mut offset = GLOBAL_HEADER_SIZE;
for block_idx in 0..block_count {
offset = decode_block(data, offset, &mut values, block_idx)?;
}
if values.len() != total_count {
return Err(CodecError::Corrupt {
detail: format!(
"value count mismatch: header says {total_count}, decoded {}",
values.len()
),
});
}
Ok(values)
}
pub fn block_byte_offsets(data: &[u8]) -> Result<Vec<usize>, CodecError> {
if data.len() < GLOBAL_HEADER_SIZE {
return Err(CodecError::Truncated {
expected: GLOBAL_HEADER_SIZE,
actual: data.len(),
});
}
let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
let mut offsets = Vec::with_capacity(num_blocks);
let mut pos = GLOBAL_HEADER_SIZE;
for i in 0..num_blocks {
offsets.push(pos);
pos = skip_block(data, pos, i)?;
}
Ok(offsets)
}
pub fn decode_block_range(
data: &[u8],
start_block: usize,
end_block: usize,
) -> Result<Vec<i64>, CodecError> {
if data.len() < GLOBAL_HEADER_SIZE {
return Err(CodecError::Truncated {
expected: GLOBAL_HEADER_SIZE,
actual: data.len(),
});
}
let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
if start_block >= num_blocks || end_block > num_blocks || start_block >= end_block {
return Ok(Vec::new());
}
let mut offset = GLOBAL_HEADER_SIZE;
for i in 0..start_block {
offset = skip_block(data, offset, i)?;
}
let mut values = Vec::new();
for i in start_block..end_block {
offset = decode_block(data, offset, &mut values, i)?;
}
Ok(values)
}
/// Reads the block count from the stream header.
///
/// The count is the little-endian `u16` stored in bytes 4 and 5 of `data`.
///
/// # Errors
///
/// Returns `CodecError::Truncated` if `data` is shorter than the stream header.
pub fn block_count(data: &[u8]) -> Result<usize, CodecError> {
    match data.get(4..GLOBAL_HEADER_SIZE) {
        Some(&[lo, hi]) => Ok(usize::from(u16::from_le_bytes([lo, hi]))),
        _ => Err(CodecError::Truncated {
            expected: GLOBAL_HEADER_SIZE,
            actual: data.len(),
        }),
    }
}
pub fn decode_single_block(data: &[u8], block_idx: usize) -> Result<Vec<i64>, CodecError> {
if data.len() < GLOBAL_HEADER_SIZE {
return Err(CodecError::Truncated {
expected: GLOBAL_HEADER_SIZE,
actual: data.len(),
});
}
let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
if block_idx >= num_blocks {
return Err(CodecError::Corrupt {
detail: format!("block_idx {block_idx} >= block_count {num_blocks}"),
});
}
let mut offset = GLOBAL_HEADER_SIZE;
for i in 0..block_idx {
offset = skip_block(data, offset, i)?;
}
let mut values = Vec::new();
decode_block(data, offset, &mut values, block_idx)?;
Ok(values)
}
/// Cursor over the blocks of an encoded stream that decodes one block per step.
///
/// Created with `BlockIterator::new`; the cursor borrows the encoded bytes and
/// never copies them.
#[derive(Debug, Clone)]
pub struct BlockIterator<'a> {
    /// The complete encoded stream, including the stream header.
    data: &'a [u8],
    /// Byte offset in `data` of the next block to visit.
    offset: usize,
    /// Number of blocks not yet visited.
    blocks_remaining: usize,
    /// Zero-based index of the next block, passed to the block routines for
    /// use in error messages.
    current_block: usize,
}
impl<'a> BlockIterator<'a> {
pub fn new(data: &'a [u8]) -> Result<Self, CodecError> {
if data.len() < GLOBAL_HEADER_SIZE {
return Err(CodecError::Truncated {
expected: GLOBAL_HEADER_SIZE,
actual: data.len(),
});
}
let num_blocks = u16::from_le_bytes([data[4], data[5]]) as usize;
Ok(Self {
data,
offset: GLOBAL_HEADER_SIZE,
blocks_remaining: num_blocks,
current_block: 0,
})
}
pub fn skip_block(&mut self) -> Result<(), CodecError> {
if self.blocks_remaining == 0 {
return Ok(());
}
self.offset = skip_block(self.data, self.offset, self.current_block)?;
self.current_block += 1;
self.blocks_remaining -= 1;
Ok(())
}
}
/// Yields the decoded values of each remaining block, one block per item.
///
/// After the first decoding error the iterator is exhausted: the error is
/// yielded once and every later call to `next` returns `None`. Without this,
/// the position would never advance and the same error would be produced
/// forever, so any consumer that does not stop on the first `Err` would never
/// terminate.
impl Iterator for BlockIterator<'_> {
    type Item = Result<Vec<i64>, CodecError>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.blocks_remaining == 0 {
            return None;
        }
        let mut values = Vec::new();
        match decode_block(self.data, self.offset, &mut values, self.current_block) {
            Ok(next_offset) => {
                self.offset = next_offset;
                self.current_block += 1;
                self.blocks_remaining -= 1;
                Some(Ok(values))
            }
            Err(err) => {
                // The stream cannot be resynchronised after a bad block, so
                // stop here instead of re-reporting the same failure.
                self.blocks_remaining = 0;
                Some(Err(err))
            }
        }
    }

    /// Reports the number of blocks still to be visited as an exact bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.blocks_remaining, Some(self.blocks_remaining))
    }
}
/// Computes the offset just past the block starting at `offset`, without
/// decoding its values.
///
/// `block_idx` is used only in error messages.
///
/// # Errors
///
/// * `CodecError::Truncated` if the block header or its packed payload extends
///   past the end of `data`. The payload check mirrors `decode_block`, so a
///   block that cannot be decoded is never reported as skippable.
/// * `CodecError::Corrupt` if the stored bit width exceeds 64.
fn skip_block(data: &[u8], offset: usize, block_idx: usize) -> Result<usize, CodecError> {
    let header_end = offset + BLOCK_HEADER_SIZE;
    if header_end > data.len() {
        return Err(CodecError::Truncated {
            expected: header_end,
            actual: data.len(),
        });
    }
    let count = usize::from(u16::from_le_bytes([data[offset], data[offset + 1]]));
    let bit_width = data[offset + 2];
    if bit_width > 64 {
        return Err(CodecError::Corrupt {
            detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
        });
    }

    // A zero bit width yields zero payload bytes, so no special case is needed.
    let packed_bytes = (count * usize::from(bit_width)).div_ceil(8);
    let block_end = header_end + packed_bytes;
    if block_end > data.len() {
        return Err(CodecError::Truncated {
            expected: block_end,
            actual: data.len(),
        });
    }
    Ok(block_end)
}
fn encode_block(values: &[i64], out: &mut Vec<u8>) {
let count = values.len() as u16;
let mut min_val = values[0];
let mut max_val = values[0];
for &v in &values[1..] {
if v < min_val {
min_val = v;
}
if v > max_val {
max_val = v;
}
}
let range = (max_val as u128).wrapping_sub(min_val as u128) as u64;
let bit_width = if range == 0 {
0u8
} else {
64 - range.leading_zeros() as u8
};
out.extend_from_slice(&count.to_le_bytes());
out.push(bit_width);
out.extend_from_slice(&min_val.to_le_bytes());
if bit_width == 0 {
return;
}
let packed_bytes = (count as usize * bit_width as usize).div_ceil(8);
let pack_start = out.len();
out.resize(pack_start + packed_bytes, 0);
let packed = &mut out[pack_start..];
let bw = bit_width as u64;
let mask = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
let mut bit_offset: usize = 0;
for &val in values {
let residual = (val.wrapping_sub(min_val) as u64) & mask;
pack_bits(packed, bit_offset, residual, bit_width);
bit_offset += bit_width as usize;
}
}
fn decode_block(
data: &[u8],
offset: usize,
values: &mut Vec<i64>,
block_idx: usize,
) -> Result<usize, CodecError> {
if offset + BLOCK_HEADER_SIZE > data.len() {
return Err(CodecError::Truncated {
expected: offset + BLOCK_HEADER_SIZE,
actual: data.len(),
});
}
let count = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
let bit_width = data[offset + 2];
let min_val = i64::from_le_bytes([
data[offset + 3],
data[offset + 4],
data[offset + 5],
data[offset + 6],
data[offset + 7],
data[offset + 8],
data[offset + 9],
data[offset + 10],
]);
let mut pos = offset + BLOCK_HEADER_SIZE;
if bit_width == 0 {
values.extend(std::iter::repeat_n(min_val, count));
return Ok(pos);
}
if bit_width > 64 {
return Err(CodecError::Corrupt {
detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
});
}
let packed_bytes = (count * bit_width as usize).div_ceil(8);
if pos + packed_bytes > data.len() {
return Err(CodecError::Truncated {
expected: pos + packed_bytes,
actual: data.len(),
});
}
let packed = &data[pos..pos + packed_bytes];
let mask: u64 = if bit_width == 64 {
u64::MAX
} else {
(1u64 << bit_width) - 1
};
let mut bit_offset: usize = 0;
for _ in 0..count {
let residual = unpack_bits(packed, bit_offset, bit_width) & mask;
values.push(min_val.wrapping_add(residual as i64));
bit_offset += bit_width as usize;
}
pos += packed_bytes;
Ok(pos)
}
/// Returns a byte with the lowest `n` bits set; any `n` of 8 or more yields `0xFF`.
#[inline]
fn low_mask_u8(n: usize) -> u8 {
    match n {
        0..=7 => (1u8 << n) - 1,
        _ => u8::MAX,
    }
}
/// Returns a `u64` with the lowest `n` bits set; any `n` of 64 or more yields `u64::MAX`.
#[inline]
fn low_mask_u64(n: usize) -> u64 {
    match n {
        0..=63 => (1u64 << n) - 1,
        _ => u64::MAX,
    }
}
/// Writes the low `bit_width` bits of `value` into `packed`, starting at bit
/// position `bit_offset` and filling each byte from its least significant bit.
///
/// The first and last bytes touched are OR-ed into, so neighbouring values
/// sharing those bytes are preserved; bytes that are covered completely are
/// overwritten. A `bit_width` of zero writes nothing.
///
/// # Panics
///
/// Panics if the addressed bits fall outside `packed`.
#[inline]
fn pack_bits(packed: &mut [u8], bit_offset: usize, value: u64, bit_width: u8) {
    let width = usize::from(bit_width);
    if width == 0 {
        return;
    }
    let (first_byte, shift) = (bit_offset / 8, bit_offset % 8);

    // Head: fill what is left of the byte the value starts in.
    let head_bits = width.min(8 - shift);
    let head = (value & low_mask_u64(head_bits)) as u8;
    packed[first_byte] |= head << shift;

    // Body: whole bytes are stored directly.
    let mut rest = value >> head_bits;
    let pending_bits = width - head_bits;
    let mut cursor = first_byte + 1;
    for _ in 0..pending_bits / 8 {
        packed[cursor] = rest as u8;
        rest >>= 8;
        cursor += 1;
    }

    // Tail: leftover bits land in the low end of the following byte.
    let tail_bits = pending_bits % 8;
    if tail_bits != 0 {
        packed[cursor] |= (rest & low_mask_u64(tail_bits)) as u8;
    }
}
/// Reads `bit_width` bits from `packed`, starting at bit position `bit_offset`,
/// and returns them as the low bits of a `u64`.
///
/// Bits are consumed from the least significant end of each byte, matching the
/// layout written by `pack_bits`. A `bit_width` of zero returns 0.
///
/// # Panics
///
/// Panics if the addressed bits fall outside `packed`.
#[inline]
fn unpack_bits(packed: &[u8], bit_offset: usize, bit_width: u8) -> u64 {
    let width = usize::from(bit_width);
    if width == 0 {
        return 0;
    }
    let (first_byte, shift) = (bit_offset / 8, bit_offset % 8);

    // Head: the bits available in the byte the value starts in.
    let head_bits = width.min(8 - shift);
    let mut acc = u64::from((packed[first_byte] >> shift) & low_mask_u8(head_bits));
    let mut gathered = head_bits;
    let mut cursor = first_byte + 1;

    // Body: whole bytes.
    while width - gathered >= 8 {
        acc |= u64::from(packed[cursor]) << gathered;
        gathered += 8;
        cursor += 1;
    }

    // Tail: leftover bits from the low end of the following byte.
    let tail_bits = width - gathered;
    if tail_bits != 0 {
        acc |= u64::from(packed[cursor] & low_mask_u8(tail_bits)) << gathered;
    }
    acc
}
/// Returns the number of bits needed to represent `max - min` as an unsigned
/// value: 0 when the two are equal, up to 64 for the widest possible span.
///
/// The difference is computed with wrapping arithmetic, so spans that do not
/// fit in an `i64` (such as `i64::MIN..=i64::MAX`) are handled.
pub fn bit_width_for_range(min: i64, max: i64) -> u8 {
    let span = max.wrapping_sub(min) as u64;
    // A zero span has 64 leading zeros, which gives a width of 0.
    (u64::BITS - span.leading_zeros()) as u8
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty input encodes to a bare header and decodes back to nothing.
    #[test]
    fn empty_roundtrip() {
        let encoded = encode(&[]);
        let decoded = decode(&encoded).unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn single_value() {
        let encoded = encode(&[42i64]);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, vec![42i64]);
    }

    /// A constant block needs zero bits per value, so only headers are written.
    #[test]
    fn identical_values_zero_bits() {
        let values = vec![999i64; 1024];
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
        assert_eq!(encoded.len(), GLOBAL_HEADER_SIZE + BLOCK_HEADER_SIZE);
    }

    /// Eight distinct values fit in three bits each.
    #[test]
    fn small_range_values() {
        let values: Vec<i64> = (0..1024).map(|i| 100 + (i % 8)).collect();
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
        let expected_packed = (1024usize * 3).div_ceil(8);
        let expected_total = GLOBAL_HEADER_SIZE + BLOCK_HEADER_SIZE + expected_packed;
        assert_eq!(encoded.len(), expected_total);
    }

    #[test]
    fn constant_rate_timestamps() {
        let values: Vec<i64> = (0..10_000)
            .map(|i| 1_700_000_000_000 + i * 10_000)
            .collect();
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
        let bytes_per_sample = encoded.len() as f64 / values.len() as f64;
        assert!(
            bytes_per_sample < 4.0,
            "timestamps should pack to <4 bytes/sample, got {bytes_per_sample:.2}"
        );
    }

    #[test]
    fn pre_delta_timestamps() {
        let deltas: Vec<i64> = vec![10_000i64; 10_000];
        let encoded = encode(&deltas);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, deltas);
        let bytes_per_sample = encoded.len() as f64 / deltas.len() as f64;
        assert!(
            bytes_per_sample < 0.2,
            "constant deltas should pack to near-zero, got {bytes_per_sample:.2}"
        );
    }

    #[test]
    fn pre_delta_timestamps_with_jitter() {
        let mut deltas = Vec::with_capacity(10_000);
        // Small deterministic LCG so the test needs no external RNG.
        let mut rng: u64 = 42;
        for _ in 0..10_000 {
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            let jitter = ((rng >> 33) as i64 % 101) - 50;
            deltas.push(10_000 + jitter);
        }
        let encoded = encode(&deltas);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, deltas);
        let bytes_per_sample = encoded.len() as f64 / deltas.len() as f64;
        assert!(
            bytes_per_sample < 1.5,
            "jittered deltas should pack to <1.5 bytes/sample, got {bytes_per_sample:.2}"
        );
    }

    #[test]
    fn negative_values() {
        let values: Vec<i64> = (-500..500).collect();
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
    }

    /// The full `i64` span forces the 64-bit path and wrapping arithmetic.
    #[test]
    fn boundary_values() {
        let values = vec![i64::MIN, 0, i64::MAX];
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
    }

    #[test]
    fn multiple_blocks() {
        let values: Vec<i64> = (0..3000).map(|i| i * 7 + 100).collect();
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
    }

    /// One value more than a full block produces a second, single-value block.
    #[test]
    fn partial_last_block() {
        let values: Vec<i64> = (0..1025).collect();
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
    }

    #[test]
    fn compression_vs_raw() {
        let values: Vec<i64> = (0..10_000)
            .map(|i| 1_700_000_000_000 + i * 10_000)
            .collect();
        let encoded = encode(&values);
        let raw_size = values.len() * 8;
        let ratio = raw_size as f64 / encoded.len() as f64;
        assert!(ratio > 2.0, "expected >2x compression, got {ratio:.1}x");
    }

    #[test]
    fn bit_width_calculation() {
        assert_eq!(bit_width_for_range(0, 0), 0);
        assert_eq!(bit_width_for_range(100, 100), 0);
        assert_eq!(bit_width_for_range(0, 1), 1);
        assert_eq!(bit_width_for_range(0, 7), 3);
        assert_eq!(bit_width_for_range(0, 8), 4);
        assert_eq!(bit_width_for_range(0, 255), 8);
        assert_eq!(bit_width_for_range(0, 256), 9);
        assert_eq!(bit_width_for_range(i64::MIN, i64::MAX), 64);
    }

    #[test]
    fn pack_unpack_roundtrip() {
        for bw in 1..=64u8 {
            let max_val: u64 = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
            let test_vals = [0u64, 1, max_val / 2, max_val];
            for &val in &test_vals {
                let mut packed = vec![0u8; 16];
                pack_bits(&mut packed, 0, val, bw);
                let unpacked = unpack_bits(&packed, 0, bw);
                let mask = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
                assert_eq!(
                    unpacked & mask,
                    val & mask,
                    "pack/unpack failed for bw={bw}, val={val}"
                );
            }
        }
    }

    /// The third value straddles a byte boundary (bits 6..9).
    #[test]
    fn pack_unpack_at_offsets() {
        let mut packed = vec![0u8; 32];
        pack_bits(&mut packed, 0, 0b101, 3);
        pack_bits(&mut packed, 3, 0b110, 3);
        pack_bits(&mut packed, 6, 0b011, 3);
        assert_eq!(unpack_bits(&packed, 0, 3), 0b101);
        assert_eq!(unpack_bits(&packed, 3, 3), 0b110);
        assert_eq!(unpack_bits(&packed, 6, 3), 0b011);
    }

    #[test]
    fn truncated_input_errors() {
        // No header at all.
        assert!(decode(&[]).is_err());
        // Header promises one block, but no block bytes follow.
        assert!(decode(&[1, 0, 0, 0, 1, 0]).is_err());
    }

    #[test]
    fn large_dataset_roundtrip() {
        let mut values = Vec::with_capacity(100_000);
        let mut rng: u64 = 12345;
        for _ in 0..100_000 {
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            values.push((rng >> 1) as i64);
        }
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, values);
    }

    #[test]
    fn decode_single_block_correctness() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        assert_eq!(block_count(&encoded).unwrap(), 3);
        let b0 = decode_single_block(&encoded, 0).unwrap();
        assert_eq!(b0.len(), 1024);
        assert_eq!(b0, &values[..1024]);
        let b1 = decode_single_block(&encoded, 1).unwrap();
        assert_eq!(b1.len(), 1024);
        assert_eq!(b1, &values[1024..2048]);
        let b2 = decode_single_block(&encoded, 2).unwrap();
        assert_eq!(b2.len(), 952);
        assert_eq!(b2, &values[2048..]);
    }

    #[test]
    fn decode_single_block_rejects_out_of_range_index() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        assert!(decode_single_block(&encoded, 3).is_err());
    }

    #[test]
    fn block_byte_offsets_follow_block_layout() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        // Each full block spans a range of 1023, i.e. 10 bits per value.
        let full_block = BLOCK_HEADER_SIZE + (BLOCK_SIZE * 10).div_ceil(8);
        let offsets = block_byte_offsets(&encoded).unwrap();
        assert_eq!(
            offsets,
            vec![
                GLOBAL_HEADER_SIZE,
                GLOBAL_HEADER_SIZE + full_block,
                GLOBAL_HEADER_SIZE + 2 * full_block,
            ]
        );
    }

    #[test]
    fn decode_block_range_matches_slices() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        assert_eq!(decode_block_range(&encoded, 0, 1).unwrap(), &values[..1024]);
        assert_eq!(decode_block_range(&encoded, 1, 3).unwrap(), &values[1024..]);
        assert_eq!(decode_block_range(&encoded, 0, 3).unwrap(), values);
    }

    /// Empty, reversed and out-of-bounds ranges yield no values instead of an error.
    #[test]
    fn decode_block_range_invalid_ranges_are_empty() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        assert!(decode_block_range(&encoded, 2, 2).unwrap().is_empty());
        assert!(decode_block_range(&encoded, 2, 1).unwrap().is_empty());
        assert!(decode_block_range(&encoded, 0, 4).unwrap().is_empty());
        assert!(decode_block_range(&encoded, 3, 4).unwrap().is_empty());
    }

    #[test]
    fn block_iterator_matches_full_decode() {
        let values: Vec<i64> = (0..5000).map(|i| i * 7 - 2000).collect();
        let encoded = encode(&values);
        let mut all = Vec::new();
        let iter = BlockIterator::new(&encoded).unwrap();
        for block in iter {
            all.extend(block.unwrap());
        }
        assert_eq!(all, values);
    }

    #[test]
    fn block_iterator_skip() {
        let values: Vec<i64> = (0..3000).collect();
        let encoded = encode(&values);
        let mut iter = BlockIterator::new(&encoded).unwrap();
        // Skip block 0, then decode block 1.
        iter.skip_block().unwrap();
        let b1 = iter.next().unwrap().unwrap();
        assert_eq!(b1, &values[1024..2048]);
    }
}