use std::io;
#[derive(Debug, Clone)]
pub struct DeltaEncoding {
base: u64,
deltas: Vec<u64>,
count: usize,
}
impl DeltaEncoding {
#[must_use]
pub fn encode(values: &[u64]) -> Self {
if values.is_empty() {
return Self {
base: 0,
deltas: Vec::new(),
count: 0,
};
}
debug_assert!(
values.windows(2).all(|w| w[0] <= w[1]),
"DeltaEncoding::encode requires sorted input; use encode_signed for unsorted data"
);
let base = values[0];
let deltas: Vec<u64> = values
.windows(2)
.map(|w| w[1].saturating_sub(w[0]))
.collect();
Self {
base,
deltas,
count: values.len(),
}
}
#[must_use]
pub fn encode_signed(values: &[i64]) -> Self {
if values.is_empty() {
return Self {
base: 0,
deltas: Vec::new(),
count: 0,
};
}
let base = zigzag_encode(values[0]);
let deltas: Vec<u64> = values
.windows(2)
.map(|w| zigzag_encode(w[1] - w[0]))
.collect();
Self {
base,
deltas,
count: values.len(),
}
}
#[must_use]
pub fn decode(&self) -> Vec<u64> {
if self.count == 0 {
return Vec::new();
}
let mut result = Vec::with_capacity(self.count);
let mut current = self.base;
result.push(current);
for &delta in &self.deltas {
current = current.wrapping_add(delta);
result.push(current);
}
result
}
#[must_use]
pub fn decode_signed(&self) -> Vec<i64> {
if self.count == 0 {
return Vec::new();
}
let mut result = Vec::with_capacity(self.count);
let mut current = zigzag_decode(self.base);
result.push(current);
for &delta in &self.deltas {
current += zigzag_decode(delta);
result.push(current);
}
result
}
#[must_use]
pub fn len(&self) -> usize {
self.count
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.count == 0
}
#[must_use]
pub fn base(&self) -> u64 {
self.base
}
#[must_use]
pub fn deltas(&self) -> &[u64] {
&self.deltas
}
#[must_use]
pub fn max_delta(&self) -> u64 {
self.deltas.iter().copied().max().unwrap_or(0)
}
#[must_use]
pub fn bits_for_max_delta(&self) -> u8 {
let max = self.max_delta();
if max == 0 {
1
} else {
#[allow(clippy::cast_possible_truncation)]
{
64 - max.leading_zeros() as u8
}
}
}
#[must_use]
pub fn compression_ratio(&self) -> f64 {
if self.count == 0 {
return 1.0;
}
let original_size = self.count * 8; let compressed_size = 8 + (self.deltas.len() * 8);
original_size as f64 / compressed_size as f64
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut buf = Vec::with_capacity(8 + 4 + self.deltas.len() * 8);
buf.extend_from_slice(&self.base.to_le_bytes());
#[allow(clippy::cast_possible_truncation)]
buf.extend_from_slice(&(self.count as u32).to_le_bytes());
for &delta in &self.deltas {
buf.extend_from_slice(&delta.to_le_bytes());
}
buf
}
pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
if bytes.len() < 12 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Delta encoding too short",
));
}
let base = u64::from_le_bytes(
bytes[0..8]
.try_into()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
);
let count = u32::from_le_bytes(
bytes[8..12]
.try_into()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
) as usize;
let expected_len = 12 + (count.saturating_sub(1)) * 8;
if bytes.len() < expected_len {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Delta encoding truncated",
));
}
let mut deltas = Vec::with_capacity(count.saturating_sub(1));
let mut offset = 12;
for _ in 1..count {
let delta = u64::from_le_bytes(
bytes[offset..offset + 8]
.try_into()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
);
deltas.push(delta);
offset += 8;
}
Ok(Self {
base,
deltas,
count,
})
}
}
#[inline]
#[must_use]
#[allow(clippy::cast_sign_loss)]
pub fn zigzag_encode(value: i64) -> u64 {
((value << 1) ^ (value >> 63)) as u64
}
#[inline]
#[must_use]
#[allow(clippy::cast_possible_wrap)]
pub fn zigzag_decode(value: u64) -> i64 {
((value >> 1) as i64) ^ (-((value & 1) as i64))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_delta_encode_decode() {
let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
let encoded = DeltaEncoding::encode(&values);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_delta_empty() {
let values: Vec<u64> = vec![];
let encoded = DeltaEncoding::encode(&values);
assert!(encoded.is_empty());
assert_eq!(encoded.decode(), values);
}
#[test]
fn test_delta_single_value() {
let values = vec![42u64];
let encoded = DeltaEncoding::encode(&values);
assert_eq!(encoded.len(), 1);
assert_eq!(encoded.decode(), values);
}
#[test]
fn test_delta_sequential() {
let values: Vec<u64> = (0..100).collect();
let encoded = DeltaEncoding::encode(&values);
assert_eq!(encoded.max_delta(), 1);
assert_eq!(encoded.bits_for_max_delta(), 1);
assert_eq!(encoded.decode(), values);
}
#[test]
fn test_delta_large_gaps() {
let values = vec![0u64, 1000, 2000, 3000];
let encoded = DeltaEncoding::encode(&values);
assert_eq!(encoded.max_delta(), 1000);
assert_eq!(encoded.decode(), values);
}
#[test]
fn test_zigzag_encode_decode() {
assert_eq!(zigzag_encode(0), 0);
assert_eq!(zigzag_encode(-1), 1);
assert_eq!(zigzag_encode(1), 2);
assert_eq!(zigzag_encode(-2), 3);
assert_eq!(zigzag_encode(2), 4);
for v in [-100i64, -50, -1, 0, 1, 50, 100, i64::MIN, i64::MAX] {
assert_eq!(zigzag_decode(zigzag_encode(v)), v);
}
}
#[test]
fn test_delta_signed() {
let values = vec![-100i64, -50, 0, 50, 100];
let encoded = DeltaEncoding::encode_signed(&values);
let decoded = encoded.decode_signed();
assert_eq!(values, decoded);
}
#[test]
fn test_delta_serialization() {
let values = vec![100u64, 105, 107, 110, 115];
let encoded = DeltaEncoding::encode(&values);
let bytes = encoded.to_bytes();
let restored = DeltaEncoding::from_bytes(&bytes).unwrap();
assert_eq!(encoded.decode(), restored.decode());
}
#[test]
fn test_compression_ratio() {
let sequential: Vec<u64> = (0..1000).collect();
let encoded = DeltaEncoding::encode(&sequential);
assert!(encoded.len() == 1000);
}
}