use crate::persistence::error::{PersistenceError, PersistenceResult};
pub const BLOCK_SIZE: usize = 128;
pub mod varint {
use crate::persistence::error::{PersistenceError, PersistenceResult};
pub fn encode(value: u64) -> Vec<u8> {
let mut result = Vec::new();
let mut v = value;
loop {
let byte = (v & 0x7F) as u8;
v >>= 7;
if v == 0 {
result.push(byte | 0x80); break;
} else {
result.push(byte);
}
}
result
}
pub fn decode(data: &[u8]) -> PersistenceResult<(u64, usize)> {
let mut result = 0u64;
let mut shift = 0;
let mut bytes_read = 0;
for &byte in data {
bytes_read += 1;
result |= ((byte & 0x7F) as u64) << shift;
if (byte & 0x80) != 0 {
return Ok((result, bytes_read));
}
shift += 7;
if shift >= 64 {
return Err(PersistenceError::Format(
"Varint overflow (64 bits)".to_string(),
));
}
}
Err(PersistenceError::Format("Incomplete varint".to_string()))
}
pub fn encode_many(values: &[u64]) -> Vec<u8> {
let mut result = Vec::new();
for &value in values {
result.extend_from_slice(&encode(value));
}
result
}
pub fn decode_many(data: &[u8], count: usize) -> PersistenceResult<(Vec<u64>, usize)> {
let mut values = Vec::with_capacity(count);
let mut offset = 0;
for _ in 0..count {
let (value, bytes_read) = decode(&data[offset..])?;
values.push(value);
offset += bytes_read;
}
Ok((values, offset))
}
}
pub mod delta {
pub fn encode(sorted_values: &[u32]) -> Vec<u32> {
if sorted_values.is_empty() {
return Vec::new();
}
let mut deltas = Vec::with_capacity(sorted_values.len());
deltas.push(sorted_values[0]);
for i in 1..sorted_values.len() {
deltas.push(sorted_values[i] - sorted_values[i - 1]);
}
deltas
}
pub fn decode(deltas: &[u32]) -> Vec<u32> {
if deltas.is_empty() {
return Vec::new();
}
let mut values = Vec::with_capacity(deltas.len());
values.push(deltas[0]);
for i in 1..deltas.len() {
values.push(values[i - 1] + deltas[i]);
}
values
}
}
pub mod bitpack {
use super::*;
pub fn bit_width(value: u32) -> u8 {
if value == 0 {
return 1;
}
(32 - value.leading_zeros()) as u8
}
pub fn bit_width_many(values: &[u32]) -> u8 {
let max = values.iter().max().copied().unwrap_or(0);
bit_width(max)
}
pub fn pack(values: &[u32], bit_width: u8) -> Vec<u8> {
if values.is_empty() || bit_width == 0 {
return Vec::new();
}
let bits_needed = values.len() * bit_width as usize;
let bytes_needed = bits_needed.div_ceil(8);
let mut result = vec![0u8; bytes_needed];
let mut bit_offset = 0usize;
for &value in values {
let mut remaining = value;
let mut bits_to_write = bit_width as usize;
while bits_to_write > 0 {
let byte_idx = bit_offset / 8;
let bit_in_byte = bit_offset % 8;
let bits_available = 8 - bit_in_byte;
let bits_to_take = bits_to_write.min(bits_available);
let mask = (1u32 << bits_to_take) - 1;
let bits = remaining & mask;
remaining >>= bits_to_take;
result[byte_idx] |= (bits << bit_in_byte) as u8;
bit_offset += bits_to_take;
bits_to_write -= bits_to_take;
}
}
result
}
pub fn unpack(data: &[u8], count: usize, bit_width: u8) -> PersistenceResult<Vec<u32>> {
if count == 0 || bit_width == 0 {
return Ok(Vec::new());
}
let mut result = Vec::with_capacity(count);
let mut bit_offset = 0usize;
for _ in 0..count {
let mut value = 0u32;
let mut bits_remaining = bit_width as usize;
let mut bits_read = 0usize;
while bits_remaining > 0 {
let byte_idx = bit_offset / 8;
if byte_idx >= data.len() {
return Err(PersistenceError::Format(
"Incomplete bitpacked data".to_string(),
));
}
let bit_in_byte = bit_offset % 8;
let bits_available = 8 - bit_in_byte;
let bits_to_take = bits_remaining.min(bits_available);
let mask = ((1u32 << bits_to_take) - 1) << bit_in_byte;
let bits = ((data[byte_idx] as u32) & mask) >> bit_in_byte;
value |= bits << bits_read;
bit_offset += bits_to_take;
bits_remaining -= bits_to_take;
bits_read += bits_to_take;
}
result.push(value);
}
Ok(result)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_varint_encode_decode() {
let values = vec![
0,
1,
127,
128,
255,
256,
65535,
65536,
u32::MAX as u64,
u64::MAX,
];
for &value in &values {
let encoded = varint::encode(value);
let (decoded, bytes_read) = varint::decode(&encoded).unwrap();
assert_eq!(decoded, value);
assert_eq!(bytes_read, encoded.len());
}
}
#[test]
fn test_varint_encode_many() {
let values = vec![1u64, 2, 3, 100, 1000];
let encoded = varint::encode_many(&values);
let (decoded, _) = varint::decode_many(&encoded, values.len()).unwrap();
assert_eq!(decoded, values);
}
#[test]
fn test_delta_encode_decode() {
let sorted = vec![5, 7, 9, 12, 15, 20];
let deltas = delta::encode(&sorted);
assert_eq!(deltas, vec![5, 2, 2, 3, 3, 5]);
let decoded = delta::decode(&deltas);
assert_eq!(decoded, sorted);
}
#[test]
fn test_bitpack_pack_unpack() {
let values = vec![1u32, 2, 3, 4, 5, 6, 7, 8];
let bit_width = bitpack::bit_width_many(&values);
assert_eq!(bit_width, 4);
let packed = bitpack::pack(&values, bit_width);
let unpacked = bitpack::unpack(&packed, values.len(), bit_width).unwrap();
assert_eq!(unpacked, values);
}
#[test]
fn test_bitpack_block_size() {
let values: Vec<u32> = (0..BLOCK_SIZE as u32).collect();
let bit_width = bitpack::bit_width_many(&values);
let packed = bitpack::pack(&values, bit_width);
let unpacked = bitpack::unpack(&packed, values.len(), bit_width).unwrap();
assert_eq!(unpacked, values);
}
#[test]
fn test_bitpack_various_widths() {
for width in 1..=8 {
let max_value = (1u32 << width) - 1;
let values: Vec<u32> = (0..10).map(|i| (i % max_value as usize) as u32).collect();
let bit_width = bitpack::bit_width_many(&values);
let packed = bitpack::pack(&values, bit_width);
let unpacked = bitpack::unpack(&packed, values.len(), bit_width).unwrap();
assert_eq!(unpacked, values, "Failed for width {}", width);
}
}
}