#[derive(Debug, Clone, Default)]
pub struct PackedOffsets {
data: Vec<u8>,
count: usize,
}
impl PackedOffsets {
pub fn new() -> Self {
Self::default()
}
pub fn pack(&mut self, offsets: &[u32]) {
if offsets.is_empty() {
self.data.clear();
self.count = 0;
return;
}
let mut sorted = offsets.to_vec();
sorted.sort_unstable();
let mut buffer = Vec::with_capacity(sorted.len() * 5);
let mut prev = 0u32;
for &offset in &sorted {
let delta = offset - prev;
write_varint(&mut buffer, delta);
prev = offset;
}
self.data = buffer;
self.count = sorted.len();
}
pub fn unpack(&self) -> Vec<u32> {
if self.count == 0 {
return Vec::new();
}
let mut offsets = Vec::with_capacity(self.count);
let mut pos = 0;
let mut current = 0u32;
while pos < self.data.len() && offsets.len() < self.count {
let (delta, bytes_read) = read_varint(&self.data[pos..]);
current = current.wrapping_add(delta);
offsets.push(current);
pos += bytes_read;
}
offsets
}
pub fn get_count(&self) -> usize {
self.count
}
pub fn get_data(&self) -> &[u8] {
&self.data
}
pub fn is_empty(&self) -> bool {
self.count == 0
}
pub fn memory_size(&self) -> usize {
self.data.len()
}
}
fn write_varint(buffer: &mut Vec<u8>, mut value: u32) {
loop {
let mut byte = (value & 0x7F) as u8;
value >>= 7;
if value != 0 {
byte |= 0x80; }
buffer.push(byte);
if value == 0 {
break;
}
}
}
fn read_varint(buffer: &[u8]) -> (u32, usize) {
let mut value = 0u32;
let mut shift = 0;
let mut pos = 0;
loop {
if pos >= buffer.len() {
break;
}
let byte = buffer[pos];
pos += 1;
value |= ((byte & 0x7F) as u32) << shift;
shift += 7;
if (byte & 0x80) == 0 {
break;
}
}
(value, pos)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new() {
let packed = PackedOffsets::new();
assert!(packed.is_empty());
assert_eq!(packed.get_count(), 0);
assert_eq!(packed.get_data().len(), 0);
}
#[test]
fn test_pack_empty() {
let mut packed = PackedOffsets::new();
packed.pack(&[]);
assert!(packed.is_empty());
assert_eq!(packed.get_count(), 0);
}
#[test]
fn test_pack_single() {
let mut packed = PackedOffsets::new();
packed.pack(&[100]);
assert_eq!(packed.get_count(), 1);
assert!(!packed.is_empty());
let unpacked = packed.unpack();
assert_eq!(unpacked, vec![100]);
}
#[test]
fn test_pack_multiple() {
let mut packed = PackedOffsets::new();
let offsets = vec![100, 200, 300, 400, 500];
packed.pack(&offsets);
assert_eq!(packed.get_count(), 5);
let unpacked = packed.unpack();
assert_eq!(unpacked, offsets);
}
#[test]
fn test_pack_unsorted() {
let mut packed = PackedOffsets::new();
let offsets = vec![500, 100, 300, 200, 400];
packed.pack(&offsets);
assert_eq!(packed.get_count(), 5);
let unpacked = packed.unpack();
assert_eq!(unpacked, vec![100, 200, 300, 400, 500]);
}
#[test]
fn test_pack_with_duplicates() {
let mut packed = PackedOffsets::new();
let offsets = vec![100, 100, 200, 200, 300];
packed.pack(&offsets);
assert_eq!(packed.get_count(), 5);
let unpacked = packed.unpack();
assert_eq!(unpacked, vec![100, 100, 200, 200, 300]);
}
#[test]
fn test_pack_large_offsets() {
let mut packed = PackedOffsets::new();
let offsets = vec![1000000, 2000000, 3000000];
packed.pack(&offsets);
assert_eq!(packed.get_count(), 3);
let unpacked = packed.unpack();
assert_eq!(unpacked, offsets);
}
#[test]
fn test_varint_encoding_small() {
let mut buffer = Vec::new();
write_varint(&mut buffer, 0);
assert_eq!(buffer, vec![0]);
buffer.clear();
write_varint(&mut buffer, 127);
assert_eq!(buffer, vec![127]);
}
#[test]
fn test_varint_encoding_medium() {
let mut buffer = Vec::new();
write_varint(&mut buffer, 128);
assert_eq!(buffer, vec![0x80, 0x01]);
buffer.clear();
write_varint(&mut buffer, 300);
assert_eq!(buffer, vec![0xAC, 0x02]);
}
#[test]
fn test_varint_encoding_large() {
let mut buffer = Vec::new();
write_varint(&mut buffer, 1_000_000);
let (decoded, bytes_read) = read_varint(&buffer);
assert_eq!(decoded, 1_000_000);
assert_eq!(bytes_read, buffer.len());
}
#[test]
fn test_varint_roundtrip() {
for value in [
0,
1,
127,
128,
255,
256,
16383,
16384,
65535,
65536,
1_000_000,
u32::MAX,
] {
let mut buffer = Vec::new();
write_varint(&mut buffer, value);
let (decoded, _) = read_varint(&buffer);
assert_eq!(decoded, value, "Failed roundtrip for {}", value);
}
}
#[test]
fn test_delta_encoding_efficiency() {
let mut packed = PackedOffsets::new();
let offsets: Vec<u32> = (1000..1100).collect();
packed.pack(&offsets);
assert!(packed.memory_size() < offsets.len() * 4); }
#[test]
fn test_memory_size() {
let mut packed = PackedOffsets::new();
let offsets = vec![100, 200, 300];
packed.pack(&offsets);
assert!(packed.memory_size() > 0);
assert_eq!(packed.memory_size(), packed.get_data().len());
}
#[test]
fn test_repack() {
let mut packed = PackedOffsets::new();
packed.pack(&[100, 200, 300]);
assert_eq!(packed.get_count(), 3);
packed.pack(&[400, 500]);
assert_eq!(packed.get_count(), 2);
assert_eq!(packed.unpack(), vec![400, 500]);
}
#[test]
fn test_clone() {
let mut packed1 = PackedOffsets::new();
packed1.pack(&[100, 200, 300]);
let packed2 = packed1.clone();
assert_eq!(packed2.get_count(), 3);
assert_eq!(packed2.unpack(), vec![100, 200, 300]);
}
#[test]
fn test_default() {
let packed = PackedOffsets::default();
assert!(packed.is_empty());
assert_eq!(packed.get_count(), 0);
}
#[test]
fn test_large_dataset() {
let mut packed = PackedOffsets::new();
let offsets: Vec<u32> = (0..10000).map(|i| i * 100).collect();
packed.pack(&offsets);
assert_eq!(packed.get_count(), 10000);
let unpacked = packed.unpack();
assert_eq!(unpacked, offsets);
}
#[test]
fn test_max_u32_value() {
let mut packed = PackedOffsets::new();
let offsets = vec![u32::MAX - 2, u32::MAX - 1, u32::MAX];
packed.pack(&offsets);
let unpacked = packed.unpack();
assert_eq!(unpacked, offsets);
}
}