use std::io::{self, Read, Write};
/// Encodes `value` as a little-endian base-128 varint and writes it to `writer`.
///
/// Each output byte carries 7 payload bits, least-significant group first.
/// The high bit (`0x80`) is set on every byte except the last.
///
/// The encoding is assembled in a stack buffer and handed to the writer with a
/// single `write_all` call, so an unbuffered writer is not hit once per byte.
///
/// # Returns
///
/// The number of bytes written (1 to 5).
///
/// # Errors
///
/// Propagates any error returned by `writer.write_all`.
pub fn encode_varint_scalar<W: Write>(mut writer: W, value: u32) -> io::Result<usize> {
    // 32 payload bits at 7 bits per byte need at most ceil(32 / 7) = 5 bytes.
    let mut encoded = [0u8; 5];
    let mut len = 0;
    let mut remaining = value;
    loop {
        let low_bits = (remaining & 0x7F) as u8;
        remaining >>= 7;
        if remaining == 0 {
            // Final group: continuation bit stays clear.
            encoded[len] = low_bits;
            len += 1;
            break;
        }
        // More groups follow: flag it with the continuation bit.
        encoded[len] = low_bits | 0x80;
        len += 1;
    }
    writer.write_all(&encoded[..len])?;
    Ok(len)
}
/// Reads one little-endian base-128 varint from `reader` and returns it as a `u32`.
///
/// Bytes are consumed one at a time until a byte with a clear high bit
/// (`0x80`) is seen. Each byte contributes its low 7 bits, least-significant
/// group first.
///
/// # Errors
///
/// * Propagates the error from `read_exact` (for example `UnexpectedEof` when
///   the input ends while the continuation bit is still set).
/// * Returns `InvalidData` ("varint too large") when the encoding does not fit
///   in 32 bits: either a fifth byte carries payload bits above bit 31, or a
///   continuation bit is set on the fifth byte.
pub fn decode_varint_scalar<R: Read>(mut reader: R) -> io::Result<u32> {
    let mut result: u32 = 0;
    let mut shift: u32 = 0;
    loop {
        let mut byte = [0u8; 1];
        reader.read_exact(&mut byte)?;
        let b = byte[0];
        let payload = u32::from(b & 0x7F);
        // At shift 28 only the low 4 payload bits land inside a u32; anything
        // higher would be shifted out and silently lost, so reject it instead.
        if shift == 28 && payload > 0x0F {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "varint too large",
            ));
        }
        result |= payload << shift;
        if (b & 0x80) == 0 {
            return Ok(result);
        }
        shift += 7;
        // A sixth byte can never contribute to a 32-bit value.
        if shift >= 35 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "varint too large",
            ));
        }
    }
}
/// Delta-encodes `values` without SIMD.
///
/// The first output element is the first input element unchanged. Every later
/// element is the wrapping difference between an input and its predecessor, so
/// decreasing sequences produce wrapped (large) deltas instead of panicking.
/// An empty input yields an empty vector.
pub fn delta_encode_scalar(values: &[u32]) -> Vec<u32> {
    let mut deltas = Vec::with_capacity(values.len());
    if let Some(&head) = values.first() {
        deltas.push(head);
        // Each overlapping pair is (previous, current).
        deltas.extend(
            values
                .windows(2)
                .map(|pair| pair[1].wrapping_sub(pair[0])),
        );
    }
    deltas
}
/// Reconstructs the original sequence from delta-encoded values.
///
/// Output element `k` is the running sum of `deltas[0..=k]`. The sum wraps on
/// overflow, mirroring the `wrapping_sub` used when encoding, so deltas
/// produced from a decreasing sequence decode back to the original values
/// instead of panicking in overflow-checked builds.
pub fn delta_decode(deltas: &[u32]) -> Vec<u32> {
    let mut values = Vec::with_capacity(deltas.len());
    let mut acc = 0u32;
    for &delta in deltas {
        acc = acc.wrapping_add(delta);
        values.push(acc);
    }
    values
}
/// Delta-encodes `values` using AVX2, eight lanes at a time.
///
/// Produces the same output as `delta_encode_scalar`: the first element is
/// copied through and each later element is the wrapping difference from its
/// predecessor. Inputs shorter than 16 elements are forwarded to
/// `delta_encode_scalar`. Elements left over after the last full group of
/// eight are handled by a scalar tail that also uses wrapping subtraction, so
/// decreasing sequences behave identically on every path.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports AVX2 (for example by
/// checking `is_x86_feature_detected!("avx2")`) before calling this function.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn delta_encode_avx2(values: &[u32]) -> Vec<u32> {
    use std::arch::x86_64::*;

    const LANES: usize = 8;

    if values.len() < 16 {
        return delta_encode_scalar(values);
    }

    let len = values.len();
    let mut deltas = Vec::with_capacity(len);
    deltas.push(values[0]);

    // Number of full 8-element groups among the `len - 1` differences.
    let chunks = (len - 1) / LANES;
    let mut i = 1;
    for _ in 0..chunks {
        let mut lane_deltas = [0u32; LANES];
        // SAFETY: `i + LANES - 1 <= LANES * chunks <= len - 1`, so both
        // unaligned 8-element loads (starting at `i` and at `i - 1`) stay
        // inside `values`; the store targets a local array of exactly
        // `LANES` u32s. AVX2 availability is the caller's obligation.
        unsafe {
            let current = _mm256_loadu_si256(values.as_ptr().add(i) as *const __m256i);
            let previous = _mm256_loadu_si256(values.as_ptr().add(i - 1) as *const __m256i);
            let diff = _mm256_sub_epi32(current, previous);
            _mm256_storeu_si256(lane_deltas.as_mut_ptr() as *mut __m256i, diff);
        }
        deltas.extend_from_slice(&lane_deltas);
        i += LANES;
    }

    // Scalar tail: must wrap like the SIMD lanes and the scalar encoder do.
    while i < len {
        deltas.push(values[i].wrapping_sub(values[i - 1]));
        i += 1;
    }
    deltas
}
/// Delta-encodes `values`, picking the fastest available implementation.
///
/// On x86_64, when AVX2 is detected at runtime and the input has at least 16
/// elements, the work is handed to `delta_encode_avx2`. In every other case
/// (other architectures, no AVX2, or short inputs) `delta_encode_scalar` is used.
pub fn delta_encode(values: &[u32]) -> Vec<u32> {
    #[cfg(target_arch = "x86_64")]
    {
        let simd_eligible = is_x86_feature_detected!("avx2") && values.len() >= 16;
        if simd_eligible {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { delta_encode_avx2(values) };
        }
    }
    delta_encode_scalar(values)
}
/// Unit tests for the varint codec and the delta encoders/decoder.
#[cfg(test)]
mod tests {
    use super::*;

    /// Values around every 7-bit group boundary survive encode → decode.
    #[test]
    fn test_varint_roundtrip() {
        let test_values = vec![
            0u32,
            1,
            42,
            127,
            128,
            255,
            256,
            300,
            16383,
            16384,
            65535,
            65536,
            u32::MAX - 1,
            u32::MAX,
        ];
        for value in test_values {
            let mut buffer = Vec::new();
            encode_varint_scalar(&mut buffer, value).unwrap();
            let decoded = decode_varint_scalar(buffer.as_slice()).unwrap();
            assert_eq!(
                decoded, value,
                "Roundtrip failed for value {} (encoded as {:?})",
                value, buffer
            );
        }
    }

    /// A value below 128 is emitted as a single byte equal to the value.
    #[test]
    fn test_varint_single_byte() {
        let mut buffer = Vec::new();
        encode_varint_scalar(&mut buffer, 42).unwrap();
        assert_eq!(buffer.len(), 1);
        assert_eq!(buffer[0], 0x2A);
    }

    /// 300 = 0b10_0101100 splits into a low group with the continuation bit
    /// set (0xAC) followed by the high group (0x02).
    #[test]
    fn test_varint_two_bytes() {
        let mut buffer = Vec::new();
        encode_varint_scalar(&mut buffer, 300).unwrap();
        assert_eq!(buffer.len(), 2);
        assert_eq!(buffer[0], 0xAC);
        assert_eq!(buffer[1], 0x02);
    }

    /// `u32::MAX` needs five bytes; the last one holds the top four bits.
    #[test]
    fn test_varint_max_value() {
        let mut buffer = Vec::new();
        encode_varint_scalar(&mut buffer, u32::MAX).unwrap();
        assert_eq!(buffer.len(), 5);
        assert_eq!(buffer[0], 0xFF);
        assert_eq!(buffer[1], 0xFF);
        assert_eq!(buffer[2], 0xFF);
        assert_eq!(buffer[3], 0xFF);
        assert_eq!(buffer[4], 0x0F);
    }

    /// Zero is encoded as a single zero byte.
    #[test]
    fn test_varint_zero() {
        let mut buffer = Vec::new();
        encode_varint_scalar(&mut buffer, 0).unwrap();
        assert_eq!(buffer.len(), 1);
        assert_eq!(buffer[0], 0x00);
    }

    /// Exact byte sequences on either side of the 1-, 2- and 3-byte boundaries.
    #[test]
    fn test_varint_continuation_bits() {
        let test_cases = vec![
            (127, vec![0x7F]),
            (128, vec![0x80, 0x01]),
            (16383, vec![0xFF, 0x7F]),
            (16384, vec![0x80, 0x80, 0x01]),
        ];
        for (value, expected) in test_cases {
            let mut buffer = Vec::new();
            encode_varint_scalar(&mut buffer, value).unwrap();
            assert_eq!(buffer, expected, "Failed for value {}", value);
        }
    }

    /// A six-byte encoding cannot fit in a `u32` and must be rejected.
    #[test]
    fn test_varint_invalid_too_large() {
        let malformed = vec![0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
        let result = decode_varint_scalar(malformed.as_slice());
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::InvalidData);
    }

    /// Input that ends while the continuation bit is set reports EOF.
    #[test]
    fn test_varint_incomplete() {
        let incomplete = vec![0x80];
        let result = decode_varint_scalar(incomplete.as_slice());
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::UnexpectedEof);
    }

    /// The first element passes through; the rest become differences.
    #[test]
    fn test_delta_encode_basic() {
        let values = vec![100, 105, 110, 115, 120];
        let deltas = delta_encode_scalar(&values);
        assert_eq!(deltas, vec![100, 5, 5, 5, 5]);
    }

    #[test]
    fn test_delta_encode_empty() {
        let values: Vec<u32> = vec![];
        let deltas = delta_encode_scalar(&values);
        assert_eq!(deltas, Vec::<u32>::new());
    }

    #[test]
    fn test_delta_encode_single() {
        let values = vec![42];
        let deltas = delta_encode_scalar(&values);
        assert_eq!(deltas, vec![42]);
    }

    #[test]
    fn test_delta_encode_large_gaps() {
        let values = vec![0, 1000, 2000, 3000];
        let deltas = delta_encode_scalar(&values);
        assert_eq!(deltas, vec![0, 1000, 1000, 1000]);
    }

    /// Decreasing input yields wrapped deltas: 4294967291 == 2^32 - 5.
    #[test]
    fn test_delta_encode_decreasing() {
        let values = vec![100u32, 95, 90, 85, 80];
        let deltas = delta_encode_scalar(&values);
        assert_eq!(
            deltas,
            vec![100, 4294967291, 4294967291, 4294967291, 4294967291]
        );
    }

    #[test]
    fn test_delta_decode_roundtrip() {
        let original = vec![100, 105, 110, 115, 120];
        let deltas = delta_encode_scalar(&original);
        let restored = delta_decode(&deltas);
        assert_eq!(restored, original);
    }

    #[test]
    fn test_delta_decode_empty() {
        let deltas: Vec<u32> = vec![];
        let values = delta_decode(&deltas);
        assert_eq!(values, Vec::<u32>::new());
    }

    #[test]
    fn test_delta_decode_single() {
        let deltas = vec![42];
        let values = delta_decode(&deltas);
        assert_eq!(values, vec![42]);
    }

    /// Decoding is a running sum of the deltas.
    #[test]
    fn test_delta_accumulation() {
        let deltas = vec![100, 5, 5, 5, 5];
        let values = delta_decode(&deltas);
        assert_eq!(values, vec![100, 105, 110, 115, 120]);
    }

    /// 1000 elements is past the 16-element threshold of the dispatcher.
    #[test]
    fn test_delta_encode_large_array() {
        let values: Vec<u32> = (0..1000).map(|i| i * 10).collect();
        let deltas = delta_encode(&values);
        assert_eq!(deltas.len(), 1000);
        assert_eq!(deltas[0], 0);
        assert_eq!(deltas[1], 10);
        assert_eq!(deltas[999], 10);
    }

    /// Five elements is below the 16-element threshold of the dispatcher.
    #[test]
    fn test_delta_encode_small_array_scalar() {
        let values = vec![1, 2, 3, 4, 5];
        let deltas = delta_encode(&values);
        assert_eq!(deltas, vec![1, 1, 1, 1, 1]);
    }

    /// The AVX2 encoder must agree with the scalar one element for element.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_delta_encode_simd_matches_scalar() {
        let values: Vec<u32> = (0..1000).map(|i| i * 7).collect();
        let scalar_deltas = delta_encode_scalar(&values);
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            let simd_deltas = unsafe { delta_encode_avx2(&values) };
            assert_eq!(
                simd_deltas, scalar_deltas,
                "SIMD and scalar produced different results"
            );
        } else {
            println!("AVX2 not available, skipping SIMD comparison test");
        }
    }

    /// Only checks that the runtime feature probe can be evaluated.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_avx2_availability() {
        let _has_avx2 = is_x86_feature_detected!("avx2");
    }

    /// The returned count matches the number of bytes actually emitted.
    #[test]
    fn test_varint_encode_bytes_written() {
        let mut buffer = Vec::new();
        let bytes = encode_varint_scalar(&mut buffer, 42).unwrap();
        assert_eq!(bytes, 1);
        buffer.clear();
        let bytes = encode_varint_scalar(&mut buffer, 300).unwrap();
        assert_eq!(bytes, 2);
        buffer.clear();
        let bytes = encode_varint_scalar(&mut buffer, u32::MAX).unwrap();
        assert_eq!(bytes, 5);
    }

    /// End-to-end: delta-encode, varint-encode, then reverse both steps.
    #[test]
    fn test_combined_varint_delta_workflow() {
        let values = vec![1000, 1050, 1100, 1150, 1200];
        let deltas = delta_encode(&values);
        assert_eq!(deltas, vec![1000, 50, 50, 50, 50]);

        let mut encoded_data = Vec::new();
        for delta in &deltas {
            encode_varint_scalar(&mut encoded_data, *delta).unwrap();
        }

        let mut decoded_deltas = Vec::new();
        let mut slice = encoded_data.as_slice();
        while !slice.is_empty() {
            // `&[u8]` implements `Read` by advancing itself, so reading
            // through `&mut slice` consumes exactly the bytes of one varint.
            decoded_deltas.push(decode_varint_scalar(&mut slice).unwrap());
        }

        let restored_values = delta_decode(&decoded_deltas);
        assert_eq!(restored_values, values);
    }
}