#![cfg_attr(docsrs, feature(doc_cfg))]
use thiserror::Error;
#[derive(Error, Debug)]
pub enum Error {
#[error("Incomplete vbyte sequence")]
VbyteNoEnd,
#[error("Vbyte overflow")]
VbyteOverflow,
}
pub type Result<T> = std::result::Result<T, Error>;
const MAX_VARINT_BYTES: usize = 10;
const CONTINUATION_BIT: u8 = 0x80;
const DATA_MASK: u8 = 0x7F;
const BITS_PER_BYTE: u32 = 7;
#[inline(always)]
pub fn d(bytes: &[u8]) -> Result<(u64, usize)> {
let mut offset = 0;
let value = d_offset(bytes, &mut offset)?;
Ok((value, offset))
}
#[inline(always)]
pub fn d_offset(buf: &[u8], offset: &mut usize) -> Result<u64> {
let pos = *offset;
let len = buf.len();
if pos >= len {
return Err(Error::VbyteNoEnd);
}
let first_byte = unsafe { *buf.get_unchecked(pos) };
if first_byte < CONTINUATION_BIT {
*offset = pos + 1;
return Ok(u64::from(first_byte));
}
let remaining = len - pos;
if remaining >= MAX_VARINT_BYTES {
unsafe { decode_multi_byte_unchecked(buf, offset, pos) }
} else {
decode_multi_byte_checked(buf, offset, pos, len)
}
}
#[inline(always)]
unsafe fn decode_multi_byte_unchecked(buf: &[u8], offset: &mut usize, pos: usize) -> Result<u64> {
let ptr = unsafe { buf.as_ptr().add(pos) };
macro_rules! decode_byte {
($i:expr, $shift:expr, $value:expr) => {{
let byte = unsafe { *ptr.add($i) };
if byte < CONTINUATION_BIT {
*offset = pos + $i + 1;
return Ok($value | (u64::from(byte) << $shift));
}
$value | (u64::from(byte & DATA_MASK) << $shift)
}};
}
let v = decode_byte!(0, 0, 0u64);
let v = decode_byte!(1, 7, v);
let v = decode_byte!(2, 14, v);
let v = decode_byte!(3, 21, v);
let v = decode_byte!(4, 28, v);
let v = decode_byte!(5, 35, v);
let v = decode_byte!(6, 42, v);
let v = decode_byte!(7, 49, v);
let v = decode_byte!(8, 56, v);
let byte = unsafe { *ptr.add(9) };
if byte > 1 {
return Err(Error::VbyteOverflow);
}
*offset = pos + MAX_VARINT_BYTES;
Ok(v | (u64::from(byte) << 63))
}
fn decode_multi_byte_checked(
buf: &[u8],
offset: &mut usize,
mut pos: usize,
len: usize,
) -> Result<u64> {
let mut value = 0u64;
let mut shift = 0u32;
while pos < len {
let byte = unsafe { *buf.get_unchecked(pos) };
pos += 1;
if byte < CONTINUATION_BIT {
value |= u64::from(byte) << shift;
*offset = pos;
return Ok(value);
}
value |= u64::from(byte & DATA_MASK) << shift;
shift += BITS_PER_BYTE;
if shift >= 63 {
if pos >= len {
return Err(Error::VbyteNoEnd);
}
let byte = unsafe { *buf.get_unchecked(pos) };
if byte > 1 {
return Err(Error::VbyteOverflow);
}
value |= u64::from(byte) << 63;
*offset = pos + 1;
return Ok(value);
}
}
Err(Error::VbyteNoEnd)
}
#[inline(always)]
pub fn e(value: u64, buf: &mut Vec<u8>) {
buf.reserve(MAX_VARINT_BYTES);
unsafe {
let len = buf.len();
let ptr = buf.as_mut_ptr().add(len);
let bytes_written = if value < u64::from(CONTINUATION_BIT) {
*ptr = value as u8;
1
} else {
encode_multi_byte(ptr, value)
};
buf.set_len(len + bytes_written);
}
}
#[inline(always)]
unsafe fn encode_multi_byte(ptr: *mut u8, value: u64) -> usize {
let bits = 64 - value.leading_zeros() as usize;
let num_bytes = bits.div_ceil(BITS_PER_BYTE as usize);
match num_bytes {
2 => {
unsafe {
*ptr = (value as u8) | CONTINUATION_BIT;
*ptr.add(1) = (value >> 7) as u8;
}
}
3 => {
unsafe {
*ptr = (value as u8) | CONTINUATION_BIT;
*ptr.add(1) = ((value >> 7) as u8) | CONTINUATION_BIT;
*ptr.add(2) = (value >> 14) as u8;
}
}
4 => {
unsafe {
*ptr = (value as u8) | CONTINUATION_BIT;
*ptr.add(1) = ((value >> 7) as u8) | CONTINUATION_BIT;
*ptr.add(2) = ((value >> 14) as u8) | CONTINUATION_BIT;
*ptr.add(3) = (value >> 21) as u8;
}
}
5 => {
unsafe {
*ptr = (value as u8) | CONTINUATION_BIT;
*ptr.add(1) = ((value >> 7) as u8) | CONTINUATION_BIT;
*ptr.add(2) = ((value >> 14) as u8) | CONTINUATION_BIT;
*ptr.add(3) = ((value >> 21) as u8) | CONTINUATION_BIT;
*ptr.add(4) = (value >> 28) as u8;
}
}
_ => {
let mut v = value;
for i in 0..num_bytes - 1 {
unsafe { *ptr.add(i) = (v as u8) | CONTINUATION_BIT };
v >>= 7;
}
unsafe { *ptr.add(num_bytes - 1) = v as u8 };
}
}
num_bytes
}
#[inline]
pub fn e_li(li: impl IntoIterator<Item = u64>) -> Vec<u8> {
let iter = li.into_iter();
let (lower, _) = iter.size_hint();
let mut result = Vec::with_capacity(lower.saturating_mul(2));
for num in iter {
e(num, &mut result);
}
result
}
#[inline]
pub fn d_li(bytes: &[u8]) -> Result<Vec<u64>> {
let len = bytes.len();
let mut result = Vec::with_capacity(len / 2);
let mut offset = 0;
while offset < len {
result.push(d_offset(bytes, &mut offset)?);
}
Ok(result)
}
#[cfg(feature = "diff")]
#[cfg_attr(docsrs, doc(cfg(feature = "diff")))]
pub fn e_diff(li: impl AsRef<[u64]>) -> Vec<u8> {
let li = li.as_ref();
if li.is_empty() {
return Vec::new();
}
let mut result = Vec::with_capacity(li.len());
let mut prev = 0;
for &curr in li {
debug_assert!(curr >= prev, "e_diff requires strictly increasing sequence");
e(curr.wrapping_sub(prev), &mut result);
prev = curr;
}
result
}
#[cfg(feature = "diff")]
#[cfg_attr(docsrs, doc(cfg(feature = "diff")))]
pub fn d_diff(bytes: &[u8]) -> Result<Vec<u64>> {
let len = bytes.len();
let mut result = Vec::with_capacity(len);
let mut offset = 0;
let mut prev = 0u64;
while offset < len {
let delta = d_offset(bytes, &mut offset)?;
let val = prev.checked_add(delta).ok_or(Error::VbyteOverflow)?;
result.push(val);
prev = val;
}
Ok(result)
}