use crate::error::{InterpretError, TauqError};
/// Decodes an unsigned base-128 varint from the front of `bytes`.
///
/// Each byte contributes its low seven bits, least-significant group first;
/// a set high bit means another byte follows. One- and two-byte encodings are
/// handled inline, anything longer is delegated to `decode_varint_slow`.
///
/// Returns the decoded value together with the number of bytes consumed.
///
/// # Errors
///
/// Fails when `bytes` is empty, when the encoding is cut short, or with
/// whatever error the slow path reports for longer encodings.
#[inline(always)]
pub fn fast_decode_varint(bytes: &[u8]) -> Result<(u64, usize), TauqError> {
    match *bytes {
        [] => Err(TauqError::Interpret(InterpretError::new(
            "Empty buffer for varint",
        ))),
        // Single byte without a continuation bit.
        [first, ..] if first < 0x80 => Ok((u64::from(first), 1)),
        // Continuation bit set, but there is no second byte to read.
        [_] => Err(TauqError::Interpret(InterpretError::new(
            "Truncated varint",
        ))),
        // Two bytes, the second one terminates the value.
        [first, second, ..] if second < 0x80 => {
            let low = u64::from(first & 0x7F);
            let high = u64::from(second) << 7;
            Ok((low | high, 2))
        }
        // Three or more bytes: leave it to the general loop.
        _ => decode_varint_slow(bytes),
    }
}
/// General varint decoding loop, used by the fast paths once the first two
/// bytes both carry the continuation bit.
///
/// Decoding restarts from the first byte of `bytes`. Returns the decoded
/// value and the number of bytes consumed.
///
/// # Errors
///
/// * `"Varint overflow"` when the encoding does not fit in 64 bits: either
///   more than ten bytes are chained, or the tenth byte carries payload bits
///   above bit 0 (those bits would land beyond bit 63).
/// * `"Truncated varint"` when the input ends while the continuation bit is
///   still set.
#[cold]
#[inline(never)]
fn decode_varint_slow(bytes: &[u8]) -> Result<(u64, usize), TauqError> {
    let mut result: u64 = 0;
    let mut shift: u32 = 0;
    for (pos, &byte) in bytes.iter().enumerate() {
        let payload = u64::from(byte & 0x7F);
        // The tenth byte is shifted by 63, so only its lowest bit fits in a
        // u64. Reject larger payloads instead of silently dropping bits.
        if shift == 63 && payload > 1 {
            return Err(TauqError::Interpret(InterpretError::new("Varint overflow")));
        }
        result |= payload << shift;
        if byte & 0x80 == 0 {
            return Ok((result, pos + 1));
        }
        shift += 7;
        if shift >= 64 {
            return Err(TauqError::Interpret(InterpretError::new("Varint overflow")));
        }
    }
    Err(TauqError::Interpret(InterpretError::new(
        "Truncated varint",
    )))
}
/// Decodes a zigzag-encoded signed varint from the front of `bytes`.
///
/// The unsigned value is read with `fast_decode_varint`; bit 0 selects the
/// sign and the remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1,
/// 3 -> -2, ...). Returns the signed value and the number of bytes consumed.
///
/// # Errors
///
/// Propagates any error from `fast_decode_varint`.
#[inline(always)]
pub fn fast_decode_signed_varint(bytes: &[u8]) -> Result<(i64, usize), TauqError> {
    fast_decode_varint(bytes).map(|(raw, consumed)| {
        let magnitude = (raw >> 1) as i64;
        // All ones when the sign bit is set, all zeros otherwise.
        let sign_mask = -((raw & 1) as i64);
        (magnitude ^ sign_mask, consumed)
    })
}
/// Decodes an unsigned varint without bounds-checking the first two bytes.
///
/// Returns the decoded value and the number of bytes consumed. Encodings of
/// three or more bytes go through `decode_varint_slow`; if that fails, the
/// error is discarded and `(0, 0)` is returned instead.
///
/// # Safety
///
/// `bytes` must contain at least one byte, and at least two bytes when the
/// first byte has its high bit set. Violating this reads out of bounds.
#[inline(always)]
#[allow(dead_code)]
pub unsafe fn fast_decode_varint_unchecked(bytes: &[u8]) -> (u64, usize) {
    // SAFETY: the caller guarantees `bytes` is non-empty.
    let first = unsafe { *bytes.get_unchecked(0) };
    if first & 0x80 == 0 {
        return (u64::from(first), 1);
    }
    // SAFETY: the caller guarantees a second byte exists whenever the first
    // byte carries the continuation bit.
    let second = unsafe { *bytes.get_unchecked(1) };
    if second & 0x80 == 0 {
        let low = u64::from(first & 0x7F);
        let high = u64::from(second) << 7;
        return (low | high, 2);
    }
    match decode_varint_slow(bytes) {
        Ok(decoded) => decoded,
        Err(_) => (0, 0),
    }
}
/// Decodes `count` consecutive unsigned varints from `bytes` as `u32` values.
///
/// Returns the decoded values and the total number of bytes consumed.
///
/// # Errors
///
/// * `count` exceeds `MAX_BATCH_COUNT`.
/// * The buffer ends before `count` values have been read.
/// * A varint is malformed (see `fast_decode_varint`).
/// * A decoded value is larger than `u32::MAX`; such values are rejected
///   rather than silently truncated.
#[inline]
pub fn batch_decode_u32(bytes: &[u8], count: usize) -> Result<(Vec<u32>, usize), TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    // Every varint occupies at least one byte, so a short buffer can never
    // yield more than `bytes.len()` values; do not reserve beyond that.
    let mut result = Vec::with_capacity(count.min(bytes.len()));
    let mut pos = 0;
    for _ in 0..count {
        if pos >= bytes.len() {
            return Err(TauqError::Interpret(InterpretError::new(
                "Unexpected end of buffer in batch decode",
            )));
        }
        let (value, len) = fast_decode_varint(&bytes[pos..])?;
        if value > u64::from(u32::MAX) {
            return Err(TauqError::Interpret(InterpretError::new(format!(
                "Varint value {} does not fit in u32",
                value
            ))));
        }
        // Range-checked above, so the cast is lossless.
        result.push(value as u32);
        pos += len;
    }
    Ok((result, pos))
}
/// Decodes `count` consecutive unsigned varints from `bytes` as `u64` values.
///
/// Returns the decoded values and the total number of bytes consumed.
///
/// # Errors
///
/// * `count` exceeds `MAX_BATCH_COUNT`.
/// * The buffer ends before `count` values have been read.
/// * A varint is malformed (see `fast_decode_varint`).
#[inline]
pub fn batch_decode_u64(bytes: &[u8], count: usize) -> Result<(Vec<u64>, usize), TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    // Every varint occupies at least one byte, so a short buffer can never
    // yield more than `bytes.len()` values; do not reserve beyond that.
    let mut result = Vec::with_capacity(count.min(bytes.len()));
    let mut pos = 0;
    for _ in 0..count {
        if pos >= bytes.len() {
            return Err(TauqError::Interpret(InterpretError::new(
                "Unexpected end of buffer in batch decode",
            )));
        }
        let (value, len) = fast_decode_varint(&bytes[pos..])?;
        result.push(value);
        pos += len;
    }
    Ok((result, pos))
}
/// Decodes `count` consecutive zigzag varints from `bytes` as `i32` values.
///
/// Returns the decoded values and the total number of bytes consumed.
///
/// # Errors
///
/// * `count` exceeds `MAX_BATCH_COUNT`.
/// * The buffer ends before `count` values have been read.
/// * A varint is malformed (see `fast_decode_signed_varint`).
/// * A decoded value lies outside the `i32` range; such values are rejected
///   rather than silently truncated.
#[inline]
pub fn batch_decode_i32(bytes: &[u8], count: usize) -> Result<(Vec<i32>, usize), TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    // Every varint occupies at least one byte, so a short buffer can never
    // yield more than `bytes.len()` values; do not reserve beyond that.
    let mut result = Vec::with_capacity(count.min(bytes.len()));
    let mut pos = 0;
    for _ in 0..count {
        if pos >= bytes.len() {
            return Err(TauqError::Interpret(InterpretError::new(
                "Unexpected end of buffer in batch decode",
            )));
        }
        let (value, len) = fast_decode_signed_varint(&bytes[pos..])?;
        if value < i64::from(i32::MIN) || value > i64::from(i32::MAX) {
            return Err(TauqError::Interpret(InterpretError::new(format!(
                "Varint value {} does not fit in i32",
                value
            ))));
        }
        // Range-checked above, so the cast is lossless.
        result.push(value as i32);
        pos += len;
    }
    Ok((result, pos))
}
/// Decodes `count` consecutive zigzag varints from `bytes` as `i64` values.
///
/// Returns the decoded values and the total number of bytes consumed.
///
/// # Errors
///
/// * `count` exceeds `MAX_BATCH_COUNT`.
/// * The buffer ends before `count` values have been read.
/// * A varint is malformed (see `fast_decode_signed_varint`).
#[inline]
pub fn batch_decode_i64(bytes: &[u8], count: usize) -> Result<(Vec<i64>, usize), TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    // Every varint occupies at least one byte, so a short buffer can never
    // yield more than `bytes.len()` values; do not reserve beyond that.
    let mut result = Vec::with_capacity(count.min(bytes.len()));
    let mut pos = 0;
    for _ in 0..count {
        if pos >= bytes.len() {
            return Err(TauqError::Interpret(InterpretError::new(
                "Unexpected end of buffer in batch decode",
            )));
        }
        let (value, len) = fast_decode_signed_varint(&bytes[pos..])?;
        result.push(value);
        pos += len;
    }
    Ok((result, pos))
}
/// Reads `count` little-endian `f32` values from the front of `bytes`.
///
/// Only the first `count * 4` bytes are inspected; trailing bytes are ignored.
///
/// # Errors
///
/// Fails when `count` exceeds `MAX_BATCH_COUNT` or when `bytes` holds fewer
/// than `count * 4` bytes.
#[inline]
pub fn batch_decode_f32(bytes: &[u8], count: usize) -> Result<Vec<f32>, TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    let needed = count * 4;
    if bytes.len() < needed {
        return Err(TauqError::Interpret(InterpretError::new(
            "Buffer too small for f32 batch decode",
        )));
    }
    // Walk the payload in fixed four-byte groups, one per value.
    let values: Vec<f32> = bytes[..needed]
        .chunks_exact(4)
        .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
        .collect();
    Ok(values)
}
/// Reads `count` little-endian `f64` values from the front of `bytes`.
///
/// Only the first `count * 8` bytes are inspected; trailing bytes are ignored.
///
/// # Errors
///
/// Fails when `count` exceeds `MAX_BATCH_COUNT` or when `bytes` holds fewer
/// than `count * 8` bytes.
#[inline]
pub fn batch_decode_f64(bytes: &[u8], count: usize) -> Result<Vec<f64>, TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    let needed = count * 8;
    if bytes.len() < needed {
        return Err(TauqError::Interpret(InterpretError::new(
            "Buffer too small for f64 batch decode",
        )));
    }
    // Walk the payload in fixed eight-byte groups, one per value.
    let values: Vec<f64> = bytes[..needed]
        .chunks_exact(8)
        .map(|chunk| {
            f64::from_le_bytes([
                chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6], chunk[7],
            ])
        })
        .collect();
    Ok(values)
}
/// Reads `count` booleans from the front of `bytes`, one byte per value.
///
/// A zero byte decodes to `false`, any other byte to `true`.
///
/// # Errors
///
/// Fails when `count` exceeds `MAX_BATCH_COUNT` or when `bytes` holds fewer
/// than `count` bytes.
#[inline]
pub fn batch_decode_bool(bytes: &[u8], count: usize) -> Result<Vec<bool>, TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    match bytes.get(..count) {
        Some(flags) => Ok(flags.iter().map(|&flag| flag != 0).collect()),
        None => Err(TauqError::Interpret(InterpretError::new(
            "Buffer too small for bool batch decode",
        ))),
    }
}
/// String dictionary whose entries borrow directly from the encoded buffer.
///
/// Entries are addressed by their zero-based position. `Default` yields an
/// empty dictionary, which is handy for decoding values that never consult it.
#[derive(Debug, Clone, Default)]
pub struct FastBorrowedDictionary<'a> {
    /// Dictionary entries in index order.
    strings: Vec<&'a str>,
}
/// Largest entry count `FastBorrowedDictionary::decode` accepts before
/// rejecting the input.
const MAX_DICT_ENTRIES: usize = 1_000_000;
/// Largest element count accepted by the batch decoders and by
/// `FastDecode::fast_decode_slice` before rejecting the input.
const MAX_BATCH_COUNT: usize = 10_000_000;
impl<'a> FastBorrowedDictionary<'a> {
    /// Parses a dictionary from the front of `bytes`.
    ///
    /// Layout: a varint entry count, followed by that many entries, each a
    /// varint byte length and then the UTF-8 bytes themselves. The returned
    /// strings borrow from `bytes`; nothing is copied.
    ///
    /// Returns the dictionary and the number of bytes consumed.
    ///
    /// # Errors
    ///
    /// * The entry count exceeds `MAX_DICT_ENTRIES`.
    /// * A varint is malformed or the buffer ends early.
    /// * An entry's declared length runs past the end of `bytes`.
    /// * An entry is not valid UTF-8.
    pub fn decode(bytes: &'a [u8]) -> Result<(Self, usize), TauqError> {
        let (count, mut pos) = fast_decode_varint(bytes)?;
        // Compare as u64 so an oversized count cannot wrap into range when
        // narrowed to usize on 32-bit targets.
        if count > MAX_DICT_ENTRIES as u64 {
            return Err(TauqError::Interpret(InterpretError::new(format!(
                "Dictionary count {} exceeds maximum {}",
                count, MAX_DICT_ENTRIES
            ))));
        }
        let count = count as usize;
        // Each entry needs at least one length byte, so the remaining input
        // bounds how many entries can really be present.
        let mut strings = Vec::with_capacity(count.min(bytes.len() - pos));
        for _ in 0..count {
            let (len, len_bytes) = fast_decode_varint(&bytes[pos..])?;
            pos += len_bytes;
            // `pos` never exceeds `bytes.len()`, so this cannot underflow.
            // Comparing against the remaining length (instead of computing
            // `pos + len`) avoids overflow on hostile length values.
            let remaining = bytes.len() - pos;
            if len > remaining as u64 {
                return Err(TauqError::Interpret(InterpretError::new(
                    "String extends past buffer",
                )));
            }
            let end = pos + len as usize;
            let s = std::str::from_utf8(&bytes[pos..end]).map_err(|_| {
                TauqError::Interpret(InterpretError::new("Invalid UTF-8 in dictionary"))
            })?;
            strings.push(s);
            pos = end;
        }
        Ok((Self { strings }, pos))
    }

    /// Returns the entry at `idx`, or `None` when `idx` is out of range.
    #[inline(always)]
    pub fn get(&self, idx: u32) -> Option<&'a str> {
        self.strings.get(idx as usize).copied()
    }

    /// Returns the entry at `idx` without a bounds check.
    ///
    /// # Safety
    ///
    /// `idx` must be strictly less than [`len`](Self::len); any other value
    /// reads out of bounds.
    #[inline(always)]
    pub unsafe fn get_unchecked(&self, idx: u32) -> &'a str {
        // SAFETY: the caller guarantees `idx` is within bounds.
        unsafe { self.strings.get_unchecked(idx as usize) }
    }

    /// Number of entries in the dictionary.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when the dictionary has no entries.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
/// Decodes `count` varint dictionary indices from `bytes` and resolves each
/// one against `dict`.
///
/// Returns the referenced strings (borrowed from the dictionary's backing
/// buffer) and the total number of bytes consumed.
///
/// # Errors
///
/// * `count` exceeds `MAX_BATCH_COUNT`.
/// * A varint is malformed or the buffer ends early.
/// * An index does not refer to a dictionary entry. Indices above `u32::MAX`
///   are rejected outright instead of being wrapped into range.
#[inline]
pub fn batch_decode_strings<'a>(
    bytes: &[u8],
    count: usize,
    dict: &FastBorrowedDictionary<'a>,
) -> Result<(Vec<&'a str>, usize), TauqError> {
    if count > MAX_BATCH_COUNT {
        return Err(TauqError::Interpret(InterpretError::new(format!(
            "Batch count {} exceeds maximum {}",
            count, MAX_BATCH_COUNT
        ))));
    }
    // Every index occupies at least one byte, so a short buffer can never
    // yield more than `bytes.len()` strings; do not reserve beyond that.
    let mut result = Vec::with_capacity(count.min(bytes.len()));
    let mut pos = 0;
    for _ in 0..count {
        let (idx, len) = fast_decode_varint(&bytes[pos..])?;
        pos += len;
        // Only narrow to u32 once the value is known to fit; a wrapping cast
        // could turn an invalid index into a valid one.
        let entry = if idx <= u64::from(u32::MAX) {
            dict.get(idx as u32)
        } else {
            None
        };
        let s = entry.ok_or_else(|| {
            TauqError::Interpret(InterpretError::new(format!(
                "Invalid string index: {}",
                idx
            )))
        })?;
        result.push(s);
    }
    Ok((result, pos))
}
/// Types that can be decoded directly from an encoded byte stream.
pub trait FastDecode: Sized {
    /// Decodes one value from the front of `bytes`, consulting `dict` for
    /// dictionary-encoded data.
    ///
    /// Returns the value and the number of bytes consumed.
    fn fast_decode_from(
        bytes: &[u8],
        dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError>;

    /// Decodes a complete buffer into a vector of values.
    ///
    /// The buffer starts with an 8-byte header: the 4-byte `TBF_MAGIC`, a
    /// version byte at offset 4 (must not exceed `TBF_VERSION`), and three
    /// bytes that are skipped without inspection. A string dictionary, a
    /// varint item count and the items themselves follow.
    ///
    /// # Errors
    ///
    /// * The buffer is shorter than the header, the magic does not match, or
    ///   the version is newer than `TBF_VERSION`.
    /// * The dictionary or the item count cannot be decoded.
    /// * The item count exceeds `MAX_BATCH_COUNT`.
    /// * An item fails to decode, or an item decoder reports consuming more
    ///   bytes than the buffer holds.
    fn fast_decode_slice(bytes: &[u8]) -> Result<Vec<Self>, TauqError> {
        use super::{TBF_MAGIC, TBF_VERSION};
        if bytes.len() < 8 {
            return Err(TauqError::Interpret(InterpretError::new(
                "Buffer too small for TBF header",
            )));
        }
        if bytes[0..4] != TBF_MAGIC {
            return Err(TauqError::Interpret(InterpretError::new(
                "Invalid TBF magic",
            )));
        }
        if bytes[4] > TBF_VERSION {
            return Err(TauqError::Interpret(InterpretError::new(
                "Unsupported TBF version",
            )));
        }
        let mut pos = 8;
        let (dict, dict_len) = FastBorrowedDictionary::decode(&bytes[pos..])?;
        pos += dict_len;
        let (count, len) = fast_decode_varint(&bytes[pos..])?;
        pos += len;
        // Compare as u64 so an oversized count cannot wrap into range when
        // narrowed to usize on 32-bit targets.
        if count > MAX_BATCH_COUNT as u64 {
            return Err(TauqError::Interpret(InterpretError::new(format!(
                "Item count {} exceeds maximum {}",
                count, MAX_BATCH_COUNT
            ))));
        }
        let count = count as usize;
        // The count comes from the input itself; cap the up-front reservation
        // by the bytes that are actually left. This is only a capacity hint,
        // the vector still grows if items turn out to be smaller than a byte.
        let remaining = bytes.len().saturating_sub(pos);
        let mut result = Vec::with_capacity(count.min(remaining));
        for _ in 0..count {
            // Guard the slice: an implementor that over-reports its consumed
            // length must produce an error, not a panic.
            let rest = bytes.get(pos..).ok_or_else(|| {
                TauqError::Interpret(InterpretError::new(
                    "Item decoder consumed past end of buffer",
                ))
            })?;
            let (item, len) = Self::fast_decode_from(rest, &dict)?;
            result.push(item);
            pos += len;
        }
        Ok(result)
    }
}
impl FastDecode for u32 {
#[inline(always)]
fn fast_decode_from(
bytes: &[u8],
_dict: &FastBorrowedDictionary,
) -> Result<(Self, usize), TauqError> {
let (value, len) = fast_decode_varint(bytes)?;
Ok((value as u32, len))
}
}
/// `u64` values are stored as unsigned varints.
impl FastDecode for u64 {
    /// Decodes one unsigned varint; the dictionary is not consulted.
    ///
    /// Returns the value and the number of bytes consumed, or the error
    /// reported by `fast_decode_varint`.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        _dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        let (value, consumed) = fast_decode_varint(bytes)?;
        Ok((value, consumed))
    }
}
impl FastDecode for i32 {
#[inline(always)]
fn fast_decode_from(
bytes: &[u8],
_dict: &FastBorrowedDictionary,
) -> Result<(Self, usize), TauqError> {
let (value, len) = fast_decode_signed_varint(bytes)?;
Ok((value as i32, len))
}
}
/// `i64` values are stored as zigzag varints.
impl FastDecode for i64 {
    /// Decodes one zigzag varint; the dictionary is not consulted.
    ///
    /// Returns the value and the number of bytes consumed, or the error
    /// reported by `fast_decode_signed_varint`.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        _dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        let (value, consumed) = fast_decode_signed_varint(bytes)?;
        Ok((value, consumed))
    }
}
/// `f32` values are stored as four little-endian bytes.
impl FastDecode for f32 {
    /// Reads the first four bytes of `bytes` as a little-endian `f32`.
    ///
    /// Always consumes exactly four bytes; fails when fewer are available.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        _dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        match *bytes {
            [b0, b1, b2, b3, ..] => Ok((f32::from_le_bytes([b0, b1, b2, b3]), 4)),
            _ => Err(TauqError::Interpret(InterpretError::new(
                "Buffer too small for f32",
            ))),
        }
    }
}
/// `f64` values are stored as eight little-endian bytes.
impl FastDecode for f64 {
    /// Reads the first eight bytes of `bytes` as a little-endian `f64`.
    ///
    /// Always consumes exactly eight bytes; fails when fewer are available.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        _dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        match *bytes {
            [b0, b1, b2, b3, b4, b5, b6, b7, ..] => Ok((
                f64::from_le_bytes([b0, b1, b2, b3, b4, b5, b6, b7]),
                8,
            )),
            _ => Err(TauqError::Interpret(InterpretError::new(
                "Buffer too small for f64",
            ))),
        }
    }
}
/// `bool` values are stored as a single byte.
impl FastDecode for bool {
    /// Reads one byte: zero is `false`, anything else is `true`.
    ///
    /// Always consumes exactly one byte; fails when `bytes` is empty.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        _dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        match bytes.first() {
            Some(&flag) => Ok((flag != 0, 1)),
            None => Err(TauqError::Interpret(InterpretError::new(
                "Buffer too small for bool",
            ))),
        }
    }
}
/// `String` values are stored as a varint index into the dictionary.
impl FastDecode for String {
    /// Decodes one varint index, looks it up in `dict` and returns an owned
    /// copy of the entry together with the number of bytes consumed.
    ///
    /// # Errors
    ///
    /// Fails when the varint is malformed or when the index does not refer to
    /// a dictionary entry. Indices above `u32::MAX` are rejected outright
    /// instead of being wrapped into range.
    #[inline(always)]
    fn fast_decode_from(
        bytes: &[u8],
        dict: &FastBorrowedDictionary,
    ) -> Result<(Self, usize), TauqError> {
        let (idx, len) = fast_decode_varint(bytes)?;
        // Only narrow to u32 once the value is known to fit; a wrapping cast
        // could turn an invalid index into a valid one.
        let entry = if idx <= u64::from(u32::MAX) {
            dict.get(idx as u32)
        } else {
            None
        };
        let s = entry.ok_or_else(|| {
            TauqError::Interpret(InterpretError::new(format!(
                "Invalid string index: {}",
                idx
            )))
        })?;
        Ok((s.to_string(), len))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One-, two- and three-byte unsigned varints decode to the expected
    /// `(value, bytes_consumed)` pair.
    #[test]
    fn test_fast_decode_varint() {
        let check = |input: &[u8], expected: (u64, usize)| {
            assert_eq!(fast_decode_varint(input).unwrap(), expected);
        };
        check(&[0], (0, 1));
        check(&[1], (1, 1));
        check(&[127], (127, 1));
        check(&[0x80, 0x01], (128, 2));
        check(&[0xFF, 0x7F], (16383, 2));
        check(&[0x80, 0x80, 0x01], (16384, 3));
    }

    /// Single-byte zigzag values map to alternating positive and negative
    /// integers.
    #[test]
    fn test_fast_decode_signed_varint() {
        let check = |encoded: u8, expected: i64| {
            assert_eq!(
                fast_decode_signed_varint(&[encoded]).unwrap(),
                (expected, 1)
            );
        };
        check(0, 0);
        check(2, 1);
        check(4, 2);
        check(1, -1);
        check(3, -2);
    }

    /// Three varints of increasing width are decoded back to back.
    #[test]
    fn test_batch_decode_u32() {
        let encoded: [u8; 6] = [1, 0x80, 0x01, 0x80, 0x80, 0x01];
        let (values, consumed) = batch_decode_u32(&encoded, 3).unwrap();
        assert_eq!(values, vec![1, 128, 16384]);
        assert_eq!(consumed, 6);
    }

    /// Little-endian floats round-trip bit-for-bit.
    #[test]
    fn test_batch_decode_f32() {
        let expected = [std::f32::consts::PI, std::f32::consts::E];
        let mut data = Vec::with_capacity(8);
        for value in &expected {
            data.extend_from_slice(&value.to_le_bytes());
        }
        let values = batch_decode_f32(&data, 2).unwrap();
        assert_eq!(values[0], expected[0]);
        assert_eq!(values[1], expected[1]);
    }

    /// A two-entry dictionary is parsed, indexed, and reports its encoded size.
    #[test]
    fn test_fast_borrowed_dictionary() {
        let words = ["hello", "world"];
        // Entry count first, then each word as a length byte plus its bytes.
        let mut data = vec![words.len() as u8];
        for word in &words {
            data.push(word.len() as u8);
            data.extend_from_slice(word.as_bytes());
        }
        let (dict, consumed) = FastBorrowedDictionary::decode(&data).unwrap();
        assert_eq!(dict.len(), 2);
        assert_eq!(dict.get(0), Some("hello"));
        assert_eq!(dict.get(1), Some("world"));
        assert_eq!(dict.get(2), None);
        assert_eq!(consumed, 13);
    }
}