vb/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3#[derive(thiserror::Error, Debug)]
4pub enum Error {
5  #[error("Incomplete vbyte sequence")]
6  VbyteNoEnd,
7  #[error("Vbyte overflow")]
8  VbyteOverflow,
9}
10
11pub type Result<T> = std::result::Result<T, Error>;
12
13/// Decode a single variable-byte encoded integer from the input.
14/// Returns the value and the number of bytes consumed.
15///
16/// 从输入中解码一个变长编码的整数。
17/// 返回该值和消耗的字节数。
18pub fn d(input: impl AsRef<[u8]>) -> Result<(u64, usize)> {
19  let mut value: u64 = 0;
20  let mut shift = 0;
21  let mut consumed = 0;
22
23  for byte in input.as_ref() {
24    let data = (byte & 0x7F) as u64;
25    value |= data << shift;
26    consumed += 1;
27
28    if byte & 0x80 == 0 {
29      // MSB 为 0,结束
30      return Ok((value, consumed));
31    }
32
33    shift += 7;
34    if shift >= 64 {
35      return Err(Error::VbyteOverflow);
36    }
37  }
38
39  Err(Error::VbyteNoEnd)
40}
41
/// Appends the variable-byte encoding of `value` to `bytes`.
///
/// The value is emitted in 7-bit groups, least-significant group first. Every
/// byte except the last has its high bit (`0x80`) set to signal that more
/// bytes follow. Zero is encoded as the single byte `0x00`.
#[inline]
fn e(mut value: u64, bytes: &mut Vec<u8>) {
  // While more than 7 significant bits remain, emit a continuation byte.
  while value >= 0x80 {
    bytes.push((value as u8 & 0x7F) | 0x80);
    value >>= 7;
  }
  // What is left fits in 7 bits; the high bit stays clear to end the sequence.
  bytes.push(value as u8);
}
59
60/// Encodes a list of `u64` integers into variable-byte format.
61///
62/// 将 `u64` 整数列表编码为变长格式。
63pub fn e_li(li: impl AsRef<[u64]>) -> Vec<u8> {
64  let li = li.as_ref();
65  let mut result = Vec::with_capacity(li.len()); // Heuristic: at least 1 byte per int
66  for num in li {
67    e(*num, &mut result);
68  }
69  result
70}
71
72/// Decodes a list of variable-byte encoded integers from the input.
73///
74/// 从输入中解码变长编码的整数列表。
75pub fn d_li(data: impl AsRef<[u8]>) -> Result<Vec<u64>> {
76  let bytes = data.as_ref();
77  let len = bytes.len();
78  let mut result = Vec::with_capacity(len / 2); // Heuristic estimate
79  let mut offset = 0;
80
81  while offset < len {
82    let (num, consumed) = d(&bytes[offset..])?;
83    result.push(num);
84    offset += consumed;
85  }
86
87  Ok(result)
88}
89
/// Encodes a sequence of `u64` integers using delta encoding combined with
/// variable-byte encoding.
///
/// The first value is stored as-is and every following value is stored as its
/// difference from the predecessor. For increasing sequences the differences
/// are small, which reduces the serialized size.
///
/// Differences are computed with wrapping subtraction (modulo 2^64). A value
/// smaller than its predecessor therefore never panics and behaves the same
/// in debug and release builds; it simply produces a large delta that takes
/// up to ten bytes. A decoder that accumulates the deltas with wrapping
/// addition recovers the original values.
///
/// An empty input yields an empty vector.
#[cfg(feature = "diff")]
#[cfg_attr(docsrs, doc(cfg(feature = "diff")))]
pub fn e_diff(li: impl AsRef<[u64]>) -> Vec<u8> {
  let li = li.as_ref();
  // Every delta takes at least one byte, so this is a lower bound on the size.
  let mut result = Vec::with_capacity(li.len());

  // Starting from a predecessor of 0 makes the first "delta" the value itself.
  let mut prev = 0u64;
  for &cur in li {
    e(cur.wrapping_sub(prev), &mut result);
    prev = cur;
  }

  result
}
115
/// Decodes a sequence of integers encoded with `e_diff`.
///
/// The input is decoded as a list of variable-byte integers, which are then
/// turned back into the original values by taking a running sum.
///
/// The running sum uses wrapping addition (modulo 2^64), so corrupt or
/// hostile input cannot trigger an arithmetic-overflow panic and the result
/// is the same in debug and release builds.
///
/// # Errors
///
/// Returns any error reported by [`d_li`] while decoding the raw integers.
#[cfg(feature = "diff")]
#[cfg_attr(docsrs, doc(cfg(feature = "diff")))]
pub fn d_diff(vs: impl AsRef<[u8]>) -> Result<Vec<u64>> {
  let mut li = d_li(vs)?;

  // Replace each delta in place with the prefix sum up to and including it.
  let mut acc = 0u64;
  for v in &mut li {
    acc = acc.wrapping_add(*v);
    *v = acc;
  }

  Ok(li)
}