crous_core/varint.rs
1//! Varint encoding: unsigned LEB128 and signed ZigZag + LEB128.
2//!
3//! ## Why LEB128?
4//! LEB128 is the standard variable-length integer encoding used by protobuf,
5//! DWARF, WebAssembly, and many other formats. It encodes small integers in
6//! fewer bytes (1 byte for 0–127) while supporting the full u64 range in at
7//! most 10 bytes. This makes it ideal for field IDs, lengths, and small counts
8//! which dominate typical structured data.
9//!
10//! ## Why ZigZag for signed integers?
11//! ZigZag maps signed integers to unsigned integers so that small-magnitude
12//! numbers (both positive and negative) produce small varint encodings.
13//! Without ZigZag, -1 would encode as a 10-byte varint (all bits set).
14//! With ZigZag: -1 → 1, 1 → 2, -2 → 3, 2 → 4, etc.
15
16use crate::error::{CrousError, Result};
17
18// ---------------------------------------------------------------------------
19// ZigZag encoding/decoding
20// ---------------------------------------------------------------------------
21
22/// Encode a signed 64-bit integer using ZigZag encoding.
23///
24/// Maps signed integers to unsigned: 0→0, -1→1, 1→2, -2→3, 2→4, ...
25///
26/// ```
27/// use crous_core::varint::zigzag_encode;
28/// assert_eq!(zigzag_encode(0), 0);
29/// assert_eq!(zigzag_encode(-1), 1);
30/// assert_eq!(zigzag_encode(1), 2);
31/// assert_eq!(zigzag_encode(-2), 3);
32/// assert_eq!(zigzag_encode(i64::MIN), u64::MAX);
33/// ```
34#[inline]
35pub fn zigzag_encode(n: i64) -> u64 {
36 ((n << 1) ^ (n >> 63)) as u64
37}
38
39/// Decode a ZigZag-encoded unsigned integer back to signed.
40///
41/// ```
42/// use crous_core::varint::zigzag_decode;
43/// assert_eq!(zigzag_decode(0), 0);
44/// assert_eq!(zigzag_decode(1), -1);
45/// assert_eq!(zigzag_decode(2), 1);
46/// assert_eq!(zigzag_decode(3), -2);
47/// ```
48#[inline]
49pub fn zigzag_decode(n: u64) -> i64 {
50 ((n >> 1) as i64) ^ (-((n & 1) as i64))
51}
52
53// ---------------------------------------------------------------------------
54// LEB128 unsigned varint
55// ---------------------------------------------------------------------------
56
57/// Encode an unsigned 64-bit integer as LEB128 into `buf`.
58/// Returns the number of bytes written (1–10).
59///
60/// ```
61/// use crous_core::varint::encode_varint;
62/// let mut buf = [0u8; 10];
63/// assert_eq!(encode_varint(0, &mut buf), 1);
64/// assert_eq!(buf[0], 0x00);
65///
66/// assert_eq!(encode_varint(127, &mut buf), 1);
67/// assert_eq!(buf[0], 0x7f);
68///
69/// assert_eq!(encode_varint(128, &mut buf), 2);
70/// assert_eq!(&buf[..2], &[0x80, 0x01]);
71///
72/// assert_eq!(encode_varint(300, &mut buf), 2);
73/// assert_eq!(&buf[..2], &[0xac, 0x02]);
74/// ```
75#[inline]
76pub fn encode_varint(mut value: u64, buf: &mut [u8; 10]) -> usize {
77 let mut i = 0;
78 loop {
79 let mut byte = (value & 0x7F) as u8;
80 value >>= 7;
81 if value != 0 {
82 byte |= 0x80;
83 }
84 buf[i] = byte;
85 i += 1;
86 if value == 0 {
87 break;
88 }
89 }
90 i
91}
92
93/// Encode a varint and append it to a `Vec<u8>`.
94#[inline]
95pub fn encode_varint_vec(value: u64, out: &mut Vec<u8>) {
96 let mut buf = [0u8; 10];
97 let n = encode_varint(value, &mut buf);
98 out.extend_from_slice(&buf[..n]);
99}
100
101/// Decode an unsigned LEB128 varint from `data` starting at `offset`.
102/// Returns `(value, bytes_consumed)`.
103///
104/// # Errors
105/// - `VarintOverflow` if the varint exceeds 10 bytes (64-bit limit).
106/// - `UnexpectedEof` if the data ends before the varint is complete.
107///
108/// ```
109/// use crous_core::varint::decode_varint;
110/// let data = [0xac, 0x02];
111/// let (val, len) = decode_varint(&data, 0).unwrap();
112/// assert_eq!(val, 300);
113/// assert_eq!(len, 2);
114/// ```
115#[inline]
116pub fn decode_varint(data: &[u8], offset: usize) -> Result<(u64, usize)> {
117 let mut result: u64 = 0;
118 let mut shift: u32 = 0;
119 let mut i = 0usize;
120
121 loop {
122 if offset + i >= data.len() {
123 return Err(CrousError::UnexpectedEof(offset + i));
124 }
125 let byte = data[offset + i];
126 i += 1;
127
128 // Check for overflow: LEB128 for u64 is at most 10 bytes,
129 // and the 10th byte must have value ≤ 1 (bit 64).
130 if i > 10 {
131 return Err(CrousError::VarintOverflow);
132 }
133 if i == 10 && byte > 0x01 {
134 return Err(CrousError::VarintOverflow);
135 }
136
137 result |= ((byte & 0x7F) as u64) << shift;
138 if byte & 0x80 == 0 {
139 return Ok((result, i));
140 }
141 shift += 7;
142 }
143}
144
145/// Convenience: encode a signed i64 as ZigZag + LEB128 into a Vec.
146#[inline]
147pub fn encode_signed_varint_vec(value: i64, out: &mut Vec<u8>) {
148 encode_varint_vec(zigzag_encode(value), out);
149}
150
151/// Convenience: decode a ZigZag + LEB128 signed i64 from `data` at `offset`.
152#[inline]
153pub fn decode_signed_varint(data: &[u8], offset: usize) -> Result<(i64, usize)> {
154 let (raw, len) = decode_varint(data, offset)?;
155 Ok((zigzag_decode(raw), len))
156}
157
158#[cfg(test)]
159mod tests {
160 use super::*;
161
162 #[test]
163 fn zigzag_roundtrip() {
164 for &v in &[0i64, 1, -1, 2, -2, 127, -128, i64::MAX, i64::MIN] {
165 assert_eq!(
166 zigzag_decode(zigzag_encode(v)),
167 v,
168 "ZigZag roundtrip failed for {v}"
169 );
170 }
171 }
172
173 #[test]
174 fn varint_single_byte() {
175 let mut buf = [0u8; 10];
176 for v in 0..=127u64 {
177 let n = encode_varint(v, &mut buf);
178 assert_eq!(
179 n, 1,
180 "values 0–127 should encode in 1 byte, got {n} for {v}"
181 );
182 assert_eq!(buf[0], v as u8);
183 let (decoded, consumed) = decode_varint(&buf[..n], 0).unwrap();
184 assert_eq!(decoded, v);
185 assert_eq!(consumed, 1);
186 }
187 }
188
189 #[test]
190 fn varint_multi_byte() {
191 let test_cases: &[(u64, &[u8])] = &[
192 (128, &[0x80, 0x01]),
193 (300, &[0xac, 0x02]),
194 (16384, &[0x80, 0x80, 0x01]),
195 (
196 u64::MAX,
197 &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01],
198 ),
199 ];
200 for &(value, expected) in test_cases {
201 let mut buf = [0u8; 10];
202 let n = encode_varint(value, &mut buf);
203 assert_eq!(&buf[..n], expected, "encoding mismatch for {value}");
204 let (decoded, consumed) = decode_varint(expected, 0).unwrap();
205 assert_eq!(decoded, value, "decode mismatch for {value}");
206 assert_eq!(consumed, expected.len());
207 }
208 }
209
210 #[test]
211 fn varint_overflow_detection() {
212 // 11 continuation bytes — definitely too long.
213 let bad = [0x80u8; 11];
214 assert!(decode_varint(&bad, 0).is_err());
215 }
216
217 #[test]
218 fn varint_unexpected_eof() {
219 // Continuation bit set but no more data.
220 let truncated = [0x80u8];
221 assert!(decode_varint(&truncated, 0).is_err());
222 }
223
224 #[test]
225 fn signed_varint_roundtrip() {
226 for &v in &[0i64, 1, -1, 42, -42, 1000, -1000, i64::MAX, i64::MIN] {
227 let mut buf = Vec::new();
228 encode_signed_varint_vec(v, &mut buf);
229 let (decoded, _) = decode_signed_varint(&buf, 0).unwrap();
230 assert_eq!(decoded, v, "signed varint roundtrip failed for {v}");
231 }
232 }
233}