Skip to main content

crous_core/
varint.rs

1//! Varint encoding: unsigned LEB128 and signed ZigZag + LEB128.
2//!
3//! ## Why LEB128?
4//! LEB128 is the standard variable-length integer encoding used by protobuf,
5//! DWARF, WebAssembly, and many other formats. It encodes small integers in
6//! fewer bytes (1 byte for 0–127) while supporting the full u64 range in at
7//! most 10 bytes. This makes it ideal for field IDs, lengths, and small counts
8//! which dominate typical structured data.
9//!
10//! ## Why ZigZag for signed integers?
11//! ZigZag maps signed integers to unsigned integers so that small-magnitude
12//! numbers (both positive and negative) produce small varint encodings.
13//! Without ZigZag, -1 would encode as a 10-byte varint (all bits set).
14//! With ZigZag: -1 → 1, 1 → 2, -2 → 3, 2 → 4, etc.
15
16use crate::error::{CrousError, Result};
17
18// ---------------------------------------------------------------------------
19// ZigZag encoding/decoding
20// ---------------------------------------------------------------------------
21
22/// Encode a signed 64-bit integer using ZigZag encoding.
23///
24/// Maps signed integers to unsigned: 0→0, -1→1, 1→2, -2→3, 2→4, ...
25///
26/// ```
27/// use crous_core::varint::zigzag_encode;
28/// assert_eq!(zigzag_encode(0), 0);
29/// assert_eq!(zigzag_encode(-1), 1);
30/// assert_eq!(zigzag_encode(1), 2);
31/// assert_eq!(zigzag_encode(-2), 3);
32/// assert_eq!(zigzag_encode(i64::MIN), u64::MAX);
33/// ```
34#[inline]
35pub fn zigzag_encode(n: i64) -> u64 {
36    ((n << 1) ^ (n >> 63)) as u64
37}
38
39/// Decode a ZigZag-encoded unsigned integer back to signed.
40///
41/// ```
42/// use crous_core::varint::zigzag_decode;
43/// assert_eq!(zigzag_decode(0), 0);
44/// assert_eq!(zigzag_decode(1), -1);
45/// assert_eq!(zigzag_decode(2), 1);
46/// assert_eq!(zigzag_decode(3), -2);
47/// ```
48#[inline]
49pub fn zigzag_decode(n: u64) -> i64 {
50    ((n >> 1) as i64) ^ (-((n & 1) as i64))
51}
52
53// ---------------------------------------------------------------------------
54// LEB128 unsigned varint
55// ---------------------------------------------------------------------------
56
57/// Encode an unsigned 64-bit integer as LEB128 into `buf`.
58/// Returns the number of bytes written (1–10).
59///
60/// ```
61/// use crous_core::varint::encode_varint;
62/// let mut buf = [0u8; 10];
63/// assert_eq!(encode_varint(0, &mut buf), 1);
64/// assert_eq!(buf[0], 0x00);
65///
66/// assert_eq!(encode_varint(127, &mut buf), 1);
67/// assert_eq!(buf[0], 0x7f);
68///
69/// assert_eq!(encode_varint(128, &mut buf), 2);
70/// assert_eq!(&buf[..2], &[0x80, 0x01]);
71///
72/// assert_eq!(encode_varint(300, &mut buf), 2);
73/// assert_eq!(&buf[..2], &[0xac, 0x02]);
74/// ```
75#[inline]
76pub fn encode_varint(mut value: u64, buf: &mut [u8; 10]) -> usize {
77    let mut i = 0;
78    loop {
79        let mut byte = (value & 0x7F) as u8;
80        value >>= 7;
81        if value != 0 {
82            byte |= 0x80;
83        }
84        buf[i] = byte;
85        i += 1;
86        if value == 0 {
87            break;
88        }
89    }
90    i
91}
92
93/// Encode a varint and append it to a `Vec<u8>`.
94#[inline]
95pub fn encode_varint_vec(value: u64, out: &mut Vec<u8>) {
96    let mut buf = [0u8; 10];
97    let n = encode_varint(value, &mut buf);
98    out.extend_from_slice(&buf[..n]);
99}
100
101/// Decode an unsigned LEB128 varint from `data` starting at `offset`.
102/// Returns `(value, bytes_consumed)`.
103///
104/// # Errors
105/// - `VarintOverflow` if the varint exceeds 10 bytes (64-bit limit).
106/// - `UnexpectedEof` if the data ends before the varint is complete.
107///
108/// ```
109/// use crous_core::varint::decode_varint;
110/// let data = [0xac, 0x02];
111/// let (val, len) = decode_varint(&data, 0).unwrap();
112/// assert_eq!(val, 300);
113/// assert_eq!(len, 2);
114/// ```
115#[inline]
116pub fn decode_varint(data: &[u8], offset: usize) -> Result<(u64, usize)> {
117    let mut result: u64 = 0;
118    let mut shift: u32 = 0;
119    let mut i = 0usize;
120
121    loop {
122        if offset + i >= data.len() {
123            return Err(CrousError::UnexpectedEof(offset + i));
124        }
125        let byte = data[offset + i];
126        i += 1;
127
128        // Check for overflow: LEB128 for u64 is at most 10 bytes,
129        // and the 10th byte must have value ≤ 1 (bit 64).
130        if i > 10 {
131            return Err(CrousError::VarintOverflow);
132        }
133        if i == 10 && byte > 0x01 {
134            return Err(CrousError::VarintOverflow);
135        }
136
137        result |= ((byte & 0x7F) as u64) << shift;
138        if byte & 0x80 == 0 {
139            return Ok((result, i));
140        }
141        shift += 7;
142    }
143}
144
145/// Convenience: encode a signed i64 as ZigZag + LEB128 into a Vec.
146#[inline]
147pub fn encode_signed_varint_vec(value: i64, out: &mut Vec<u8>) {
148    encode_varint_vec(zigzag_encode(value), out);
149}
150
151/// Convenience: decode a ZigZag + LEB128 signed i64 from `data` at `offset`.
152#[inline]
153pub fn decode_signed_varint(data: &[u8], offset: usize) -> Result<(i64, usize)> {
154    let (raw, len) = decode_varint(data, offset)?;
155    Ok((zigzag_decode(raw), len))
156}
157
158#[cfg(test)]
159mod tests {
160    use super::*;
161
162    #[test]
163    fn zigzag_roundtrip() {
164        for &v in &[0i64, 1, -1, 2, -2, 127, -128, i64::MAX, i64::MIN] {
165            assert_eq!(
166                zigzag_decode(zigzag_encode(v)),
167                v,
168                "ZigZag roundtrip failed for {v}"
169            );
170        }
171    }
172
173    #[test]
174    fn varint_single_byte() {
175        let mut buf = [0u8; 10];
176        for v in 0..=127u64 {
177            let n = encode_varint(v, &mut buf);
178            assert_eq!(
179                n, 1,
180                "values 0–127 should encode in 1 byte, got {n} for {v}"
181            );
182            assert_eq!(buf[0], v as u8);
183            let (decoded, consumed) = decode_varint(&buf[..n], 0).unwrap();
184            assert_eq!(decoded, v);
185            assert_eq!(consumed, 1);
186        }
187    }
188
189    #[test]
190    fn varint_multi_byte() {
191        let test_cases: &[(u64, &[u8])] = &[
192            (128, &[0x80, 0x01]),
193            (300, &[0xac, 0x02]),
194            (16384, &[0x80, 0x80, 0x01]),
195            (
196                u64::MAX,
197                &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01],
198            ),
199        ];
200        for &(value, expected) in test_cases {
201            let mut buf = [0u8; 10];
202            let n = encode_varint(value, &mut buf);
203            assert_eq!(&buf[..n], expected, "encoding mismatch for {value}");
204            let (decoded, consumed) = decode_varint(expected, 0).unwrap();
205            assert_eq!(decoded, value, "decode mismatch for {value}");
206            assert_eq!(consumed, expected.len());
207        }
208    }
209
210    #[test]
211    fn varint_overflow_detection() {
212        // 11 continuation bytes — definitely too long.
213        let bad = [0x80u8; 11];
214        assert!(decode_varint(&bad, 0).is_err());
215    }
216
217    #[test]
218    fn varint_unexpected_eof() {
219        // Continuation bit set but no more data.
220        let truncated = [0x80u8];
221        assert!(decode_varint(&truncated, 0).is_err());
222    }
223
224    #[test]
225    fn signed_varint_roundtrip() {
226        for &v in &[0i64, 1, -1, 42, -42, 1000, -1000, i64::MAX, i64::MIN] {
227            let mut buf = Vec::new();
228            encode_signed_varint_vec(v, &mut buf);
229            let (decoded, _) = decode_signed_varint(&buf, 0).unwrap();
230            assert_eq!(decoded, v, "signed varint roundtrip failed for {v}");
231        }
232    }
233}