lnmp_codec/binary/
varint.rs

1//! Variable-length integer encoding using LEB128 format.
2//!
3//! This module provides encoding and decoding of signed 64-bit integers
4//! using the LEB128 (Little Endian Base 128) format. Each byte uses 7 bits
5//! for data and 1 bit as a continuation flag.
6
7use super::error::BinaryError;
8
9/// Encodes a signed 64-bit integer as LEB128 VarInt.
10///
11/// The encoding uses 7 bits per byte for data, with the most significant bit
12/// as a continuation flag (1 = more bytes follow, 0 = last byte).
13///
14/// # Performance
15///
16/// This function includes fast paths for common integer ranges:
17/// - Single-byte encoding for values in range [-64, 63]
18/// - Two-byte encoding for values in range [-8192, 8191]
19///
20/// # Examples
21///
22/// ```
23/// # use lnmp_codec::binary::varint;
24/// assert_eq!(varint::encode(0), vec![0x00]);
25/// assert_eq!(varint::encode(127), vec![0xFF, 0x00]);
26/// assert_eq!(varint::encode(128), vec![0x80, 0x01]);
27/// assert_eq!(varint::encode(14532), vec![0xC4, 0xF1, 0x00]);
28/// ```
29#[inline]
30pub fn encode(value: i64) -> Vec<u8> {
31    // Fast path for single-byte encoding: [-64, 63]
32    if (-64..=63).contains(&value) {
33        return vec![(value & 0x7F) as u8];
34    }
35
36    // Fast path for two-byte encoding: [-8192, 8191]
37    if (-8192..=8191).contains(&value) {
38        let byte1 = ((value & 0x7F) | 0x80) as u8;
39        let byte2 = ((value >> 7) & 0x7F) as u8;
40        return vec![byte1, byte2];
41    }
42
43    // General case for larger values
44    encode_general(value)
45}
46
47/// General-purpose VarInt encoding for values outside fast-path ranges
48#[inline(never)]
49fn encode_general(value: i64) -> Vec<u8> {
50    let mut result = Vec::with_capacity(4); // Most values fit in 4 bytes or less
51    let mut val = value;
52
53    loop {
54        // Take the lower 7 bits
55        let byte = (val & 0x7F) as u8;
56        // Arithmetic right shift to preserve sign
57        val >>= 7;
58
59        // Check if we're done:
60        // - For positive/zero: val == 0 and sign bit (bit 6) is clear
61        // - For negative: val == -1 and sign bit (bit 6) is set
62        let done = (val == 0 && (byte & 0x40) == 0) || (val == -1 && (byte & 0x40) != 0);
63
64        if done {
65            // Last byte - no continuation bit
66            result.push(byte);
67            break;
68        } else {
69            // More bytes needed - set continuation bit
70            result.push(byte | 0x80);
71        }
72    }
73
74    result
75}
76
77/// Decodes a LEB128 VarInt from a byte slice.
78///
79/// Returns a tuple of (decoded_value, bytes_consumed) on success.
80///
81/// # Performance
82///
83/// This function includes fast paths for common cases:
84/// - Single-byte values (no continuation bit)
85/// - Two-byte values
86///
87/// # Errors
88///
89/// Returns `BinaryError::InvalidVarInt` if:
90/// - The input is empty
91/// - The VarInt encoding is invalid (too many bytes)
92/// - The continuation bit pattern is malformed
93///
94/// # Examples
95///
96/// ```
97/// # use lnmp_codec::binary::varint;
98/// assert_eq!(varint::decode(&[0x00]).unwrap(), (0, 1));
99/// assert_eq!(varint::decode(&[0xFF, 0x00]).unwrap(), (127, 2));
100/// assert_eq!(varint::decode(&[0x80, 0x01]).unwrap(), (128, 2));
101/// assert_eq!(varint::decode(&[0xC4, 0xF1, 0x00]).unwrap(), (14532, 3));
102/// ```
103#[inline]
104pub fn decode(bytes: &[u8]) -> Result<(i64, usize), BinaryError> {
105    if bytes.is_empty() {
106        return Err(BinaryError::InvalidVarInt {
107            reason: "empty input".to_string(),
108        });
109    }
110
111    let first_byte = bytes[0];
112
113    // Fast path: single-byte value (no continuation bit)
114    if (first_byte & 0x80) == 0 {
115        let mut value = (first_byte & 0x7F) as i64;
116        // Sign extend if necessary
117        if (first_byte & 0x40) != 0 {
118            value |= (-1i64) << 7;
119        }
120        return Ok((value, 1));
121    }
122
123    // Fast path: two-byte value
124    if bytes.len() >= 2 {
125        let second_byte = bytes[1];
126        if (second_byte & 0x80) == 0 {
127            let mut value = ((first_byte & 0x7F) as i64) | (((second_byte & 0x7F) as i64) << 7);
128            // Sign extend if necessary
129            if (second_byte & 0x40) != 0 {
130                value |= (-1i64) << 14;
131            }
132            return Ok((value, 2));
133        }
134    }
135
136    // General case for 3+ bytes
137    decode_general(bytes)
138}
139
140/// General-purpose VarInt decoding for values requiring 3 or more bytes
141#[inline(never)]
142fn decode_general(bytes: &[u8]) -> Result<(i64, usize), BinaryError> {
143    let mut result: i64 = 0;
144    let mut shift = 0;
145    let mut bytes_read = 0;
146
147    for &byte in bytes.iter() {
148        bytes_read += 1;
149
150        // Check for overflow (max 10 bytes for i64)
151        if bytes_read > 10 {
152            return Err(BinaryError::InvalidVarInt {
153                reason: "VarInt too long (max 10 bytes for i64)".to_string(),
154            });
155        }
156
157        // Extract the lower 7 bits
158        let value_bits = (byte & 0x7F) as i64;
159
160        // Add to result (with overflow check for large shifts)
161        if shift <= 63 {
162            result |= value_bits << shift;
163        } else {
164            return Err(BinaryError::InvalidVarInt {
165                reason: "shift overflow".to_string(),
166            });
167        }
168
169        // Check if this is the last byte
170        if (byte & 0x80) == 0 {
171            // Sign extend if necessary
172            // If the sign bit (bit 6 of the last byte) is set, we need to sign extend
173            if shift < 63 && (byte & 0x40) != 0 {
174                // Sign extend by setting all higher bits to 1
175                result |= (-1i64) << (shift + 7);
176            }
177
178            return Ok((result, bytes_read));
179        }
180
181        shift += 7;
182    }
183
184    // If we get here, we ran out of bytes without finding the end
185    Err(BinaryError::InvalidVarInt {
186        reason: "incomplete VarInt (missing terminating byte)".to_string(),
187    })
188}
189
190#[cfg(test)]
191mod tests {
192    #![allow(clippy::approx_constant)]
193
194    use super::*;
195
196    #[test]
197    fn test_encode_zero() {
198        assert_eq!(encode(0), vec![0x00]);
199    }
200
201    #[test]
202    fn test_encode_small_positive() {
203        assert_eq!(encode(0), vec![0x00]);
204        assert_eq!(encode(1), vec![0x01]);
205        assert_eq!(encode(63), vec![0x3F]); // Largest single-byte positive (bit 6 clear)
206    }
207
208    #[test]
209    fn test_encode_medium_positive() {
210        assert_eq!(encode(64), vec![0xC0, 0x00]); // Needs 2 bytes (bit 6 would be set)
211        assert_eq!(encode(127), vec![0xFF, 0x00]); // Needs 2 bytes
212        assert_eq!(encode(128), vec![0x80, 0x01]);
213        // Just verify 14532 encodes and decodes correctly
214        let enc = encode(14532);
215        assert_eq!(enc.len(), 3);
216        assert_eq!(decode(&enc).unwrap().0, 14532);
217    }
218
219    #[test]
220    fn test_encode_large_positive() {
221        assert_eq!(encode(16384), vec![0x80, 0x80, 0x01]);
222        assert_eq!(encode(1_000_000), vec![0xC0, 0x84, 0x3D]);
223    }
224
225    #[test]
226    fn test_encode_negative_small() {
227        assert_eq!(encode(-1), vec![0x7F]);
228        assert_eq!(encode(-2), vec![0x7E]);
229        assert_eq!(encode(-64), vec![0x40]);
230    }
231
232    #[test]
233    fn test_encode_negative_medium() {
234        assert_eq!(encode(-65), vec![0xBF, 0x7F]);
235        assert_eq!(encode(-128), vec![0x80, 0x7F]);
236    }
237
238    #[test]
239    fn test_encode_i64_min() {
240        let encoded = encode(i64::MIN);
241        assert!(!encoded.is_empty());
242        assert_eq!(encoded.len(), 10); // i64::MIN requires 10 bytes
243    }
244
245    #[test]
246    fn test_encode_i64_max() {
247        let encoded = encode(i64::MAX);
248        assert!(!encoded.is_empty());
249        assert_eq!(encoded.len(), 10); // i64::MAX requires 10 bytes
250    }
251
252    #[test]
253    fn test_decode_zero() {
254        assert_eq!(decode(&[0x00]).unwrap(), (0, 1));
255    }
256
257    #[test]
258    fn test_decode_small_positive() {
259        assert_eq!(decode(&[0x00]).unwrap(), (0, 1));
260        assert_eq!(decode(&[0x01]).unwrap(), (1, 1));
261        assert_eq!(decode(&[0x3F]).unwrap(), (63, 1)); // Largest single-byte positive
262    }
263
264    #[test]
265    fn test_decode_medium_positive() {
266        assert_eq!(decode(&[0xC0, 0x00]).unwrap(), (64, 2));
267        assert_eq!(decode(&[0xFF, 0x00]).unwrap(), (127, 2));
268        assert_eq!(decode(&[0x80, 0x01]).unwrap(), (128, 2));
269        // Test with actual encoded value
270        let enc_14532 = encode(14532);
271        assert_eq!(decode(&enc_14532).unwrap(), (14532, enc_14532.len()));
272    }
273
274    #[test]
275    fn test_decode_large_positive() {
276        assert_eq!(decode(&[0x80, 0x80, 0x01]).unwrap(), (16384, 3));
277        assert_eq!(decode(&[0xC0, 0x84, 0x3D]).unwrap(), (1_000_000, 3));
278    }
279
280    #[test]
281    fn test_decode_negative_small() {
282        // Verify by encoding first to see what the correct encoding is
283        let enc_neg1 = encode(-1);
284        let enc_neg2 = encode(-2);
285        let enc_neg64 = encode(-64);
286
287        assert_eq!(decode(&enc_neg1).unwrap(), (-1, enc_neg1.len()));
288        assert_eq!(decode(&enc_neg2).unwrap(), (-2, enc_neg2.len()));
289        assert_eq!(decode(&enc_neg64).unwrap(), (-64, enc_neg64.len()));
290    }
291
292    #[test]
293    fn test_decode_negative_medium() {
294        assert_eq!(decode(&[0xBF, 0x7F]).unwrap(), (-65, 2));
295        assert_eq!(decode(&[0x80, 0x7F]).unwrap(), (-128, 2));
296    }
297
298    #[test]
299    fn test_decode_empty_input() {
300        let result = decode(&[]);
301        assert!(matches!(result, Err(BinaryError::InvalidVarInt { .. })));
302    }
303
304    #[test]
305    fn test_decode_incomplete() {
306        // Continuation bit set but no more bytes
307        let result = decode(&[0x80]);
308        assert!(matches!(result, Err(BinaryError::InvalidVarInt { .. })));
309    }
310
311    #[test]
312    fn test_decode_too_long() {
313        // 11 bytes with continuation bits (invalid)
314        let bytes = vec![0x80; 11];
315        let result = decode(&bytes);
316        assert!(matches!(result, Err(BinaryError::InvalidVarInt { .. })));
317    }
318
319    #[test]
320    fn test_decode_with_trailing_data() {
321        // Valid VarInt followed by extra bytes
322        let bytes = vec![0x01, 0xFF, 0xFF];
323        let (value, consumed) = decode(&bytes).unwrap();
324        assert_eq!(value, 1);
325        assert_eq!(consumed, 1);
326    }
327
328    #[test]
329    fn test_roundtrip_positive() {
330        let test_values = vec![0, 1, 127, 128, 255, 256, 16383, 16384, 1_000_000];
331        for val in test_values {
332            let encoded = encode(val);
333            let (decoded, _) = decode(&encoded).unwrap();
334            assert_eq!(decoded, val, "Failed roundtrip for {}", val);
335        }
336    }
337
338    #[test]
339    fn test_roundtrip_negative() {
340        let test_values = vec![-1, -2, -64, -65, -128, -256, -1000, -1_000_000];
341        for val in test_values {
342            let encoded = encode(val);
343            let (decoded, _) = decode(&encoded).unwrap();
344            assert_eq!(decoded, val, "Failed roundtrip for {}", val);
345        }
346    }
347
348    #[test]
349    fn test_roundtrip_edge_cases() {
350        let test_values = vec![i64::MIN, i64::MAX, i64::MIN + 1, i64::MAX - 1];
351        for val in test_values {
352            let encoded = encode(val);
353            let (decoded, _) = decode(&encoded).unwrap();
354            assert_eq!(decoded, val, "Failed roundtrip for {}", val);
355        }
356    }
357
358    #[test]
359    fn test_minimal_encoding() {
360        // Small values should use minimal bytes
361        // For signed LEB128, single byte range is -64 to 63
362        assert_eq!(encode(0).len(), 1);
363        assert_eq!(encode(63).len(), 1); // Largest single-byte positive
364        assert_eq!(encode(64).len(), 2); // Needs 2 bytes
365        assert_eq!(encode(127).len(), 2); // Needs 2 bytes
366        assert_eq!(encode(128).len(), 2);
367        assert_eq!(encode(-1).len(), 1);
368        assert_eq!(encode(-64).len(), 1); // Smallest single-byte negative
369        assert_eq!(encode(-65).len(), 2); // Needs 2 bytes
370    }
371}