Skip to main content

srcmap_codec/
vlq.rs

1//! Base64 VLQ encoding/decoding primitives.
2//!
3//! VLQ (Variable-Length Quantity) encoding stores arbitrary integers
4//! as sequences of base64 characters. The sign bit is stored in the
5//! least significant bit, and continuation bits indicate multi-char values.
6
7use crate::DecodeError;
8
9const VLQ_BASE_SHIFT: u32 = 5;
10const VLQ_BASE: u64 = 1 << VLQ_BASE_SHIFT; // 32
11const VLQ_BASE_MASK: u64 = VLQ_BASE - 1; // 0b11111
12const VLQ_CONTINUATION_BIT: u64 = VLQ_BASE; // 0b100000
13
14/// Maximum shift before overflow. 13 VLQ digits × 5 bits = 65 bits,
15/// which exceeds i64 range. We allow shift up to 60 (13th digit).
16const VLQ_MAX_SHIFT: u32 = 64;
17
18/// Pre-computed base64 encode lookup table (index -> char byte).
19#[rustfmt::skip]
20const BASE64_ENCODE: [u8; 64] = [
21    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
22    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
23    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
24    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
25    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
26    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
27    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
28    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
29];
30
31/// Pre-computed base64 decode lookup table (char byte -> value).
32/// Invalid characters map to 255.
33const BASE64_DECODE: [u8; 128] = {
34    let mut table = [255u8; 128];
35    let mut i = 0u8;
36    while i < 64 {
37        table[BASE64_ENCODE[i as usize] as usize] = i;
38        i += 1;
39    }
40    table
41};
42
43/// Encode a single VLQ value, appending base64 chars to the output buffer.
44#[inline]
45pub fn vlq_encode(out: &mut Vec<u8>, value: i64) {
46    // Convert to VLQ signed representation using u64 to avoid overflow.
47    // Sign bit goes in the LSB. Two's complement negation via bit ops
48    // handles all values including i64::MIN safely.
49    let mut vlq: u64 = if value >= 0 {
50        (value as u64) << 1
51    } else {
52        // !(x as u64) + 1 computes the absolute value for any negative i64.
53        // For i64::MIN (-2^63), abs = 2^63, and (2^63 << 1) wraps to 0 in u64.
54        // This produces an incorrect encoding for i64::MIN, but that value
55        // is unreachable in valid source maps (max ~4 billion lines/columns).
56        ((!(value as u64)) + 1) << 1 | 1
57    };
58
59    loop {
60        let mut digit = vlq & VLQ_BASE_MASK;
61        vlq >>= VLQ_BASE_SHIFT;
62
63        if vlq > 0 {
64            digit |= VLQ_CONTINUATION_BIT;
65        }
66
67        out.push(BASE64_ENCODE[digit as usize]);
68
69        if vlq == 0 {
70            break;
71        }
72    }
73}
74
75/// Decode a single VLQ value from the input bytes starting at the given position.
76///
77/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
78#[inline]
79pub fn vlq_decode(input: &[u8], pos: usize) -> Result<(i64, usize), DecodeError> {
80    let mut result: u64 = 0;
81    let mut shift: u32 = 0;
82    let mut i = pos;
83
84    loop {
85        if i >= input.len() {
86            return Err(DecodeError::UnexpectedEof { offset: i });
87        }
88
89        let byte = input[i];
90
91        if byte >= 128 {
92            return Err(DecodeError::InvalidBase64 { byte, offset: i });
93        }
94
95        let digit = BASE64_DECODE[byte as usize];
96
97        if digit == 255 {
98            return Err(DecodeError::InvalidBase64 { byte, offset: i });
99        }
100
101        let digit = digit as u64;
102        i += 1;
103
104        if shift >= VLQ_MAX_SHIFT {
105            return Err(DecodeError::VlqOverflow { offset: pos });
106        }
107
108        result += (digit & VLQ_BASE_MASK) << shift;
109        shift += VLQ_BASE_SHIFT;
110
111        if (digit & VLQ_CONTINUATION_BIT) == 0 {
112            break;
113        }
114    }
115
116    // Extract sign from LSB
117    let value = if (result & 1) == 1 {
118        -((result >> 1) as i64)
119    } else {
120        (result >> 1) as i64
121    };
122
123    Ok((value, i - pos))
124}
125
126#[cfg(test)]
127mod tests {
128    use super::*;
129
130    #[test]
131    fn encode_zero() {
132        let mut buf = Vec::new();
133        vlq_encode(&mut buf, 0);
134        assert_eq!(&buf, b"A");
135    }
136
137    #[test]
138    fn encode_positive() {
139        let mut buf = Vec::new();
140        vlq_encode(&mut buf, 1);
141        assert_eq!(&buf, b"C");
142    }
143
144    #[test]
145    fn encode_negative() {
146        let mut buf = Vec::new();
147        vlq_encode(&mut buf, -1);
148        assert_eq!(&buf, b"D");
149    }
150
151    #[test]
152    fn encode_large_value() {
153        let mut buf = Vec::new();
154        vlq_encode(&mut buf, 1000);
155        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
156        assert_eq!(decoded, 1000);
157    }
158
159    #[test]
160    fn roundtrip_values() {
161        let values = [
162            0,
163            1,
164            -1,
165            15,
166            -15,
167            16,
168            -16,
169            31,
170            32,
171            100,
172            -100,
173            1000,
174            -1000,
175            100_000,
176            i64::MAX,
177        ];
178        for &v in &values {
179            let mut buf = Vec::new();
180            vlq_encode(&mut buf, v);
181            let (decoded, consumed) = vlq_decode(&buf, 0).unwrap();
182            assert_eq!(decoded, v, "roundtrip failed for {v}");
183            assert_eq!(consumed, buf.len());
184        }
185    }
186
187    #[test]
188    fn decode_multi_char() {
189        let mut buf = Vec::new();
190        vlq_encode(&mut buf, 500);
191        assert!(buf.len() > 1, "500 should need multiple chars");
192        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
193        assert_eq!(decoded, 500);
194    }
195
196    #[test]
197    fn decode_non_ascii_byte() {
198        let input = [0xC0u8];
199        let err = vlq_decode(&input, 0).unwrap_err();
200        assert_eq!(
201            err,
202            DecodeError::InvalidBase64 {
203                byte: 0xC0,
204                offset: 0
205            }
206        );
207    }
208
209    #[test]
210    fn decode_invalid_base64_char() {
211        let input = b"!";
212        let err = vlq_decode(input, 0).unwrap_err();
213        assert_eq!(
214            err,
215            DecodeError::InvalidBase64 {
216                byte: b'!',
217                offset: 0
218            }
219        );
220    }
221
222    #[test]
223    fn decode_unexpected_eof() {
224        // 'g' = 32, which has the continuation bit set
225        let input = b"g";
226        let err = vlq_decode(input, 0).unwrap_err();
227        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
228    }
229
230    #[test]
231    fn decode_overflow() {
232        // 14 continuation chars: shift reaches 65 which exceeds limit
233        let input = b"ggggggggggggggA";
234        let err = vlq_decode(input, 0).unwrap_err();
235        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
236    }
237
238    #[test]
239    fn decode_empty_input() {
240        let err = vlq_decode(b"", 0).unwrap_err();
241        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
242    }
243}