Skip to main content

srcmap_codec/
vlq.rs

1//! Base64 VLQ encoding/decoding primitives.
2//!
3//! VLQ (Variable-Length Quantity) encoding stores arbitrary integers
4//! as sequences of base64 characters. The sign bit is stored in the
5//! least significant bit, and continuation bits indicate multi-char values.
6
7use crate::DecodeError;
8
9const VLQ_BASE_SHIFT: u32 = 5;
10const VLQ_BASE: u64 = 1 << VLQ_BASE_SHIFT; // 32
11const VLQ_BASE_MASK: u64 = VLQ_BASE - 1; // 0b11111
12const VLQ_CONTINUATION_BIT: u64 = VLQ_BASE; // 0b100000
13
14/// Maximum shift before overflow. 13 VLQ digits × 5 bits = 65 bits,
15/// which exceeds i64 range. We allow shift up to 60 (13th digit).
16const VLQ_MAX_SHIFT: u32 = 64;
17
18/// Pre-computed base64 encode lookup table (index -> char byte).
19#[rustfmt::skip]
20const BASE64_ENCODE: [u8; 64] = [
21    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
22    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
23    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
24    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
25    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
26    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
27    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
28    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
29];
30
31/// Pre-computed base64 decode lookup table (char byte -> value).
32/// Invalid characters map to 255.
33const BASE64_DECODE: [u8; 128] = {
34    let mut table = [255u8; 128];
35    let mut i = 0u8;
36    while i < 64 {
37        table[BASE64_ENCODE[i as usize] as usize] = i;
38        i += 1;
39    }
40    table
41};
42
43/// Encode a single VLQ value, appending base64 chars to the output buffer.
44#[inline]
45pub fn vlq_encode(out: &mut Vec<u8>, value: i64) {
46    // Convert to VLQ signed representation using u64 to avoid overflow.
47    // Sign bit goes in the LSB. Two's complement negation via bit ops
48    // handles all values including i64::MIN safely.
49    let mut vlq: u64 = if value >= 0 {
50        (value as u64) << 1
51    } else {
52        // !(x as u64) + 1 computes the absolute value for any negative i64.
53        // For i64::MIN (-2^63), abs = 2^63, and (2^63 << 1) wraps to 0 in u64.
54        // This produces an incorrect encoding for i64::MIN, but that value
55        // is unreachable in valid source maps (max ~4 billion lines/columns).
56        ((!(value as u64)) + 1) << 1 | 1
57    };
58
59    loop {
60        let mut digit = vlq & VLQ_BASE_MASK;
61        vlq >>= VLQ_BASE_SHIFT;
62
63        if vlq > 0 {
64            digit |= VLQ_CONTINUATION_BIT;
65        }
66
67        out.push(BASE64_ENCODE[digit as usize]);
68
69        if vlq == 0 {
70            break;
71        }
72    }
73}
74
75/// Decode a single VLQ value from the input bytes starting at the given position.
76///
77/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
78#[inline]
79pub fn vlq_decode(input: &[u8], pos: usize) -> Result<(i64, usize), DecodeError> {
80    let mut result: u64 = 0;
81    let mut shift: u32 = 0;
82    let mut i = pos;
83
84    loop {
85        if i >= input.len() {
86            return Err(DecodeError::UnexpectedEof { offset: i });
87        }
88
89        let byte = input[i];
90
91        if byte >= 128 {
92            return Err(DecodeError::InvalidBase64 { byte, offset: i });
93        }
94
95        let digit = BASE64_DECODE[byte as usize];
96
97        if digit == 255 {
98            return Err(DecodeError::InvalidBase64 { byte, offset: i });
99        }
100
101        let digit = digit as u64;
102        i += 1;
103
104        if shift >= VLQ_MAX_SHIFT {
105            return Err(DecodeError::VlqOverflow { offset: pos });
106        }
107
108        result += (digit & VLQ_BASE_MASK) << shift;
109        shift += VLQ_BASE_SHIFT;
110
111        if (digit & VLQ_CONTINUATION_BIT) == 0 {
112            break;
113        }
114    }
115
116    // Extract sign from LSB
117    let value = if (result & 1) == 1 {
118        -((result >> 1) as i64)
119    } else {
120        (result >> 1) as i64
121    };
122
123    Ok((value, i - pos))
124}
125
126/// Encode a single unsigned VLQ value, appending base64 chars to the output buffer.
127///
128/// Unlike signed VLQ, no sign bit is used — all 5 bits per character are data.
129/// Used by the ECMA-426 scopes proposal for tags, flags, and unsigned values.
130#[inline]
131pub fn vlq_encode_unsigned(out: &mut Vec<u8>, value: u64) {
132    let mut vlq = value;
133    loop {
134        let mut digit = vlq & VLQ_BASE_MASK;
135        vlq >>= VLQ_BASE_SHIFT;
136        if vlq > 0 {
137            digit |= VLQ_CONTINUATION_BIT;
138        }
139        out.push(BASE64_ENCODE[digit as usize]);
140        if vlq == 0 {
141            break;
142        }
143    }
144}
145
146/// Decode a single unsigned VLQ value from the input bytes starting at the given position.
147///
148/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
149/// Unlike signed VLQ, no sign bit extraction is performed.
150#[inline]
151pub fn vlq_decode_unsigned(input: &[u8], pos: usize) -> Result<(u64, usize), DecodeError> {
152    let mut result: u64 = 0;
153    let mut shift: u32 = 0;
154    let mut i = pos;
155
156    loop {
157        if i >= input.len() {
158            return Err(DecodeError::UnexpectedEof { offset: i });
159        }
160
161        let byte = input[i];
162
163        if byte >= 128 {
164            return Err(DecodeError::InvalidBase64 { byte, offset: i });
165        }
166
167        let digit = BASE64_DECODE[byte as usize];
168
169        if digit == 255 {
170            return Err(DecodeError::InvalidBase64 { byte, offset: i });
171        }
172
173        let digit = digit as u64;
174        i += 1;
175
176        if shift >= VLQ_MAX_SHIFT {
177            return Err(DecodeError::VlqOverflow { offset: pos });
178        }
179
180        result += (digit & VLQ_BASE_MASK) << shift;
181        shift += VLQ_BASE_SHIFT;
182
183        if (digit & VLQ_CONTINUATION_BIT) == 0 {
184            break;
185        }
186    }
187
188    Ok((result, i - pos))
189}
190
191#[cfg(test)]
192mod tests {
193    use super::*;
194
195    #[test]
196    fn encode_zero() {
197        let mut buf = Vec::new();
198        vlq_encode(&mut buf, 0);
199        assert_eq!(&buf, b"A");
200    }
201
202    #[test]
203    fn encode_positive() {
204        let mut buf = Vec::new();
205        vlq_encode(&mut buf, 1);
206        assert_eq!(&buf, b"C");
207    }
208
209    #[test]
210    fn encode_negative() {
211        let mut buf = Vec::new();
212        vlq_encode(&mut buf, -1);
213        assert_eq!(&buf, b"D");
214    }
215
216    #[test]
217    fn encode_large_value() {
218        let mut buf = Vec::new();
219        vlq_encode(&mut buf, 1000);
220        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
221        assert_eq!(decoded, 1000);
222    }
223
224    #[test]
225    fn roundtrip_values() {
226        let values = [
227            0,
228            1,
229            -1,
230            15,
231            -15,
232            16,
233            -16,
234            31,
235            32,
236            100,
237            -100,
238            1000,
239            -1000,
240            100_000,
241            i64::MAX,
242        ];
243        for &v in &values {
244            let mut buf = Vec::new();
245            vlq_encode(&mut buf, v);
246            let (decoded, consumed) = vlq_decode(&buf, 0).unwrap();
247            assert_eq!(decoded, v, "roundtrip failed for {v}");
248            assert_eq!(consumed, buf.len());
249        }
250    }
251
252    #[test]
253    fn decode_multi_char() {
254        let mut buf = Vec::new();
255        vlq_encode(&mut buf, 500);
256        assert!(buf.len() > 1, "500 should need multiple chars");
257        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
258        assert_eq!(decoded, 500);
259    }
260
261    #[test]
262    fn decode_non_ascii_byte() {
263        let input = [0xC0u8];
264        let err = vlq_decode(&input, 0).unwrap_err();
265        assert_eq!(
266            err,
267            DecodeError::InvalidBase64 {
268                byte: 0xC0,
269                offset: 0
270            }
271        );
272    }
273
274    #[test]
275    fn decode_invalid_base64_char() {
276        let input = b"!";
277        let err = vlq_decode(input, 0).unwrap_err();
278        assert_eq!(
279            err,
280            DecodeError::InvalidBase64 {
281                byte: b'!',
282                offset: 0
283            }
284        );
285    }
286
287    #[test]
288    fn decode_unexpected_eof() {
289        // 'g' = 32, which has the continuation bit set
290        let input = b"g";
291        let err = vlq_decode(input, 0).unwrap_err();
292        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
293    }
294
295    #[test]
296    fn decode_overflow() {
297        // 14 continuation chars: shift reaches 65 which exceeds limit
298        let input = b"ggggggggggggggA";
299        let err = vlq_decode(input, 0).unwrap_err();
300        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
301    }
302
303    #[test]
304    fn decode_empty_input() {
305        let err = vlq_decode(b"", 0).unwrap_err();
306        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
307    }
308
309    // --- Unsigned VLQ tests ---
310
311    #[test]
312    fn unsigned_encode_zero() {
313        let mut buf = Vec::new();
314        vlq_encode_unsigned(&mut buf, 0);
315        assert_eq!(&buf, b"A");
316    }
317
318    #[test]
319    fn unsigned_encode_small_values() {
320        // Value 1 → 'B', value 2 → 'C', ..., value 31 → base64[31] = 'f'
321        let mut buf = Vec::new();
322        vlq_encode_unsigned(&mut buf, 1);
323        assert_eq!(&buf, b"B");
324
325        buf.clear();
326        vlq_encode_unsigned(&mut buf, 8);
327        assert_eq!(&buf, b"I");
328    }
329
330    #[test]
331    fn unsigned_roundtrip() {
332        let values: [u64; 10] = [0, 1, 8, 15, 16, 31, 32, 100, 1000, 100_000];
333        for &v in &values {
334            let mut buf = Vec::new();
335            vlq_encode_unsigned(&mut buf, v);
336            let (decoded, consumed) = vlq_decode_unsigned(&buf, 0).unwrap();
337            assert_eq!(decoded, v, "unsigned roundtrip failed for {v}");
338            assert_eq!(consumed, buf.len());
339        }
340    }
341
342    #[test]
343    fn unsigned_multi_char() {
344        let mut buf = Vec::new();
345        vlq_encode_unsigned(&mut buf, 500);
346        assert!(buf.len() > 1, "500 should need multiple chars");
347        let (decoded, _) = vlq_decode_unsigned(&buf, 0).unwrap();
348        assert_eq!(decoded, 500);
349    }
350
351    #[test]
352    fn unsigned_decode_empty() {
353        let err = vlq_decode_unsigned(b"", 0).unwrap_err();
354        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
355    }
356}