Skip to main content

srcmap_codec/
vlq.rs

1//! Base64 VLQ encoding/decoding primitives.
2//!
3//! VLQ (Variable-Length Quantity) encoding stores arbitrary integers
4//! as sequences of base64 characters. The sign bit is stored in the
5//! least significant bit, and continuation bits indicate multi-char values.
6
7use crate::DecodeError;
8
9const VLQ_BASE_SHIFT: u32 = 5;
10const VLQ_BASE: u64 = 1 << VLQ_BASE_SHIFT; // 32
11const VLQ_BASE_MASK: u64 = VLQ_BASE - 1; // 0b11111
12const VLQ_CONTINUATION_BIT: u64 = VLQ_BASE; // 0b100000
13
14/// Maximum shift before overflow. 13 VLQ digits × 5 bits = 65 bits,
15/// which exceeds i64 range. We allow shift up to 60 (13th digit).
16const VLQ_MAX_SHIFT: u32 = 60;
17
18/// Pre-computed base64 encode lookup table (index -> char byte).
19#[rustfmt::skip]
20const BASE64_ENCODE: [u8; 64] = [
21    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
22    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
23    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
24    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
25    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
26    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
27    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
28    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
29];
30
31/// Pre-computed base64 decode lookup table (char byte -> value).
32/// Invalid characters map to 255.
33const BASE64_DECODE: [u8; 128] = {
34    let mut table = [255u8; 128];
35    let mut i = 0u8;
36    while i < 64 {
37        table[BASE64_ENCODE[i as usize] as usize] = i;
38        i += 1;
39    }
40    table
41};
42
43/// Encode a single VLQ value, appending base64 chars to the output buffer.
44#[inline]
45pub fn vlq_encode(out: &mut Vec<u8>, value: i64) {
46    // Convert to VLQ signed representation using u64 to avoid overflow.
47    // Sign bit goes in the LSB. Two's complement negation via bit ops
48    // handles all values including i64::MIN safely.
49    let mut vlq: u64 = if value >= 0 {
50        (value as u64) << 1
51    } else {
52        // !(x as u64) + 1 computes the absolute value for any negative i64.
53        // For i64::MIN (-2^63), abs = 2^63, and (2^63 << 1) wraps to 0 in u64.
54        // This produces an incorrect encoding for i64::MIN, but that value
55        // is unreachable in valid source maps (max ~4 billion lines/columns).
56        ((!(value as u64)) + 1) << 1 | 1
57    };
58
59    // Fast path: single character (values -15..15)
60    if vlq < VLQ_BASE {
61        out.push(BASE64_ENCODE[vlq as usize]);
62        return;
63    }
64
65    loop {
66        let mut digit = vlq & VLQ_BASE_MASK;
67        vlq >>= VLQ_BASE_SHIFT;
68
69        if vlq > 0 {
70            digit |= VLQ_CONTINUATION_BIT;
71        }
72
73        out.push(BASE64_ENCODE[digit as usize]);
74
75        if vlq == 0 {
76            break;
77        }
78    }
79}
80
81/// Decode a single VLQ value from the input bytes starting at the given position.
82///
83/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
84#[inline]
85pub fn vlq_decode(input: &[u8], pos: usize) -> Result<(i64, usize), DecodeError> {
86    if pos >= input.len() {
87        return Err(DecodeError::UnexpectedEof { offset: pos });
88    }
89
90    let b0 = input[pos];
91    if b0 >= 128 {
92        return Err(DecodeError::InvalidBase64 {
93            byte: b0,
94            offset: pos,
95        });
96    }
97    let d0 = BASE64_DECODE[b0 as usize];
98    if d0 == 255 {
99        return Err(DecodeError::InvalidBase64 {
100            byte: b0,
101            offset: pos,
102        });
103    }
104
105    // Fast path: single character VLQ (values -15..15, ~60-70% of real data)
106    if (d0 & 0x20) == 0 {
107        let val = (d0 >> 1) as i64;
108        return Ok((if (d0 & 1) != 0 { -val } else { val }, 1));
109    }
110
111    // Multi-character VLQ
112    let mut result: u64 = (d0 & 0x1F) as u64;
113    let mut shift: u32 = 5;
114    let mut i = pos + 1;
115
116    loop {
117        if i >= input.len() {
118            return Err(DecodeError::UnexpectedEof { offset: i });
119        }
120
121        let byte = input[i];
122
123        if byte >= 128 {
124            return Err(DecodeError::InvalidBase64 { byte, offset: i });
125        }
126
127        let digit = BASE64_DECODE[byte as usize];
128
129        if digit == 255 {
130            return Err(DecodeError::InvalidBase64 { byte, offset: i });
131        }
132
133        i += 1;
134
135        if shift >= VLQ_MAX_SHIFT {
136            return Err(DecodeError::VlqOverflow { offset: pos });
137        }
138
139        result += ((digit & 0x1F) as u64) << shift;
140        shift += VLQ_BASE_SHIFT;
141
142        if (digit & 0x20) == 0 {
143            break;
144        }
145    }
146
147    // Extract sign from LSB
148    let value = if (result & 1) == 1 {
149        -((result >> 1) as i64)
150    } else {
151        (result >> 1) as i64
152    };
153
154    Ok((value, i - pos))
155}
156
157/// Encode a single unsigned VLQ value, appending base64 chars to the output buffer.
158///
159/// Unlike signed VLQ, no sign bit is used — all 5 bits per character are data.
160/// Used by the ECMA-426 scopes proposal for tags, flags, and unsigned values.
161#[inline]
162pub fn vlq_encode_unsigned(out: &mut Vec<u8>, value: u64) {
163    // Fast path: single character (value fits in 5 bits)
164    if value < VLQ_BASE {
165        out.push(BASE64_ENCODE[value as usize]);
166        return;
167    }
168
169    let mut vlq = value;
170    loop {
171        let mut digit = vlq & VLQ_BASE_MASK;
172        vlq >>= VLQ_BASE_SHIFT;
173        if vlq > 0 {
174            digit |= VLQ_CONTINUATION_BIT;
175        }
176        out.push(BASE64_ENCODE[digit as usize]);
177        if vlq == 0 {
178            break;
179        }
180    }
181}
182
183/// Decode a single unsigned VLQ value from the input bytes starting at the given position.
184///
185/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
186/// Unlike signed VLQ, no sign bit extraction is performed.
187#[inline]
188pub fn vlq_decode_unsigned(input: &[u8], pos: usize) -> Result<(u64, usize), DecodeError> {
189    if pos >= input.len() {
190        return Err(DecodeError::UnexpectedEof { offset: pos });
191    }
192
193    let b0 = input[pos];
194    if b0 >= 128 {
195        return Err(DecodeError::InvalidBase64 {
196            byte: b0,
197            offset: pos,
198        });
199    }
200    let d0 = BASE64_DECODE[b0 as usize];
201    if d0 == 255 {
202        return Err(DecodeError::InvalidBase64 {
203            byte: b0,
204            offset: pos,
205        });
206    }
207
208    // Fast path: single character (value fits in 5 bits, no continuation)
209    if (d0 & 0x20) == 0 {
210        return Ok((d0 as u64, 1));
211    }
212
213    // Multi-character unsigned VLQ
214    let mut result: u64 = (d0 & 0x1F) as u64;
215    let mut shift: u32 = 5;
216    let mut i = pos + 1;
217
218    loop {
219        if i >= input.len() {
220            return Err(DecodeError::UnexpectedEof { offset: i });
221        }
222
223        let byte = input[i];
224
225        if byte >= 128 {
226            return Err(DecodeError::InvalidBase64 { byte, offset: i });
227        }
228
229        let digit = BASE64_DECODE[byte as usize];
230
231        if digit == 255 {
232            return Err(DecodeError::InvalidBase64 { byte, offset: i });
233        }
234
235        i += 1;
236
237        if shift >= VLQ_MAX_SHIFT {
238            return Err(DecodeError::VlqOverflow { offset: pos });
239        }
240
241        result += ((digit & 0x1F) as u64) << shift;
242        shift += VLQ_BASE_SHIFT;
243
244        if (digit & 0x20) == 0 {
245            break;
246        }
247    }
248
249    Ok((result, i - pos))
250}
251
252#[cfg(test)]
253mod tests {
254    use super::*;
255
256    #[test]
257    fn encode_zero() {
258        let mut buf = Vec::new();
259        vlq_encode(&mut buf, 0);
260        assert_eq!(&buf, b"A");
261    }
262
263    #[test]
264    fn encode_positive() {
265        let mut buf = Vec::new();
266        vlq_encode(&mut buf, 1);
267        assert_eq!(&buf, b"C");
268    }
269
270    #[test]
271    fn encode_negative() {
272        let mut buf = Vec::new();
273        vlq_encode(&mut buf, -1);
274        assert_eq!(&buf, b"D");
275    }
276
277    #[test]
278    fn encode_large_value() {
279        let mut buf = Vec::new();
280        vlq_encode(&mut buf, 1000);
281        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
282        assert_eq!(decoded, 1000);
283    }
284
285    #[test]
286    fn roundtrip_values() {
287        let values = [
288            0,
289            1,
290            -1,
291            15,
292            -15,
293            16,
294            -16,
295            31,
296            32,
297            100,
298            -100,
299            1000,
300            -1000,
301            100_000,
302            1_000_000_000,
303            -1_000_000_000,
304        ];
305        for &v in &values {
306            let mut buf = Vec::new();
307            vlq_encode(&mut buf, v);
308            let (decoded, consumed) = vlq_decode(&buf, 0).unwrap();
309            assert_eq!(decoded, v, "roundtrip failed for {v}");
310            assert_eq!(consumed, buf.len());
311        }
312    }
313
314    #[test]
315    fn decode_multi_char() {
316        let mut buf = Vec::new();
317        vlq_encode(&mut buf, 500);
318        assert!(buf.len() > 1, "500 should need multiple chars");
319        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
320        assert_eq!(decoded, 500);
321    }
322
323    #[test]
324    fn decode_non_ascii_byte() {
325        let input = [0xC0u8];
326        let err = vlq_decode(&input, 0).unwrap_err();
327        assert_eq!(
328            err,
329            DecodeError::InvalidBase64 {
330                byte: 0xC0,
331                offset: 0
332            }
333        );
334    }
335
336    #[test]
337    fn decode_invalid_base64_char() {
338        let input = b"!";
339        let err = vlq_decode(input, 0).unwrap_err();
340        assert_eq!(
341            err,
342            DecodeError::InvalidBase64 {
343                byte: b'!',
344                offset: 0
345            }
346        );
347    }
348
349    #[test]
350    fn decode_unexpected_eof() {
351        // 'g' = 32, which has the continuation bit set
352        let input = b"g";
353        let err = vlq_decode(input, 0).unwrap_err();
354        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
355    }
356
357    #[test]
358    fn decode_overflow() {
359        // 14 continuation chars: shift reaches 60+ which exceeds limit
360        let input = b"ggggggggggggggA";
361        let err = vlq_decode(input, 0).unwrap_err();
362        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
363    }
364
365    #[test]
366    fn decode_empty_input() {
367        let err = vlq_decode(b"", 0).unwrap_err();
368        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
369    }
370
371    // --- Unsigned VLQ tests ---
372
373    #[test]
374    fn unsigned_encode_zero() {
375        let mut buf = Vec::new();
376        vlq_encode_unsigned(&mut buf, 0);
377        assert_eq!(&buf, b"A");
378    }
379
380    #[test]
381    fn unsigned_encode_small_values() {
382        // Value 1 → 'B', value 2 → 'C', ..., value 31 → base64[31] = 'f'
383        let mut buf = Vec::new();
384        vlq_encode_unsigned(&mut buf, 1);
385        assert_eq!(&buf, b"B");
386
387        buf.clear();
388        vlq_encode_unsigned(&mut buf, 8);
389        assert_eq!(&buf, b"I");
390    }
391
392    #[test]
393    fn unsigned_roundtrip() {
394        let values: [u64; 10] = [0, 1, 8, 15, 16, 31, 32, 100, 1000, 100_000];
395        for &v in &values {
396            let mut buf = Vec::new();
397            vlq_encode_unsigned(&mut buf, v);
398            let (decoded, consumed) = vlq_decode_unsigned(&buf, 0).unwrap();
399            assert_eq!(decoded, v, "unsigned roundtrip failed for {v}");
400            assert_eq!(consumed, buf.len());
401        }
402    }
403
404    #[test]
405    fn unsigned_multi_char() {
406        let mut buf = Vec::new();
407        vlq_encode_unsigned(&mut buf, 500);
408        assert!(buf.len() > 1, "500 should need multiple chars");
409        let (decoded, _) = vlq_decode_unsigned(&buf, 0).unwrap();
410        assert_eq!(decoded, 500);
411    }
412
413    #[test]
414    fn unsigned_decode_empty() {
415        let err = vlq_decode_unsigned(b"", 0).unwrap_err();
416        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
417    }
418
419    #[test]
420    fn unsigned_decode_non_ascii() {
421        let err = vlq_decode_unsigned(&[0xC3, 0x80], 0).unwrap_err();
422        assert_eq!(
423            err,
424            DecodeError::InvalidBase64 {
425                byte: 0xC3,
426                offset: 0
427            }
428        );
429    }
430
431    #[test]
432    fn unsigned_decode_invalid_base64_char() {
433        let err = vlq_decode_unsigned(b"!", 0).unwrap_err();
434        assert_eq!(
435            err,
436            DecodeError::InvalidBase64 {
437                byte: b'!',
438                offset: 0
439            }
440        );
441    }
442
443    #[test]
444    fn unsigned_decode_overflow() {
445        // 14 continuation chars to trigger overflow
446        let err = vlq_decode_unsigned(b"ggggggggggggggA", 0).unwrap_err();
447        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
448    }
449}