Skip to main content

srcmap_codec/
vlq.rs

1//! Base64 VLQ encoding/decoding primitives.
2//!
3//! VLQ (Variable-Length Quantity) encoding stores arbitrary integers
4//! as sequences of base64 characters. The sign bit is stored in the
5//! least significant bit, and continuation bits indicate multi-char values.
6
7use crate::DecodeError;
8
9const VLQ_BASE_SHIFT: u32 = 5;
10const VLQ_BASE: u64 = 1 << VLQ_BASE_SHIFT; // 32
11const VLQ_BASE_MASK: u64 = VLQ_BASE - 1; // 0b11111
12const VLQ_CONTINUATION_BIT: u64 = VLQ_BASE; // 0b100000
13
14/// Maximum shift before overflow. 13 VLQ digits × 5 bits = 65 bits,
15/// which exceeds i64 range. We allow shift up to 60 (13th digit).
16const VLQ_MAX_SHIFT: u32 = 60;
17
18/// Pre-computed base64 encode lookup table (index -> char byte).
19#[rustfmt::skip]
20const BASE64_ENCODE: [u8; 64] = [
21    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
22    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
23    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
24    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
25    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
26    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
27    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
28    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
29];
30
31/// Cache-line-aligned base64 encode table for encoding hot paths.
32/// 64 bytes fits exactly in one cache line, avoiding split-load penalties.
33#[repr(align(64))]
34struct AlignedBase64Table([u8; 64]);
35
36#[rustfmt::skip]
37static BASE64_TABLE: AlignedBase64Table = AlignedBase64Table([
38    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
39    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
40    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
41    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
42    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
43    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
44    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
45    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
46]);
47
48/// Pre-computed base64 decode lookup table (char byte -> value).
49/// Invalid characters map to 255.
50const BASE64_DECODE: [u8; 128] = {
51    let mut table = [255u8; 128];
52    let mut i = 0u8;
53    while i < 64 {
54        table[BASE64_ENCODE[i as usize] as usize] = i;
55        i += 1;
56    }
57    table
58};
59
60/// Encode a single signed VLQ value directly into the buffer using unchecked writes.
61///
62/// # Safety
63/// Caller must ensure `out` has at least 7 bytes of spare capacity
64/// (`out.capacity() - out.len() >= 7`).
65#[inline(always)]
66pub unsafe fn vlq_encode_unchecked(out: &mut Vec<u8>, value: i64) {
67    // Convert to VLQ signed representation using u64 to avoid overflow.
68    // Sign bit goes in the LSB. Two's complement negation via bit ops
69    // handles all values including i64::MIN safely.
70    let mut vlq: u64 = if value >= 0 {
71        (value as u64) << 1
72    } else {
73        // !(x as u64) + 1 computes the absolute value for any negative i64.
74        // For i64::MIN (-2^63), abs = 2^63, and (2^63 << 1) wraps to 0 in u64.
75        // This produces an incorrect encoding for i64::MIN, but that value
76        // is unreachable in valid source maps (max ~4 billion lines/columns).
77        ((!(value as u64)) + 1) << 1 | 1
78    };
79
80    let table = &BASE64_TABLE.0;
81    // SAFETY: caller guarantees at least 7 bytes of spare capacity.
82    let ptr = unsafe { out.as_mut_ptr().add(out.len()) };
83    let mut i = 0;
84
85    loop {
86        let digit = vlq & VLQ_BASE_MASK;
87        vlq >>= VLQ_BASE_SHIFT;
88        if vlq == 0 {
89            // Last byte — no continuation bit needed
90            // SAFETY: i < 7 and we have 7 bytes of spare capacity.
91            // digit is always in 0..64 so the table lookup is in bounds.
92            unsafe {
93                *ptr.add(i) = *table.get_unchecked(digit as usize);
94            }
95            i += 1;
96            break;
97        }
98        // SAFETY: same as above; digit | VLQ_CONTINUATION_BIT is in 0..64.
99        unsafe {
100            *ptr.add(i) = *table.get_unchecked((digit | VLQ_CONTINUATION_BIT) as usize);
101        }
102        i += 1;
103    }
104
105    // SAFETY: we wrote exactly `i` valid ASCII bytes into the spare capacity.
106    unsafe { out.set_len(out.len() + i) };
107}
108
109/// Encode a single VLQ value, appending base64 chars to the output buffer.
110#[inline(always)]
111pub fn vlq_encode(out: &mut Vec<u8>, value: i64) {
112    out.reserve(7);
113    // SAFETY: we just reserved 7 bytes, which is the maximum a single
114    // i64 VLQ value can produce (63 data bits / 5 bits per char = 13,
115    // but the sign bit reduces the effective range, and real source map
116    // values are far smaller — 7 bytes handles up to ±2^34).
117    unsafe { vlq_encode_unchecked(out, value) }
118}
119
120/// Decode a single VLQ value from the input bytes starting at the given position.
121///
122/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
123#[inline]
124pub fn vlq_decode(input: &[u8], pos: usize) -> Result<(i64, usize), DecodeError> {
125    if pos >= input.len() {
126        return Err(DecodeError::UnexpectedEof { offset: pos });
127    }
128
129    let b0 = input[pos];
130    if b0 >= 128 {
131        return Err(DecodeError::InvalidBase64 {
132            byte: b0,
133            offset: pos,
134        });
135    }
136    let d0 = BASE64_DECODE[b0 as usize];
137    if d0 == 255 {
138        return Err(DecodeError::InvalidBase64 {
139            byte: b0,
140            offset: pos,
141        });
142    }
143
144    // Fast path: single character VLQ (values -15..15, ~60-70% of real data)
145    if (d0 & 0x20) == 0 {
146        let val = (d0 >> 1) as i64;
147        return Ok((if (d0 & 1) != 0 { -val } else { val }, 1));
148    }
149
150    // Multi-character VLQ
151    let mut result: u64 = (d0 & 0x1F) as u64;
152    let mut shift: u32 = 5;
153    let mut i = pos + 1;
154
155    loop {
156        if i >= input.len() {
157            return Err(DecodeError::UnexpectedEof { offset: i });
158        }
159
160        let byte = input[i];
161
162        if byte >= 128 {
163            return Err(DecodeError::InvalidBase64 { byte, offset: i });
164        }
165
166        let digit = BASE64_DECODE[byte as usize];
167
168        if digit == 255 {
169            return Err(DecodeError::InvalidBase64 { byte, offset: i });
170        }
171
172        i += 1;
173
174        if shift >= VLQ_MAX_SHIFT {
175            return Err(DecodeError::VlqOverflow { offset: pos });
176        }
177
178        result += ((digit & 0x1F) as u64) << shift;
179        shift += VLQ_BASE_SHIFT;
180
181        if (digit & 0x20) == 0 {
182            break;
183        }
184    }
185
186    // Extract sign from LSB
187    let value = if (result & 1) == 1 {
188        -((result >> 1) as i64)
189    } else {
190        (result >> 1) as i64
191    };
192
193    Ok((value, i - pos))
194}
195
196/// Encode a single unsigned VLQ value directly into the buffer using unchecked writes.
197///
198/// # Safety
199/// Caller must ensure `out` has at least 7 bytes of spare capacity
200/// (`out.capacity() - out.len() >= 7`).
201#[inline(always)]
202pub unsafe fn vlq_encode_unsigned_unchecked(out: &mut Vec<u8>, value: u64) {
203    let table = &BASE64_TABLE.0;
204    // SAFETY: caller guarantees at least 7 bytes of spare capacity.
205    let ptr = unsafe { out.as_mut_ptr().add(out.len()) };
206    let mut i = 0;
207    let mut vlq = value;
208
209    loop {
210        let digit = vlq & VLQ_BASE_MASK;
211        vlq >>= VLQ_BASE_SHIFT;
212        if vlq == 0 {
213            // Last byte — no continuation bit needed
214            // SAFETY: i < 7 and we have 7 bytes of spare capacity.
215            // digit is always in 0..64 so the table lookup is in bounds.
216            unsafe {
217                *ptr.add(i) = *table.get_unchecked(digit as usize);
218            }
219            i += 1;
220            break;
221        }
222        // SAFETY: same as above; digit | VLQ_CONTINUATION_BIT is in 0..64.
223        unsafe {
224            *ptr.add(i) = *table.get_unchecked((digit | VLQ_CONTINUATION_BIT) as usize);
225        }
226        i += 1;
227    }
228
229    // SAFETY: we wrote exactly `i` valid ASCII bytes into the spare capacity.
230    unsafe { out.set_len(out.len() + i) };
231}
232
233/// Encode a single unsigned VLQ value, appending base64 chars to the output buffer.
234///
235/// Unlike signed VLQ, no sign bit is used — all 5 bits per character are data.
236/// Used by the ECMA-426 scopes proposal for tags, flags, and unsigned values.
237#[inline(always)]
238pub fn vlq_encode_unsigned(out: &mut Vec<u8>, value: u64) {
239    out.reserve(7);
240    // SAFETY: we just reserved 7 bytes, which is the maximum a single
241    // u64 VLQ value can produce in practice.
242    unsafe { vlq_encode_unsigned_unchecked(out, value) }
243}
244
245/// Decode a single unsigned VLQ value from the input bytes starting at the given position.
246///
247/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
248/// Unlike signed VLQ, no sign bit extraction is performed.
249#[inline]
250pub fn vlq_decode_unsigned(input: &[u8], pos: usize) -> Result<(u64, usize), DecodeError> {
251    if pos >= input.len() {
252        return Err(DecodeError::UnexpectedEof { offset: pos });
253    }
254
255    let b0 = input[pos];
256    if b0 >= 128 {
257        return Err(DecodeError::InvalidBase64 {
258            byte: b0,
259            offset: pos,
260        });
261    }
262    let d0 = BASE64_DECODE[b0 as usize];
263    if d0 == 255 {
264        return Err(DecodeError::InvalidBase64 {
265            byte: b0,
266            offset: pos,
267        });
268    }
269
270    // Fast path: single character (value fits in 5 bits, no continuation)
271    if (d0 & 0x20) == 0 {
272        return Ok((d0 as u64, 1));
273    }
274
275    // Multi-character unsigned VLQ
276    let mut result: u64 = (d0 & 0x1F) as u64;
277    let mut shift: u32 = 5;
278    let mut i = pos + 1;
279
280    loop {
281        if i >= input.len() {
282            return Err(DecodeError::UnexpectedEof { offset: i });
283        }
284
285        let byte = input[i];
286
287        if byte >= 128 {
288            return Err(DecodeError::InvalidBase64 { byte, offset: i });
289        }
290
291        let digit = BASE64_DECODE[byte as usize];
292
293        if digit == 255 {
294            return Err(DecodeError::InvalidBase64 { byte, offset: i });
295        }
296
297        i += 1;
298
299        if shift >= VLQ_MAX_SHIFT {
300            return Err(DecodeError::VlqOverflow { offset: pos });
301        }
302
303        result += ((digit & 0x1F) as u64) << shift;
304        shift += VLQ_BASE_SHIFT;
305
306        if (digit & 0x20) == 0 {
307            break;
308        }
309    }
310
311    Ok((result, i - pos))
312}
313
314#[cfg(test)]
315mod tests {
316    use super::*;
317
318    #[test]
319    fn encode_zero() {
320        let mut buf = Vec::new();
321        vlq_encode(&mut buf, 0);
322        assert_eq!(&buf, b"A");
323    }
324
325    #[test]
326    fn encode_positive() {
327        let mut buf = Vec::new();
328        vlq_encode(&mut buf, 1);
329        assert_eq!(&buf, b"C");
330    }
331
332    #[test]
333    fn encode_negative() {
334        let mut buf = Vec::new();
335        vlq_encode(&mut buf, -1);
336        assert_eq!(&buf, b"D");
337    }
338
339    #[test]
340    fn encode_large_value() {
341        let mut buf = Vec::new();
342        vlq_encode(&mut buf, 1000);
343        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
344        assert_eq!(decoded, 1000);
345    }
346
347    #[test]
348    fn roundtrip_values() {
349        let values = [
350            0,
351            1,
352            -1,
353            15,
354            -15,
355            16,
356            -16,
357            31,
358            32,
359            100,
360            -100,
361            1000,
362            -1000,
363            100_000,
364            1_000_000_000,
365            -1_000_000_000,
366        ];
367        for &v in &values {
368            let mut buf = Vec::new();
369            vlq_encode(&mut buf, v);
370            let (decoded, consumed) = vlq_decode(&buf, 0).unwrap();
371            assert_eq!(decoded, v, "roundtrip failed for {v}");
372            assert_eq!(consumed, buf.len());
373        }
374    }
375
376    #[test]
377    fn decode_multi_char() {
378        let mut buf = Vec::new();
379        vlq_encode(&mut buf, 500);
380        assert!(buf.len() > 1, "500 should need multiple chars");
381        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
382        assert_eq!(decoded, 500);
383    }
384
385    #[test]
386    fn decode_non_ascii_byte() {
387        let input = [0xC0u8];
388        let err = vlq_decode(&input, 0).unwrap_err();
389        assert_eq!(
390            err,
391            DecodeError::InvalidBase64 {
392                byte: 0xC0,
393                offset: 0
394            }
395        );
396    }
397
398    #[test]
399    fn decode_invalid_base64_char() {
400        let input = b"!";
401        let err = vlq_decode(input, 0).unwrap_err();
402        assert_eq!(
403            err,
404            DecodeError::InvalidBase64 {
405                byte: b'!',
406                offset: 0
407            }
408        );
409    }
410
411    #[test]
412    fn decode_unexpected_eof() {
413        // 'g' = 32, which has the continuation bit set
414        let input = b"g";
415        let err = vlq_decode(input, 0).unwrap_err();
416        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
417    }
418
419    #[test]
420    fn decode_overflow() {
421        // 14 continuation chars: shift reaches 60+ which exceeds limit
422        let input = b"ggggggggggggggA";
423        let err = vlq_decode(input, 0).unwrap_err();
424        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
425    }
426
427    #[test]
428    fn decode_empty_input() {
429        let err = vlq_decode(b"", 0).unwrap_err();
430        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
431    }
432
433    // --- Unsigned VLQ tests ---
434
435    #[test]
436    fn unsigned_encode_zero() {
437        let mut buf = Vec::new();
438        vlq_encode_unsigned(&mut buf, 0);
439        assert_eq!(&buf, b"A");
440    }
441
442    #[test]
443    fn unsigned_encode_small_values() {
444        // Value 1 → 'B', value 2 → 'C', ..., value 31 → base64[31] = 'f'
445        let mut buf = Vec::new();
446        vlq_encode_unsigned(&mut buf, 1);
447        assert_eq!(&buf, b"B");
448
449        buf.clear();
450        vlq_encode_unsigned(&mut buf, 8);
451        assert_eq!(&buf, b"I");
452    }
453
454    #[test]
455    fn unsigned_roundtrip() {
456        let values: [u64; 10] = [0, 1, 8, 15, 16, 31, 32, 100, 1000, 100_000];
457        for &v in &values {
458            let mut buf = Vec::new();
459            vlq_encode_unsigned(&mut buf, v);
460            let (decoded, consumed) = vlq_decode_unsigned(&buf, 0).unwrap();
461            assert_eq!(decoded, v, "unsigned roundtrip failed for {v}");
462            assert_eq!(consumed, buf.len());
463        }
464    }
465
466    #[test]
467    fn unsigned_multi_char() {
468        let mut buf = Vec::new();
469        vlq_encode_unsigned(&mut buf, 500);
470        assert!(buf.len() > 1, "500 should need multiple chars");
471        let (decoded, _) = vlq_decode_unsigned(&buf, 0).unwrap();
472        assert_eq!(decoded, 500);
473    }
474
475    #[test]
476    fn unsigned_decode_empty() {
477        let err = vlq_decode_unsigned(b"", 0).unwrap_err();
478        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
479    }
480
481    #[test]
482    fn unsigned_decode_non_ascii() {
483        let err = vlq_decode_unsigned(&[0xC3, 0x80], 0).unwrap_err();
484        assert_eq!(
485            err,
486            DecodeError::InvalidBase64 {
487                byte: 0xC3,
488                offset: 0
489            }
490        );
491    }
492
493    #[test]
494    fn unsigned_decode_invalid_base64_char() {
495        let err = vlq_decode_unsigned(b"!", 0).unwrap_err();
496        assert_eq!(
497            err,
498            DecodeError::InvalidBase64 {
499                byte: b'!',
500                offset: 0
501            }
502        );
503    }
504
505    #[test]
506    fn unsigned_decode_overflow() {
507        // 14 continuation chars to trigger overflow
508        let err = vlq_decode_unsigned(b"ggggggggggggggA", 0).unwrap_err();
509        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
510    }
511}