Skip to main content

srcmap_codec/
vlq.rs

1//! Base64 VLQ encoding/decoding primitives.
2//!
3//! VLQ (Variable-Length Quantity) encoding stores arbitrary integers
4//! as sequences of base64 characters. The sign bit is stored in the
5//! least significant bit, and continuation bits indicate multi-char values.
6
7use crate::DecodeError;
8
9const VLQ_BASE_SHIFT: u32 = 5;
10const VLQ_BASE: u64 = 1 << VLQ_BASE_SHIFT; // 32
11const VLQ_BASE_MASK: u64 = VLQ_BASE - 1; // 0b11111
12const VLQ_CONTINUATION_BIT: u64 = VLQ_BASE; // 0b100000
13
14/// Maximum shift before overflow. 13 VLQ digits × 5 bits = 65 bits,
15/// which exceeds i64 range. We allow shift up to 60 (13th digit).
16const VLQ_MAX_SHIFT: u32 = 60;
17
18/// Pre-computed base64 encode lookup table (index -> char byte).
19#[rustfmt::skip]
20const BASE64_ENCODE: [u8; 64] = [
21    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
22    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
23    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
24    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
25    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
26    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
27    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
28    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
29];
30
31/// Cache-line-aligned base64 encode table for encoding hot paths.
32/// 64 bytes fits exactly in one cache line, avoiding split-load penalties.
33#[repr(align(64))]
34struct AlignedBase64Table([u8; 64]);
35
36#[rustfmt::skip]
37static BASE64_TABLE: AlignedBase64Table = AlignedBase64Table([
38    b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
39    b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P',
40    b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
41    b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f',
42    b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
43    b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
44    b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3',
45    b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/',
46]);
47
48/// Pre-computed base64 decode lookup table (char byte -> value).
49/// Invalid characters map to 255.
50const BASE64_DECODE: [u8; 128] = {
51    let mut table = [255u8; 128];
52    let mut i = 0u8;
53    while i < 64 {
54        table[BASE64_ENCODE[i as usize] as usize] = i;
55        i += 1;
56    }
57    table
58};
59
60/// Encode a single signed VLQ value directly into the buffer using unchecked writes.
61///
62/// # Safety
63/// Caller must ensure `out` has at least 7 bytes of spare capacity
64/// (`out.capacity() - out.len() >= 7`).
65#[inline(always)]
66pub unsafe fn vlq_encode_unchecked(out: &mut Vec<u8>, value: i64) {
67    // Convert to VLQ signed representation using u64 to avoid overflow.
68    // Sign bit goes in the LSB. Two's complement negation via bit ops
69    // handles all values including i64::MIN safely.
70    let mut vlq: u64 = if value >= 0 {
71        (value as u64) << 1
72    } else {
73        // !(x as u64) + 1 computes the absolute value for any negative i64.
74        // For i64::MIN (-2^63), abs = 2^63, and (2^63 << 1) wraps to 0 in u64.
75        // This produces an incorrect encoding for i64::MIN, but that value
76        // is unreachable in valid source maps (max ~4 billion lines/columns).
77        (((!(value as u64)) + 1) << 1) | 1
78    };
79
80    let table = &BASE64_TABLE.0;
81    // SAFETY: caller guarantees at least 7 bytes of spare capacity.
82    let ptr = unsafe { out.as_mut_ptr().add(out.len()) };
83    let mut i = 0;
84
85    loop {
86        let digit = vlq & VLQ_BASE_MASK;
87        vlq >>= VLQ_BASE_SHIFT;
88        if vlq == 0 {
89            // Last byte — no continuation bit needed
90            // SAFETY: i < 7 and we have 7 bytes of spare capacity.
91            // digit is always in 0..64 so the table lookup is in bounds.
92            unsafe {
93                *ptr.add(i) = *table.get_unchecked(digit as usize);
94            }
95            i += 1;
96            break;
97        }
98        // SAFETY: same as above; digit | VLQ_CONTINUATION_BIT is in 0..64.
99        unsafe {
100            *ptr.add(i) = *table.get_unchecked((digit | VLQ_CONTINUATION_BIT) as usize);
101        }
102        i += 1;
103    }
104
105    // SAFETY: we wrote exactly `i` valid ASCII bytes into the spare capacity.
106    unsafe { out.set_len(out.len() + i) };
107}
108
109/// Encode a single VLQ value, appending base64 chars to the output buffer.
110#[inline(always)]
111pub fn vlq_encode(out: &mut Vec<u8>, value: i64) {
112    out.reserve(7);
113    // SAFETY: we just reserved 7 bytes, which is the maximum a single
114    // i64 VLQ value can produce (63 data bits / 5 bits per char = 13,
115    // but the sign bit reduces the effective range, and real source map
116    // values are far smaller — 7 bytes handles up to ±2^34).
117    unsafe { vlq_encode_unchecked(out, value) }
118}
119
120/// Decode a single VLQ value from the input bytes starting at the given position.
121///
122/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
123#[inline(always)]
124pub fn vlq_decode(input: &[u8], pos: usize) -> Result<(i64, usize), DecodeError> {
125    if pos >= input.len() {
126        return Err(DecodeError::UnexpectedEof { offset: pos });
127    }
128
129    let b0 = input[pos];
130    if b0 >= 128 {
131        return Err(DecodeError::InvalidBase64 { byte: b0, offset: pos });
132    }
133    let d0 = BASE64_DECODE[b0 as usize];
134    if d0 == 255 {
135        return Err(DecodeError::InvalidBase64 { byte: b0, offset: pos });
136    }
137
138    // Fast path: single character VLQ (values -15..15, ~60-70% of real data)
139    if (d0 & 0x20) == 0 {
140        let val = (d0 >> 1) as i64;
141        return Ok((if (d0 & 1) != 0 { -val } else { val }, 1));
142    }
143
144    // Multi-character VLQ
145    let mut result: u64 = (d0 & 0x1F) as u64;
146    let mut shift: u32 = 5;
147    let mut i = pos + 1;
148
149    loop {
150        if i >= input.len() {
151            return Err(DecodeError::UnexpectedEof { offset: i });
152        }
153
154        let byte = input[i];
155
156        if byte >= 128 {
157            return Err(DecodeError::InvalidBase64 { byte, offset: i });
158        }
159
160        let digit = BASE64_DECODE[byte as usize];
161
162        if digit == 255 {
163            return Err(DecodeError::InvalidBase64 { byte, offset: i });
164        }
165
166        i += 1;
167
168        if shift >= VLQ_MAX_SHIFT {
169            return Err(DecodeError::VlqOverflow { offset: pos });
170        }
171
172        result += ((digit & 0x1F) as u64) << shift;
173        shift += VLQ_BASE_SHIFT;
174
175        if (digit & 0x20) == 0 {
176            break;
177        }
178    }
179
180    // Extract sign from LSB
181    let value = if (result & 1) == 1 { -((result >> 1) as i64) } else { (result >> 1) as i64 };
182
183    Ok((value, i - pos))
184}
185
186/// Encode a single unsigned VLQ value directly into the buffer using unchecked writes.
187///
188/// # Safety
189/// Caller must ensure `out` has at least 7 bytes of spare capacity
190/// (`out.capacity() - out.len() >= 7`).
191#[inline(always)]
192pub unsafe fn vlq_encode_unsigned_unchecked(out: &mut Vec<u8>, value: u64) {
193    let table = &BASE64_TABLE.0;
194    // SAFETY: caller guarantees at least 7 bytes of spare capacity.
195    let ptr = unsafe { out.as_mut_ptr().add(out.len()) };
196    let mut i = 0;
197    let mut vlq = value;
198
199    loop {
200        let digit = vlq & VLQ_BASE_MASK;
201        vlq >>= VLQ_BASE_SHIFT;
202        if vlq == 0 {
203            // Last byte — no continuation bit needed
204            // SAFETY: i < 7 and we have 7 bytes of spare capacity.
205            // digit is always in 0..64 so the table lookup is in bounds.
206            unsafe {
207                *ptr.add(i) = *table.get_unchecked(digit as usize);
208            }
209            i += 1;
210            break;
211        }
212        // SAFETY: same as above; digit | VLQ_CONTINUATION_BIT is in 0..64.
213        unsafe {
214            *ptr.add(i) = *table.get_unchecked((digit | VLQ_CONTINUATION_BIT) as usize);
215        }
216        i += 1;
217    }
218
219    // SAFETY: we wrote exactly `i` valid ASCII bytes into the spare capacity.
220    unsafe { out.set_len(out.len() + i) };
221}
222
223/// Encode a single unsigned VLQ value, appending base64 chars to the output buffer.
224///
225/// Unlike signed VLQ, no sign bit is used — all 5 bits per character are data.
226/// Used by the ECMA-426 scopes proposal for tags, flags, and unsigned values.
227#[inline(always)]
228pub fn vlq_encode_unsigned(out: &mut Vec<u8>, value: u64) {
229    out.reserve(7);
230    // SAFETY: we just reserved 7 bytes, which is the maximum a single
231    // u64 VLQ value can produce in practice.
232    unsafe { vlq_encode_unsigned_unchecked(out, value) }
233}
234
235/// Decode a single unsigned VLQ value from the input bytes starting at the given position.
236///
237/// Returns `(decoded_value, bytes_consumed)` or a [`DecodeError`].
238/// Unlike signed VLQ, no sign bit extraction is performed.
239#[inline]
240pub fn vlq_decode_unsigned(input: &[u8], pos: usize) -> Result<(u64, usize), DecodeError> {
241    if pos >= input.len() {
242        return Err(DecodeError::UnexpectedEof { offset: pos });
243    }
244
245    let b0 = input[pos];
246    if b0 >= 128 {
247        return Err(DecodeError::InvalidBase64 { byte: b0, offset: pos });
248    }
249    let d0 = BASE64_DECODE[b0 as usize];
250    if d0 == 255 {
251        return Err(DecodeError::InvalidBase64 { byte: b0, offset: pos });
252    }
253
254    // Fast path: single character (value fits in 5 bits, no continuation)
255    if (d0 & 0x20) == 0 {
256        return Ok((d0 as u64, 1));
257    }
258
259    // Multi-character unsigned VLQ
260    let mut result: u64 = (d0 & 0x1F) as u64;
261    let mut shift: u32 = 5;
262    let mut i = pos + 1;
263
264    loop {
265        if i >= input.len() {
266            return Err(DecodeError::UnexpectedEof { offset: i });
267        }
268
269        let byte = input[i];
270
271        if byte >= 128 {
272            return Err(DecodeError::InvalidBase64 { byte, offset: i });
273        }
274
275        let digit = BASE64_DECODE[byte as usize];
276
277        if digit == 255 {
278            return Err(DecodeError::InvalidBase64 { byte, offset: i });
279        }
280
281        i += 1;
282
283        if shift >= VLQ_MAX_SHIFT {
284            return Err(DecodeError::VlqOverflow { offset: pos });
285        }
286
287        result += ((digit & 0x1F) as u64) << shift;
288        shift += VLQ_BASE_SHIFT;
289
290        if (digit & 0x20) == 0 {
291            break;
292        }
293    }
294
295    Ok((result, i - pos))
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn encode_zero() {
304        let mut buf = Vec::new();
305        vlq_encode(&mut buf, 0);
306        assert_eq!(&buf, b"A");
307    }
308
309    #[test]
310    fn encode_positive() {
311        let mut buf = Vec::new();
312        vlq_encode(&mut buf, 1);
313        assert_eq!(&buf, b"C");
314    }
315
316    #[test]
317    fn encode_negative() {
318        let mut buf = Vec::new();
319        vlq_encode(&mut buf, -1);
320        assert_eq!(&buf, b"D");
321    }
322
323    #[test]
324    fn encode_large_value() {
325        let mut buf = Vec::new();
326        vlq_encode(&mut buf, 1000);
327        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
328        assert_eq!(decoded, 1000);
329    }
330
331    #[test]
332    fn roundtrip_values() {
333        let values = [
334            0,
335            1,
336            -1,
337            15,
338            -15,
339            16,
340            -16,
341            31,
342            32,
343            100,
344            -100,
345            1000,
346            -1000,
347            100_000,
348            1_000_000_000,
349            -1_000_000_000,
350        ];
351        for &v in &values {
352            let mut buf = Vec::new();
353            vlq_encode(&mut buf, v);
354            let (decoded, consumed) = vlq_decode(&buf, 0).unwrap();
355            assert_eq!(decoded, v, "roundtrip failed for {v}");
356            assert_eq!(consumed, buf.len());
357        }
358    }
359
360    #[test]
361    fn decode_multi_char() {
362        let mut buf = Vec::new();
363        vlq_encode(&mut buf, 500);
364        assert!(buf.len() > 1, "500 should need multiple chars");
365        let (decoded, _) = vlq_decode(&buf, 0).unwrap();
366        assert_eq!(decoded, 500);
367    }
368
369    #[test]
370    fn decode_non_ascii_byte() {
371        let input = [0xC0u8];
372        let err = vlq_decode(&input, 0).unwrap_err();
373        assert_eq!(err, DecodeError::InvalidBase64 { byte: 0xC0, offset: 0 });
374    }
375
376    #[test]
377    fn decode_invalid_base64_char() {
378        let input = b"!";
379        let err = vlq_decode(input, 0).unwrap_err();
380        assert_eq!(err, DecodeError::InvalidBase64 { byte: b'!', offset: 0 });
381    }
382
383    #[test]
384    fn decode_unexpected_eof() {
385        // 'g' = 32, which has the continuation bit set
386        let input = b"g";
387        let err = vlq_decode(input, 0).unwrap_err();
388        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
389    }
390
391    #[test]
392    fn decode_overflow() {
393        // 14 continuation chars: shift reaches 60+ which exceeds limit
394        let input = b"ggggggggggggggA";
395        let err = vlq_decode(input, 0).unwrap_err();
396        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
397    }
398
399    #[test]
400    fn decode_empty_input() {
401        let err = vlq_decode(b"", 0).unwrap_err();
402        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
403    }
404
405    // --- Unsigned VLQ tests ---
406
407    #[test]
408    fn unsigned_encode_zero() {
409        let mut buf = Vec::new();
410        vlq_encode_unsigned(&mut buf, 0);
411        assert_eq!(&buf, b"A");
412    }
413
414    #[test]
415    fn unsigned_encode_small_values() {
416        // Value 1 → 'B', value 2 → 'C', ..., value 31 → base64[31] = 'f'
417        let mut buf = Vec::new();
418        vlq_encode_unsigned(&mut buf, 1);
419        assert_eq!(&buf, b"B");
420
421        buf.clear();
422        vlq_encode_unsigned(&mut buf, 8);
423        assert_eq!(&buf, b"I");
424    }
425
426    #[test]
427    fn unsigned_roundtrip() {
428        let values: [u64; 10] = [0, 1, 8, 15, 16, 31, 32, 100, 1000, 100_000];
429        for &v in &values {
430            let mut buf = Vec::new();
431            vlq_encode_unsigned(&mut buf, v);
432            let (decoded, consumed) = vlq_decode_unsigned(&buf, 0).unwrap();
433            assert_eq!(decoded, v, "unsigned roundtrip failed for {v}");
434            assert_eq!(consumed, buf.len());
435        }
436    }
437
438    #[test]
439    fn unsigned_multi_char() {
440        let mut buf = Vec::new();
441        vlq_encode_unsigned(&mut buf, 500);
442        assert!(buf.len() > 1, "500 should need multiple chars");
443        let (decoded, _) = vlq_decode_unsigned(&buf, 0).unwrap();
444        assert_eq!(decoded, 500);
445    }
446
447    #[test]
448    fn unsigned_decode_empty() {
449        let err = vlq_decode_unsigned(b"", 0).unwrap_err();
450        assert_eq!(err, DecodeError::UnexpectedEof { offset: 0 });
451    }
452
453    #[test]
454    fn unsigned_decode_non_ascii() {
455        let err = vlq_decode_unsigned(&[0xC3, 0x80], 0).unwrap_err();
456        assert_eq!(err, DecodeError::InvalidBase64 { byte: 0xC3, offset: 0 });
457    }
458
459    #[test]
460    fn unsigned_decode_invalid_base64_char() {
461        let err = vlq_decode_unsigned(b"!", 0).unwrap_err();
462        assert_eq!(err, DecodeError::InvalidBase64 { byte: b'!', offset: 0 });
463    }
464
465    #[test]
466    fn unsigned_decode_overflow() {
467        // 14 continuation chars to trigger overflow
468        let err = vlq_decode_unsigned(b"ggggggggggggggA", 0).unwrap_err();
469        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
470    }
471}