Skip to main content

srcmap_codec/
lib.rs

1//! High-performance VLQ source map codec.
2//!
3//! Encodes and decodes source map mappings using the Base64 VLQ format
4//! as specified in the Source Map v3 specification (ECMA-426).
5//!
6//! # Features
7//!
8//! - **`parallel`** — enables `encode_parallel` for multi-threaded encoding via rayon.
9//!   ~1.5x faster for large maps (5K+ lines).
10//!
11//! # Examples
12//!
13//! Decode and re-encode a mappings string:
14//!
15//! ```
16//! use srcmap_codec::{decode, encode};
17//!
18//! let mappings = decode("AAAA;AACA,EAAE").unwrap();
19//! assert_eq!(mappings.len(), 2); // 2 lines
20//! assert_eq!(mappings[0][0], vec![0, 0, 0, 0]); // first segment
21//!
22//! let encoded = encode(&mappings);
23//! assert_eq!(encoded, "AAAA;AACA,EAAE");
24//! ```
25//!
26//! Low-level VLQ primitives:
27//!
28//! ```
29//! use srcmap_codec::{vlq_decode, vlq_encode};
30//!
31//! let mut buf = Vec::new();
32//! vlq_encode(&mut buf, 42);
33//!
34//! let (value, bytes_read) = vlq_decode(&buf, 0).unwrap();
35//! assert_eq!(value, 42);
36//! ```
37
38mod decode;
39mod encode;
40mod vlq;
41
42pub use decode::decode;
43pub use encode::encode;
44#[cfg(feature = "parallel")]
45pub use encode::encode_parallel;
46pub use vlq::{vlq_decode, vlq_decode_unsigned, vlq_encode, vlq_encode_unsigned};
47
48use std::fmt;
49
/// One source map segment, kept entirely inline so that no per-segment heap
/// allocation is needed.
///
/// The constructors build segments with 1, 4, or 5 fields:
/// - 1 field:  `[generated_column]`
/// - 4 fields: `[generated_column, source_index, original_line, original_column]`
/// - 5 fields: `[generated_column, source_index, original_line, original_column, name_index]`
///
/// `Segment` implements `Deref<Target = [i64]>`, so indexing, `len()`,
/// `is_empty()`, and iteration behave the same way they do on a `Vec<i64>`.
#[derive(Clone, Copy, Debug)]
pub struct Segment {
    /// Inline storage for up to five fields; only the first `len` entries are in use.
    data: [i64; 5],
    /// Number of leading entries of `data` that belong to the segment.
    len: u8,
}
64
65impl std::hash::Hash for Segment {
66    #[inline]
67    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
68        (**self).hash(state);
69    }
70}
71
72impl PartialOrd for Segment {
73    #[inline]
74    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
75        Some(self.cmp(other))
76    }
77}
78
79impl Ord for Segment {
80    #[inline]
81    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
82        (**self).cmp(&**other)
83    }
84}
85
86impl Segment {
87    /// Create a 1-field segment (generated column only).
88    #[inline]
89    pub fn one(a: i64) -> Self {
90        Self {
91            data: [a, 0, 0, 0, 0],
92            len: 1,
93        }
94    }
95
96    /// Create a 4-field segment (with source info, no name).
97    #[inline]
98    pub fn four(a: i64, b: i64, c: i64, d: i64) -> Self {
99        Self {
100            data: [a, b, c, d, 0],
101            len: 4,
102        }
103    }
104
105    /// Create a 5-field segment (with source info and name).
106    #[inline]
107    pub fn five(a: i64, b: i64, c: i64, d: i64, e: i64) -> Self {
108        Self {
109            data: [a, b, c, d, e],
110            len: 5,
111        }
112    }
113
114    /// Convert to a `Vec<i64>` (for interop with APIs that expect `Vec`).
115    pub fn to_vec(&self) -> Vec<i64> {
116        self.data[..self.len as usize].to_vec()
117    }
118}
119
120impl std::ops::Deref for Segment {
121    type Target = [i64];
122
123    #[inline]
124    fn deref(&self) -> &[i64] {
125        &self.data[..self.len as usize]
126    }
127}
128
129impl<'a> IntoIterator for &'a Segment {
130    type Item = &'a i64;
131    type IntoIter = std::slice::Iter<'a, i64>;
132
133    #[inline]
134    fn into_iter(self) -> Self::IntoIter {
135        self.data[..self.len as usize].iter()
136    }
137}
138
139impl PartialEq for Segment {
140    fn eq(&self, other: &Self) -> bool {
141        **self == **other
142    }
143}
144
145impl Eq for Segment {}
146
147impl PartialEq<Vec<i64>> for Segment {
148    fn eq(&self, other: &Vec<i64>) -> bool {
149        **self == **other
150    }
151}
152
153impl PartialEq<Segment> for Vec<i64> {
154    fn eq(&self, other: &Segment) -> bool {
155        **self == **other
156    }
157}
158
159impl From<Vec<i64>> for Segment {
160    fn from(v: Vec<i64>) -> Self {
161        let mut data = [0i64; 5];
162        let len = v.len().min(5);
163        data[..len].copy_from_slice(&v[..len]);
164        Self {
165            data,
166            len: len as u8,
167        }
168    }
169}
170
171impl From<&[i64]> for Segment {
172    fn from(s: &[i64]) -> Self {
173        let mut data = [0i64; 5];
174        let len = s.len().min(5);
175        data[..len].copy_from_slice(&s[..len]);
176        Self {
177            data,
178            len: len as u8,
179        }
180    }
181}
182
183/// A source map line is a list of segments.
184pub type Line = Vec<Segment>;
185
186/// Decoded source map mappings: a list of lines, each containing segments.
187pub type SourceMapMappings = Vec<Line>;
188
/// Failure modes reported when a VLQ-encoded mappings string cannot be decoded.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DecodeError {
    /// The input contained a byte that is not a valid base64 character.
    InvalidBase64 {
        /// The offending byte.
        byte: u8,
        /// Offset reported for the offending byte.
        offset: usize,
    },
    /// The input stopped in the middle of a VLQ sequence (the continuation bit was set).
    UnexpectedEof {
        /// Offset reported for the premature end of input.
        offset: usize,
    },
    /// A VLQ value went beyond the maximum representable range.
    VlqOverflow {
        /// Offset reported for the overflow.
        offset: usize,
    },
}
199
200impl fmt::Display for DecodeError {
201    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202        match self {
203            Self::InvalidBase64 { byte, offset } => {
204                write!(
205                    f,
206                    "invalid base64 character 0x{byte:02x} at offset {offset}"
207                )
208            }
209            Self::UnexpectedEof { offset } => {
210                write!(f, "unexpected end of input at offset {offset}")
211            }
212            Self::VlqOverflow { offset } => {
213                write!(f, "VLQ value overflow at offset {offset}")
214            }
215        }
216    }
217}
218
219impl std::error::Error for DecodeError {}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Decode `input`, re-encode it, and require the original text back.
    fn assert_text_roundtrip(input: &str) {
        let mappings = decode(input).unwrap();
        assert_eq!(encode(&mappings), input);
    }

    /// Encode `mappings`, decode the result, and require the same structure back.
    fn assert_value_roundtrip(mappings: SourceMapMappings) {
        let text = encode(&mappings);
        let restored = decode(&text).unwrap();
        assert_eq!(restored, mappings);
    }

    // --- Roundtrip tests ---

    /// The empty string decodes to no lines and encodes back to the empty string.
    #[test]
    fn roundtrip_empty() {
        let mappings = decode("").unwrap();
        assert!(mappings.is_empty());
        assert_eq!(encode(&mappings), "");
    }

    /// Two lines with one segment each survive a decode/encode cycle.
    #[test]
    fn roundtrip_simple() {
        assert_text_roundtrip("AAAA;AACA");
    }

    /// Several comma-separated segments on one line survive a decode/encode cycle.
    #[test]
    fn roundtrip_multiple_segments() {
        assert_text_roundtrip("AAAA,GAAG,EAAE;AACA");
    }

    /// Values that need more than one base64 digit survive an encode/decode cycle.
    #[test]
    fn roundtrip_large_values() {
        assert_value_roundtrip(vec![vec![Segment::five(1000, 50, 999, 500, 100)]]);
    }

    /// A second segment whose original position moves backwards survives an
    /// encode/decode cycle.
    #[test]
    fn roundtrip_negative_deltas() {
        assert_value_roundtrip(vec![vec![
            Segment::four(10, 0, 10, 10),
            Segment::four(20, 0, 5, 5),
        ]]);
    }

    // --- Decode structure tests ---

    #[test]
    fn decode_single_field_segment() {
        let lines = decode("A").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0]);
    }

    #[test]
    fn decode_four_field_segment() {
        let lines = decode("AAAA").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0, 0, 0, 0]);
    }

    #[test]
    fn decode_five_field_segment() {
        let lines = decode("AAAAA").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn decode_negative_values() {
        let lines = decode("DADD").unwrap();
        assert_eq!(lines[0][0], vec![-1, 0, -1, -1]);
    }

    #[test]
    fn decode_multiple_lines() {
        let lines = decode("AAAA;AACA;AACA").unwrap();
        assert_eq!(lines.len(), 3);
    }

    /// Consecutive `;` separators produce empty lines in between.
    #[test]
    fn decode_empty_lines() {
        let lines = decode("AAAA;;;AACA").unwrap();
        assert_eq!(lines.len(), 4);
        assert!(lines[1].is_empty());
        assert!(lines[2].is_empty());
    }

    #[test]
    fn decode_trailing_semicolon() {
        // A trailing `;` means one more (empty) line follows.
        let lines = decode("AAAA;").unwrap();
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 1);
        assert!(lines[1].is_empty());
    }

    /// Three separators and nothing else yield four empty lines.
    #[test]
    fn decode_only_semicolons() {
        let lines = decode(";;;").unwrap();
        assert_eq!(lines.len(), 4);
        assert!(lines.iter().all(|line| line.is_empty()));
    }

    // --- Malformed input tests ---

    #[test]
    fn decode_invalid_ascii_char() {
        let expected = DecodeError::InvalidBase64 {
            byte: b'!',
            offset: 2,
        };
        assert_eq!(decode("AA!A").unwrap_err(), expected);
    }

    #[test]
    fn decode_non_ascii_byte() {
        // 'À' is UTF-8 bytes [0xC3, 0x80] — both >= 128, caught by non-ASCII guard
        let expected = DecodeError::InvalidBase64 {
            byte: 0xC3,
            offset: 2,
        };
        assert_eq!(decode("AAÀ").unwrap_err(), expected);
    }

    #[test]
    fn decode_truncated_vlq() {
        // 'g' has value 32, which has the continuation bit set — needs more chars
        let failure = decode("g").unwrap_err();
        assert_eq!(failure, DecodeError::UnexpectedEof { offset: 1 });
    }

    #[test]
    fn decode_vlq_overflow() {
        // 14 continuation characters: each 'g' = value 32 (continuation bit set)
        // After 12 digits, shift reaches 60 which exceeds the VLQ_MAX_SHIFT limit
        let failure = decode("gggggggggggggg").unwrap_err();
        assert!(matches!(failure, DecodeError::VlqOverflow { .. }));
    }

    #[test]
    fn decode_truncated_segment() {
        // "AC" = two VLQ values (0, 1) — starts a 4-field segment but only has 2 values
        let failure = decode("AC").unwrap_err();
        assert!(matches!(
            failure,
            DecodeError::UnexpectedEof { .. } | DecodeError::InvalidBase64 { .. }
        ));
    }

    // --- Encode edge cases ---

    #[test]
    fn encode_empty_segments_no_dangling_comma() {
        // Zero-field segments must be skipped without leaving dangling commas behind.
        let blank = Segment::from(&[] as &[i64]);
        let with_blanks = vec![vec![
            blank,
            Segment::four(0, 0, 0, 0),
            blank,
            Segment::four(2, 0, 0, 1),
        ]];
        let encoded = encode(&with_blanks);
        assert!(
            !encoded.contains(",,"),
            "should not contain dangling commas"
        );

        // The output must match what the same line produces without the blanks.
        let without_blanks = vec![vec![
            Segment::four(0, 0, 0, 0),
            Segment::four(2, 0, 0, 1),
        ]];
        assert_eq!(encoded, encode(&without_blanks));
    }

    /// A line made only of zero-field segments encodes to the empty string.
    #[test]
    fn encode_all_empty_segments() {
        let blank = Segment::from(&[] as &[i64]);
        let mappings = vec![vec![blank, blank, blank]];
        assert_eq!(encode(&mappings), "");
    }

    // --- Parallel encoding tests ---

    #[cfg(feature = "parallel")]
    mod parallel_tests {
        use super::*;

        /// Build `lines` lines of `segments_per_line` five-field segments each.
        fn build_large_mappings(lines: usize, segments_per_line: usize) -> SourceMapMappings {
            (0..lines)
                .map(|line| {
                    (0..segments_per_line)
                        .map(|seg| {
                            Segment::five(
                                (seg * 10) as i64, // generated column
                                (seg % 5) as i64,  // source index
                                line as i64,       // original line
                                (seg * 5) as i64,  // original column
                                (seg % 3) as i64,  // name index
                            )
                        })
                        .collect::<Line>()
                })
                .collect()
        }

        /// Require `encode_parallel` to produce exactly what `encode` produces.
        fn assert_parallel_matches_sequential(mappings: &SourceMapMappings) {
            let sequential = encode(mappings);
            let parallel = encode_parallel(mappings);
            assert_eq!(sequential, parallel);
        }

        #[test]
        fn parallel_matches_sequential_large() {
            assert_parallel_matches_sequential(&build_large_mappings(2000, 10));
        }

        #[test]
        fn parallel_matches_sequential_with_empty_lines() {
            let mut mappings = build_large_mappings(1500, 8);
            // Blank out every third line.
            for line in mappings.iter_mut().step_by(3) {
                *line = Line::new();
            }
            assert_parallel_matches_sequential(&mappings);
        }

        #[test]
        fn parallel_matches_sequential_mixed_segments() {
            // Each line mixes 1-, 4- and 5-field segments.
            let mappings: SourceMapMappings = (0..2000i64)
                .map(|line| {
                    (0..8i64)
                        .map(|seg| match seg % 4 {
                            0 => Segment::one(seg * 10),
                            3 => Segment::five(seg * 10, seg % 3, line, seg * 5, seg % 2),
                            _ => Segment::four(seg * 10, seg % 3, line, seg * 5),
                        })
                        .collect::<Line>()
                })
                .collect();
            assert_parallel_matches_sequential(&mappings);
        }

        #[test]
        fn parallel_roundtrip() {
            let mappings = build_large_mappings(2000, 10);
            let encoded = encode_parallel(&mappings);
            let restored = decode(&encoded).unwrap();
            assert_eq!(restored, mappings);
        }

        #[test]
        fn parallel_fallback_for_small_maps() {
            // Below threshold — should still produce correct output
            assert_parallel_matches_sequential(&build_large_mappings(10, 5));
        }
    }

    // --- DecodeError Display tests ---

    #[test]
    fn decode_error_display_invalid_base64() {
        let message = DecodeError::InvalidBase64 {
            byte: b'!',
            offset: 2,
        }
        .to_string();
        assert_eq!(message, "invalid base64 character 0x21 at offset 2");
    }

    #[test]
    fn decode_error_display_unexpected_eof() {
        let message = DecodeError::UnexpectedEof { offset: 5 }.to_string();
        assert_eq!(message, "unexpected end of input at offset 5");
    }

    #[test]
    fn decode_error_display_overflow() {
        let message = DecodeError::VlqOverflow { offset: 10 }.to_string();
        assert_eq!(message, "VLQ value overflow at offset 10");
    }

    // --- Decode edge case: 5-field segment with name ---

    #[test]
    fn decode_five_field_with_name_index() {
        // The fifth (name) field must come through: "AAAAC" is 0,0,0,0,1.
        let lines = decode("AAAAC").unwrap();
        assert_eq!(lines[0][0], vec![0, 0, 0, 0, 1]);
    }

    // --- Encode edge case: encode with only 1 line ---

    #[test]
    fn encode_single_segment_one_field() {
        assert_value_roundtrip(vec![vec![Segment::one(5)]]);
    }
}