Skip to main content

srcmap_codec/
lib.rs

1//! High-performance VLQ source map codec.
2//!
3//! Encodes and decodes source map mappings using the Base64 VLQ format
4//! as specified in the Source Map v3 specification (ECMA-426).
5//!
6//! # Features
7//!
8//! - **`parallel`** — enables `encode_parallel` for multi-threaded encoding via rayon.
9//!   ~1.5x faster for large maps (5K+ lines).
10//!
11//! # Examples
12//!
13//! Decode and re-encode a mappings string:
14//!
15//! ```
16//! use srcmap_codec::{decode, encode};
17//!
18//! let mappings = decode("AAAA;AACA,EAAE").unwrap();
19//! assert_eq!(mappings.len(), 2); // 2 lines
20//! assert_eq!(mappings[0][0], vec![0, 0, 0, 0]); // first segment
21//!
22//! let encoded = encode(&mappings);
23//! assert_eq!(encoded, "AAAA;AACA,EAAE");
24//! ```
25//!
26//! Low-level VLQ primitives:
27//!
28//! ```
29//! use srcmap_codec::{vlq_decode, vlq_encode};
30//!
31//! let mut buf = Vec::new();
32//! vlq_encode(&mut buf, 42);
33//!
34//! let (value, bytes_read) = vlq_decode(&buf, 0).unwrap();
35//! assert_eq!(value, 42);
36//! ```
37
38mod decode;
39mod encode;
40mod vlq;
41
42pub use decode::decode;
43pub use encode::encode;
44#[cfg(feature = "parallel")]
45pub use encode::encode_parallel;
46pub use vlq::{vlq_decode, vlq_decode_unsigned, vlq_encode, vlq_encode_unsigned};
47
48use std::fmt;
49
/// One source map segment, kept in a fixed inline array (no heap allocation).
///
/// The number of meaningful fields is 1, 4, or 5:
/// - 1 field:  `[generated_column]`
/// - 4 fields: `[generated_column, source_index, original_line, original_column]`
/// - 5 fields: `[generated_column, source_index, original_line, original_column, name_index]`
///
/// `Segment` implements `Deref<Target = [i64]>`, so indexing, `len()`,
/// `is_empty()`, and iteration behave as they would on a `Vec<i64>` holding
/// only the meaningful fields.
#[derive(Clone, Copy, Debug)]
pub struct Segment {
    /// Backing storage; only the first `len` entries are meaningful.
    data: [i64; 5],
    /// Number of meaningful entries at the front of `data`.
    len: u8,
}
64
65impl std::hash::Hash for Segment {
66    #[inline]
67    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
68        (**self).hash(state);
69    }
70}
71
72impl PartialOrd for Segment {
73    #[inline]
74    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
75        Some(self.cmp(other))
76    }
77}
78
79impl Ord for Segment {
80    #[inline]
81    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
82        (**self).cmp(&**other)
83    }
84}
85
86impl Segment {
87    /// Create a 1-field segment (generated column only).
88    #[inline]
89    pub fn one(a: i64) -> Self {
90        Self {
91            data: [a, 0, 0, 0, 0],
92            len: 1,
93        }
94    }
95
96    /// Create a 4-field segment (with source info, no name).
97    #[inline]
98    pub fn four(a: i64, b: i64, c: i64, d: i64) -> Self {
99        Self {
100            data: [a, b, c, d, 0],
101            len: 4,
102        }
103    }
104
105    /// Create a 5-field segment (with source info and name).
106    #[inline]
107    pub fn five(a: i64, b: i64, c: i64, d: i64, e: i64) -> Self {
108        Self {
109            data: [a, b, c, d, e],
110            len: 5,
111        }
112    }
113
114    /// Convert to a `Vec<i64>` (for interop with APIs that expect `Vec`).
115    pub fn to_vec(&self) -> Vec<i64> {
116        self.data[..self.len as usize].to_vec()
117    }
118}
119
120impl std::ops::Deref for Segment {
121    type Target = [i64];
122
123    #[inline]
124    fn deref(&self) -> &[i64] {
125        &self.data[..self.len as usize]
126    }
127}
128
129impl<'a> IntoIterator for &'a Segment {
130    type Item = &'a i64;
131    type IntoIter = std::slice::Iter<'a, i64>;
132
133    #[inline]
134    fn into_iter(self) -> Self::IntoIter {
135        self.data[..self.len as usize].iter()
136    }
137}
138
139impl PartialEq for Segment {
140    fn eq(&self, other: &Self) -> bool {
141        **self == **other
142    }
143}
144
// Marker impl: `Segment` equality is plain `[i64]` slice equality, which is
// reflexive, symmetric, and transitive, so the `Eq` contract holds.
impl Eq for Segment {}
146
147impl PartialEq<Vec<i64>> for Segment {
148    fn eq(&self, other: &Vec<i64>) -> bool {
149        **self == **other
150    }
151}
152
153impl PartialEq<Segment> for Vec<i64> {
154    fn eq(&self, other: &Segment) -> bool {
155        **self == **other
156    }
157}
158
159impl From<Vec<i64>> for Segment {
160    fn from(v: Vec<i64>) -> Self {
161        let mut data = [0i64; 5];
162        let len = v.len().min(5);
163        data[..len].copy_from_slice(&v[..len]);
164        Self {
165            data,
166            len: len as u8,
167        }
168    }
169}
170
171impl From<&[i64]> for Segment {
172    fn from(s: &[i64]) -> Self {
173        let mut data = [0i64; 5];
174        let len = s.len().min(5);
175        data[..len].copy_from_slice(&s[..len]);
176        Self {
177            data,
178            len: len as u8,
179        }
180    }
181}
182
/// A source map line is a list of segments.
///
/// Alias for `Vec<Segment>`; the segments are kept in the order they appear
/// in the vector.
pub type Line = Vec<Segment>;

/// Decoded source map mappings: a list of lines, each containing segments.
///
/// Alias for `Vec<Line>`, i.e. `Vec<Vec<Segment>>`. A line without mappings
/// is represented by an empty [`Line`].
pub type SourceMapMappings = Vec<Line>;
188
/// Failure modes reported while decoding a VLQ-encoded mappings string.
///
/// Every variant carries an `offset` locating the problem in the input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DecodeError {
    /// A byte that is not a valid base64 character was encountered.
    InvalidBase64 {
        /// The offending byte.
        byte: u8,
        /// Offset in the input reported for the offending byte.
        offset: usize,
    },
    /// Input ended in the middle of a VLQ sequence (continuation bit was set).
    UnexpectedEof {
        /// Offset in the input reported for the premature end.
        offset: usize,
    },
    /// A VLQ value exceeded the maximum representable range.
    VlqOverflow {
        /// Offset in the input reported for the overflowing value.
        offset: usize,
    },
    /// A segment has an invalid number of fields (only 1, 4, or 5 are valid per ECMA-426).
    InvalidSegmentLength {
        /// Number of fields the rejected segment had.
        fields: u8,
        /// Offset in the input reported for the rejected segment.
        offset: usize,
    },
}
201
202impl fmt::Display for DecodeError {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        match self {
205            Self::InvalidBase64 { byte, offset } => {
206                write!(
207                    f,
208                    "invalid base64 character 0x{byte:02x} at offset {offset}"
209                )
210            }
211            Self::UnexpectedEof { offset } => {
212                write!(f, "unexpected end of input at offset {offset}")
213            }
214            Self::VlqOverflow { offset } => {
215                write!(f, "VLQ value overflow at offset {offset}")
216            }
217            Self::InvalidSegmentLength { fields, offset } => {
218                write!(
219                    f,
220                    "invalid segment with {fields} fields at offset {offset} (expected 1, 4, or 5)"
221                )
222            }
223        }
224    }
225}
226
// No overrides needed: `DecodeError` wraps no underlying error, so the
// provided `Error` methods are sufficient.
impl std::error::Error for DecodeError {}
228
// Unit tests for the crate's public surface: `decode`/`encode` round trips,
// decoded structure, rejection of malformed input, encoder edge cases,
// `encode_parallel` (only with the `parallel` feature), and the `Display`
// text of `DecodeError`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Roundtrip tests ---

    #[test]
    fn roundtrip_empty() {
        let decoded = decode("").unwrap();
        assert!(decoded.is_empty());
        assert_eq!(encode(&decoded), "");
    }

    #[test]
    fn roundtrip_simple() {
        let input = "AAAA;AACA";
        let decoded = decode(input).unwrap();
        let encoded = encode(&decoded);
        assert_eq!(encoded, input);
    }

    #[test]
    fn roundtrip_multiple_segments() {
        let input = "AAAA,GAAG,EAAE;AACA";
        let decoded = decode(input).unwrap();
        let encoded = encode(&decoded);
        assert_eq!(encoded, input);
    }

    #[test]
    fn roundtrip_large_values() {
        let mappings = vec![vec![Segment::five(1000, 50, 999, 500, 100)]];
        let encoded = encode(&mappings);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, mappings);
    }

    #[test]
    fn roundtrip_negative_deltas() {
        // The second segment's original line/column (5, 5) are smaller than the
        // first segment's (10, 10).
        let mappings = vec![vec![
            Segment::four(10, 0, 10, 10),
            Segment::four(20, 0, 5, 5),
        ]];
        let encoded = encode(&mappings);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, mappings);
    }

    // --- Decode structure tests ---

    #[test]
    fn decode_single_field_segment() {
        let decoded = decode("A").unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].len(), 1);
        assert_eq!(decoded[0][0], vec![0]);
    }

    #[test]
    fn decode_four_field_segment() {
        let decoded = decode("AAAA").unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].len(), 1);
        assert_eq!(decoded[0][0], vec![0, 0, 0, 0]);
    }

    #[test]
    fn decode_five_field_segment() {
        let decoded = decode("AAAAA").unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].len(), 1);
        assert_eq!(decoded[0][0], vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn decode_negative_values() {
        // 'D' decodes to -1 and 'A' to 0, as the assertion below pins
        let decoded = decode("DADD").unwrap();
        assert_eq!(decoded[0][0], vec![-1, 0, -1, -1]);
    }

    #[test]
    fn decode_multiple_lines() {
        let decoded = decode("AAAA;AACA;AACA").unwrap();
        assert_eq!(decoded.len(), 3);
    }

    #[test]
    fn decode_empty_lines() {
        // Consecutive `;` separators produce empty lines between the mapped ones
        let decoded = decode("AAAA;;;AACA").unwrap();
        assert_eq!(decoded.len(), 4);
        assert!(decoded[1].is_empty());
        assert!(decoded[2].is_empty());
    }

    #[test]
    fn decode_trailing_semicolon() {
        // Trailing `;` means an empty line follows
        let decoded = decode("AAAA;").unwrap();
        assert_eq!(decoded.len(), 2);
        assert_eq!(decoded[0].len(), 1);
        assert!(decoded[1].is_empty());
    }

    #[test]
    fn decode_only_semicolons() {
        // Three separators delimit four (empty) lines
        let decoded = decode(";;;").unwrap();
        assert_eq!(decoded.len(), 4);
        for line in &decoded {
            assert!(line.is_empty());
        }
    }

    // --- Malformed input tests ---

    #[test]
    fn decode_invalid_ascii_char() {
        // '!' sits at index 2 of the input, which is the offset reported
        let err = decode("AA!A").unwrap_err();
        assert_eq!(
            err,
            DecodeError::InvalidBase64 {
                byte: b'!',
                offset: 2
            }
        );
    }

    #[test]
    fn decode_non_ascii_byte() {
        // 'À' is UTF-8 bytes [0xC3, 0x80] — both >= 128, caught by non-ASCII guard
        let err = decode("AAÀ").unwrap_err();
        assert_eq!(
            err,
            DecodeError::InvalidBase64 {
                byte: 0xC3,
                offset: 2
            }
        );
    }

    #[test]
    fn decode_truncated_vlq() {
        // 'g' has value 32, which has the continuation bit set — needs more chars
        let err = decode("g").unwrap_err();
        assert_eq!(err, DecodeError::UnexpectedEof { offset: 1 });
    }

    #[test]
    fn decode_vlq_overflow() {
        // 14 continuation characters: each 'g' = value 32 (continuation bit set)
        // After 12 digits, shift reaches 60 which exceeds the VLQ_MAX_SHIFT limit
        let err = decode("gggggggggggggg").unwrap_err();
        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
    }

    #[test]
    fn decode_truncated_segment_two_fields() {
        // "AC" = two VLQ values (0, 1) — 2-field segment is invalid per ECMA-426
        let err = decode("AC").unwrap_err();
        assert!(matches!(
            err,
            DecodeError::InvalidSegmentLength { fields: 2, .. }
        ));
    }

    #[test]
    fn decode_truncated_segment_three_fields() {
        // "ACA" = three VLQ values (0, 1, 0) — 3-field segment is invalid per ECMA-426
        let err = decode("ACA").unwrap_err();
        assert!(matches!(
            err,
            DecodeError::InvalidSegmentLength { fields: 3, .. }
        ));
    }

    #[test]
    fn decode_two_field_segment_followed_by_separator() {
        // "AC,AAAA" — first segment has 2 fields, invalid
        let err = decode("AC,AAAA").unwrap_err();
        assert!(matches!(
            err,
            DecodeError::InvalidSegmentLength { fields: 2, .. }
        ));
    }

    #[test]
    fn decode_three_field_segment_followed_by_separator() {
        // "ACA;AAAA" — first segment has 3 fields, invalid
        let err = decode("ACA;AAAA").unwrap_err();
        assert!(matches!(
            err,
            DecodeError::InvalidSegmentLength { fields: 3, .. }
        ));
    }

    // --- Encode edge cases ---

    #[test]
    fn encode_empty_segments_no_dangling_comma() {
        // Empty segments should be skipped without producing dangling commas
        let empty = Segment::from(&[] as &[i64]);
        let mappings = vec![vec![
            empty,
            Segment::four(0, 0, 0, 0),
            empty,
            Segment::four(2, 0, 0, 1),
        ]];
        let encoded = encode(&mappings);
        assert!(
            !encoded.contains(",,"),
            "should not contain dangling commas"
        );
        // Should encode as if empty segments don't exist
        let expected = encode(&vec![vec![
            Segment::four(0, 0, 0, 0),
            Segment::four(2, 0, 0, 1),
        ]]);
        assert_eq!(encoded, expected);
    }

    #[test]
    fn encode_all_empty_segments() {
        // A line made only of empty segments encodes to an empty string
        let empty = Segment::from(&[] as &[i64]);
        let mappings = vec![vec![empty, empty, empty]];
        let encoded = encode(&mappings);
        assert_eq!(encoded, "");
    }

    // --- Parallel encoding tests ---

    #[cfg(feature = "parallel")]
    mod parallel_tests {
        use super::*;

        // Builds `lines` lines of `segments_per_line` 5-field segments whose
        // values are derived from the line and segment indices.
        fn build_large_mappings(lines: usize, segments_per_line: usize) -> SourceMapMappings {
            let mut mappings = Vec::with_capacity(lines);
            for line in 0..lines {
                let mut line_segments = Vec::with_capacity(segments_per_line);
                for seg in 0..segments_per_line {
                    line_segments.push(Segment::five(
                        (seg * 10) as i64, // generated column
                        (seg % 5) as i64,  // source index
                        line as i64,       // original line
                        (seg * 5) as i64,  // original column
                        (seg % 3) as i64,  // name index
                    ));
                }
                mappings.push(line_segments);
            }
            mappings
        }

        #[test]
        fn parallel_matches_sequential_large() {
            let mappings = build_large_mappings(2000, 10);
            let sequential = encode(&mappings);
            let parallel = encode_parallel(&mappings);
            assert_eq!(sequential, parallel);
        }

        #[test]
        fn parallel_matches_sequential_with_empty_lines() {
            let mut mappings = build_large_mappings(1500, 8);
            // Insert empty lines
            for i in (0..mappings.len()).step_by(3) {
                mappings[i] = Vec::new();
            }
            let sequential = encode(&mappings);
            let parallel = encode_parallel(&mappings);
            assert_eq!(sequential, parallel);
        }

        #[test]
        fn parallel_matches_sequential_mixed_segments() {
            // Each line mixes 1-, 4-, and 5-field segments
            let mut mappings: SourceMapMappings = Vec::with_capacity(2000);
            for line in 0..2000 {
                let mut line_segments = Vec::new();
                for seg in 0..8 {
                    if seg % 4 == 0 {
                        line_segments.push(Segment::one((seg * 10) as i64));
                    } else if seg % 4 == 3 {
                        line_segments.push(Segment::five(
                            (seg * 10) as i64,
                            (seg % 3) as i64,
                            line as i64,
                            (seg * 5) as i64,
                            (seg % 2) as i64,
                        ));
                    } else {
                        line_segments.push(Segment::four(
                            (seg * 10) as i64,
                            (seg % 3) as i64,
                            line as i64,
                            (seg * 5) as i64,
                        ));
                    }
                }
                mappings.push(line_segments);
            }
            let sequential = encode(&mappings);
            let parallel = encode_parallel(&mappings);
            assert_eq!(sequential, parallel);
        }

        #[test]
        fn parallel_roundtrip() {
            let mappings = build_large_mappings(2000, 10);
            let encoded = encode_parallel(&mappings);
            let decoded = decode(&encoded).unwrap();
            assert_eq!(decoded, mappings);
        }

        #[test]
        fn parallel_fallback_for_small_maps() {
            // Below threshold — should still produce correct output
            let mappings = build_large_mappings(10, 5);
            let sequential = encode(&mappings);
            let parallel = encode_parallel(&mappings);
            assert_eq!(sequential, parallel);
        }
    }

    // --- DecodeError Display tests ---

    #[test]
    fn decode_error_display_invalid_base64() {
        let err = DecodeError::InvalidBase64 {
            byte: b'!',
            offset: 2,
        };
        assert_eq!(err.to_string(), "invalid base64 character 0x21 at offset 2");
    }

    #[test]
    fn decode_error_display_unexpected_eof() {
        let err = DecodeError::UnexpectedEof { offset: 5 };
        assert_eq!(err.to_string(), "unexpected end of input at offset 5");
    }

    #[test]
    fn decode_error_display_overflow() {
        let err = DecodeError::VlqOverflow { offset: 10 };
        assert_eq!(err.to_string(), "VLQ value overflow at offset 10");
    }

    #[test]
    fn decode_error_display_invalid_segment_length() {
        let err = DecodeError::InvalidSegmentLength {
            fields: 2,
            offset: 3,
        };
        assert_eq!(
            err.to_string(),
            "invalid segment with 2 fields at offset 3 (expected 1, 4, or 5)"
        );
    }

    // --- Decode edge case: 5-field segment with name ---

    #[test]
    fn decode_five_field_with_name_index() {
        // Ensure the name field (5th) is decoded correctly
        let input = "AAAAC"; // 0,0,0,0,1
        let decoded = decode(input).unwrap();
        assert_eq!(decoded[0][0], vec![0, 0, 0, 0, 1]);
    }

    // --- Encode edge case: a single line holding one 1-field segment ---

    #[test]
    fn encode_single_segment_one_field() {
        let mappings = vec![vec![Segment::one(5)]];
        let encoded = encode(&mappings);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, mappings);
    }
}