Skip to main content

srcmap_codec/
lib.rs

1//! High-performance VLQ source map codec.
2//!
3//! Encodes and decodes source map mappings using the Base64 VLQ format
4//! as specified in the Source Map v3 specification (ECMA-426).
5//!
6//! # Features
7//!
8//! - **`parallel`** — enables `encode_parallel` for multi-threaded encoding via rayon.
9//!   ~1.5x faster for large maps (5K+ lines).
10//!
11//! # Examples
12//!
13//! Decode and re-encode a mappings string:
14//!
15//! ```
16//! use srcmap_codec::{decode, encode};
17//!
18//! let mappings = decode("AAAA;AACA,EAAE").unwrap();
19//! assert_eq!(mappings.len(), 2); // 2 lines
20//! assert_eq!(mappings[0][0], vec![0, 0, 0, 0]); // first segment
21//!
22//! let encoded = encode(&mappings);
23//! assert_eq!(encoded, "AAAA;AACA,EAAE");
24//! ```
25//!
26//! Low-level VLQ primitives:
27//!
28//! ```
29//! use srcmap_codec::{vlq_decode, vlq_encode};
30//!
31//! let mut buf = Vec::new();
32//! vlq_encode(&mut buf, 42);
33//!
34//! let (value, bytes_read) = vlq_decode(&buf, 0).unwrap();
35//! assert_eq!(value, 42);
36//! ```
37
38mod decode;
39mod encode;
40mod vlq;
41
42pub use decode::decode;
43pub use encode::encode;
44#[cfg(feature = "parallel")]
45pub use encode::encode_parallel;
46pub use vlq::{
47    vlq_decode, vlq_decode_unsigned, vlq_encode, vlq_encode_unchecked, vlq_encode_unsigned,
48    vlq_encode_unsigned_unchecked,
49};
50
51use std::fmt;
52
/// Fixed-capacity, inline storage for one source map segment (never touches the heap).
///
/// A well-formed segment carries 1, 4, or 5 fields:
/// - 1 field:  `[generated_column]`
/// - 4 fields: `[generated_column, source_index, original_line, original_column]`
/// - 5 fields: `[generated_column, source_index, original_line, original_column, name_index]`
///
/// `Segment` dereferences to `[i64]`, which means indexing, `len()`, `is_empty()`,
/// and iteration behave exactly as they do on a `Vec<i64>`. Equality, ordering, and
/// hashing all go through that slice view, so only the populated fields take part.
#[derive(Debug, Clone, Copy)]
pub struct Segment {
    data: [i64; 5],
    len: u8,
}

impl Segment {
    /// Build a segment holding only the generated column.
    #[inline]
    pub fn one(a: i64) -> Self {
        let data = [a, 0, 0, 0, 0];
        Self { data, len: 1 }
    }

    /// Build a segment with source information but no name index.
    #[inline]
    pub fn four(a: i64, b: i64, c: i64, d: i64) -> Self {
        let data = [a, b, c, d, 0];
        Self { data, len: 4 }
    }

    /// Build a segment with source information and a name index.
    #[inline]
    pub fn five(a: i64, b: i64, c: i64, d: i64, e: i64) -> Self {
        let data = [a, b, c, d, e];
        Self { data, len: 5 }
    }

    /// Copy the populated fields into a freshly allocated `Vec<i64>`
    /// (handy when an API insists on a `Vec`).
    pub fn to_vec(&self) -> Vec<i64> {
        self.iter().copied().collect()
    }
}

impl std::ops::Deref for Segment {
    type Target = [i64];

    /// View the populated fields as a slice; the unused tail of the buffer is hidden.
    #[inline]
    fn deref(&self) -> &[i64] {
        let populated = usize::from(self.len);
        &self.data[..populated]
    }
}

impl<'a> IntoIterator for &'a Segment {
    type Item = &'a i64;
    type IntoIter = std::slice::Iter<'a, i64>;

    /// Iterate over the populated fields by reference.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

impl PartialEq for Segment {
    /// Two segments are equal when their populated fields match.
    fn eq(&self, other: &Self) -> bool {
        self[..] == other[..]
    }
}

impl Eq for Segment {}

impl PartialOrd for Segment {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Segment {
    /// Lexicographic ordering over the populated fields (slice ordering).
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        Ord::cmp(&self[..], &other[..])
    }
}

impl std::hash::Hash for Segment {
    /// Hash exactly like the `[i64]` slice of populated fields, keeping `Hash`
    /// in agreement with `Eq`.
    #[inline]
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        <[i64] as std::hash::Hash>::hash(self, state);
    }
}

impl PartialEq<Vec<i64>> for Segment {
    fn eq(&self, other: &Vec<i64>) -> bool {
        &self[..] == other.as_slice()
    }
}

impl PartialEq<Segment> for Vec<i64> {
    fn eq(&self, other: &Segment) -> bool {
        self.as_slice() == &other[..]
    }
}

impl From<Vec<i64>> for Segment {
    /// Same conversion as `From<&[i64]>`: keeps at most the first five values.
    fn from(v: Vec<i64>) -> Self {
        Self::from(v.as_slice())
    }
}

impl From<&[i64]> for Segment {
    /// Copy up to five leading values from `s`; anything beyond the fifth value
    /// is silently dropped, and an empty slice yields an empty segment.
    fn from(s: &[i64]) -> Self {
        let mut segment = Self {
            data: [0; 5],
            len: 0,
        };
        // `zip` stops at whichever runs out first: the five slots or the input.
        for (slot, &field) in segment.data.iter_mut().zip(s) {
            *slot = field;
            segment.len += 1;
        }
        segment
    }
}

/// One generated line of a source map: its segments in order.
pub type Line = Vec<Segment>;

/// Fully decoded mappings: one [`Line`] per generated line.
pub type SourceMapMappings = Vec<Line>;
191
/// Failure modes reported while decoding a VLQ-encoded mappings string.
///
/// Every variant records the `offset` at which the problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// The input contained a byte that is not part of the base64 alphabet.
    InvalidBase64 { byte: u8, offset: usize },
    /// The input stopped while a VLQ sequence was still open (continuation bit set).
    UnexpectedEof { offset: usize },
    /// A VLQ value grew beyond the maximum representable range.
    VlqOverflow { offset: usize },
    /// A segment had a field count other than 1, 4, or 5 (the only counts ECMA-426 allows).
    InvalidSegmentLength { fields: u8, offset: usize },
}

impl fmt::Display for DecodeError {
    /// Render a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // All payload fields are `Copy`, so matching on `*self` binds them by value.
        match *self {
            Self::InvalidBase64 { byte, offset } => write!(
                f,
                "invalid base64 character 0x{byte:02x} at offset {offset}"
            ),
            Self::UnexpectedEof { offset } => write!(f, "unexpected end of input at offset {offset}"),
            Self::VlqOverflow { offset } => write!(f, "VLQ value overflow at offset {offset}"),
            Self::InvalidSegmentLength { fields, offset } => write!(
                f,
                "invalid segment with {fields} fields at offset {offset} (expected 1, 4, or 5)"
            ),
        }
    }
}

impl std::error::Error for DecodeError {}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Decode `input`, which is expected to be malformed, and return the error.
    fn decode_err(input: &str) -> DecodeError {
        decode(input).unwrap_err()
    }

    /// Decode `input`, check that it holds exactly one line with exactly one
    /// segment, and return that segment.
    fn sole_segment(input: &str) -> Segment {
        let lines = decode(input).unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        lines[0][0]
    }

    /// Check that decoding `input` fails with `InvalidSegmentLength` reporting
    /// `expected_fields` fields (the offset is not inspected).
    fn assert_invalid_segment_length(input: &str, expected_fields: u8) {
        let err = decode_err(input);
        assert!(matches!(
            err,
            DecodeError::InvalidSegmentLength { fields, .. } if fields == expected_fields
        ));
    }

    // --- Roundtrips: decode -> encode and encode -> decode ---

    #[test]
    fn roundtrip_empty() {
        let lines = decode("").unwrap();
        assert!(lines.is_empty());
        assert_eq!(encode(&lines), "");
    }

    #[test]
    fn roundtrip_simple() {
        let source = "AAAA;AACA";
        let lines = decode(source).unwrap();
        assert_eq!(encode(&lines), source);
    }

    #[test]
    fn roundtrip_multiple_segments() {
        let source = "AAAA,GAAG,EAAE;AACA";
        let lines = decode(source).unwrap();
        assert_eq!(encode(&lines), source);
    }

    #[test]
    fn roundtrip_large_values() {
        let original = vec![vec![Segment::five(1000, 50, 999, 500, 100)]];
        let text = encode(&original);
        let reparsed = decode(&text).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn roundtrip_negative_deltas() {
        // The second segment points at an earlier original line/column than the first.
        let first = Segment::four(10, 0, 10, 10);
        let second = Segment::four(20, 0, 5, 5);
        let original = vec![vec![first, second]];
        let text = encode(&original);
        let reparsed = decode(&text).unwrap();
        assert_eq!(reparsed, original);
    }

    // --- Shape of decoded output ---

    #[test]
    fn decode_single_field_segment() {
        assert_eq!(sole_segment("A"), vec![0]);
    }

    #[test]
    fn decode_four_field_segment() {
        assert_eq!(sole_segment("AAAA"), vec![0, 0, 0, 0]);
    }

    #[test]
    fn decode_five_field_segment() {
        assert_eq!(sole_segment("AAAAA"), vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn decode_negative_values() {
        let lines = decode("DADD").unwrap();
        let segment = lines[0][0];
        assert_eq!(segment, vec![-1, 0, -1, -1]);
    }

    #[test]
    fn decode_multiple_lines() {
        let lines = decode("AAAA;AACA;AACA").unwrap();
        assert_eq!(lines.len(), 3);
    }

    #[test]
    fn decode_empty_lines() {
        let lines = decode("AAAA;;;AACA").unwrap();
        assert_eq!(lines.len(), 4);
        // The two lines between the consecutive separators carry no segments.
        assert!(lines[1].is_empty());
        assert!(lines[2].is_empty());
    }

    #[test]
    fn decode_trailing_semicolon() {
        // A `;` at the very end opens one more (empty) line.
        let lines = decode("AAAA;").unwrap();
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 1);
        assert!(lines[1].is_empty());
    }

    #[test]
    fn decode_only_semicolons() {
        let lines = decode(";;;").unwrap();
        assert_eq!(lines.len(), 4);
        assert!(lines.iter().all(|line| line.is_empty()));
    }

    // --- Rejection of malformed input ---

    #[test]
    fn decode_invalid_ascii_char() {
        let expected = DecodeError::InvalidBase64 {
            byte: b'!',
            offset: 2,
        };
        assert_eq!(decode_err("AA!A"), expected);
    }

    #[test]
    fn decode_non_ascii_byte() {
        // 'À' encodes as the UTF-8 bytes [0xC3, 0x80]; the first one, at offset 2,
        // is what gets reported.
        let expected = DecodeError::InvalidBase64 {
            byte: 0xC3,
            offset: 2,
        };
        assert_eq!(decode_err("AAÀ"), expected);
    }

    #[test]
    fn decode_truncated_vlq() {
        // 'g' is base64 value 32, i.e. only the continuation bit — more digits must follow.
        let expected = DecodeError::UnexpectedEof { offset: 1 };
        assert_eq!(decode_err("g"), expected);
    }

    #[test]
    fn decode_vlq_overflow() {
        // Fourteen 'g' digits, each with the continuation bit set: the value keeps
        // growing until the decoder's shift limit is exceeded.
        let err = decode_err("gggggggggggggg");
        assert!(matches!(err, DecodeError::VlqOverflow { .. }));
    }

    #[test]
    fn decode_truncated_segment_two_fields() {
        // "AC" holds two VLQ values (0, 1); ECMA-426 has no 2-field segment.
        assert_invalid_segment_length("AC", 2);
    }

    #[test]
    fn decode_truncated_segment_three_fields() {
        // "ACA" holds three VLQ values (0, 1, 0); ECMA-426 has no 3-field segment.
        assert_invalid_segment_length("ACA", 3);
    }

    #[test]
    fn decode_two_field_segment_followed_by_separator() {
        // The 2-field segment is rejected even though a valid segment follows the `,`.
        assert_invalid_segment_length("AC,AAAA", 2);
    }

    #[test]
    fn decode_three_field_segment_followed_by_separator() {
        // The 3-field segment is rejected even though a valid line follows the `;`.
        assert_invalid_segment_length("ACA;AAAA", 3);
    }

    // --- Encoder edge cases ---

    #[test]
    fn encode_empty_segments_no_dangling_comma() {
        // Zero-field segments must be skipped rather than emitted as empty entries.
        let empty = Segment::from(&[] as &[i64]);
        let with_empties = vec![vec![
            empty,
            Segment::four(0, 0, 0, 0),
            empty,
            Segment::four(2, 0, 0, 1),
        ]];
        let text = encode(&with_empties);
        assert!(
            !text.contains(",,"),
            "should not contain dangling commas"
        );
        // The output must match what the same line yields without the empty segments.
        let without_empties = vec![vec![
            Segment::four(0, 0, 0, 0),
            Segment::four(2, 0, 0, 1),
        ]];
        let expected = encode(&without_empties);
        assert_eq!(text, expected);
    }

    #[test]
    fn encode_all_empty_segments() {
        let empty = Segment::from(&[] as &[i64]);
        let only_empties = vec![vec![empty, empty, empty]];
        assert_eq!(encode(&only_empties), "");
    }

    // --- Parallel encoder (requires the `parallel` feature) ---

    #[cfg(feature = "parallel")]
    mod parallel_tests {
        use super::*;

        /// Produce `lines` lines, each with `segments_per_line` five-field segments
        /// whose values are derived from the line and segment indices.
        fn build_large_mappings(lines: usize, segments_per_line: usize) -> SourceMapMappings {
            (0..lines)
                .map(|line| {
                    (0..segments_per_line)
                        .map(|seg| {
                            Segment::five(
                                (seg * 10) as i64, // generated column
                                (seg % 5) as i64,  // source index
                                line as i64,       // original line
                                (seg * 5) as i64,  // original column
                                (seg % 3) as i64,  // name index
                            )
                        })
                        .collect()
                })
                .collect()
        }

        /// Encode `mappings` with both encoders and require identical output.
        fn assert_parallel_matches_sequential(mappings: &SourceMapMappings) {
            let sequential = encode(mappings);
            let parallel = encode_parallel(mappings);
            assert_eq!(sequential, parallel);
        }

        #[test]
        fn parallel_matches_sequential_large() {
            let mappings = build_large_mappings(2000, 10);
            assert_parallel_matches_sequential(&mappings);
        }

        #[test]
        fn parallel_matches_sequential_with_empty_lines() {
            let mut mappings = build_large_mappings(1500, 8);
            // Blank out every third line, starting with the first.
            for line in mappings.iter_mut().step_by(3) {
                *line = Vec::new();
            }
            assert_parallel_matches_sequential(&mappings);
        }

        #[test]
        fn parallel_matches_sequential_mixed_segments() {
            let mut mappings: SourceMapMappings = Vec::with_capacity(2000);
            for line in 0..2000 {
                let mut row = Vec::new();
                for seg in 0..8 {
                    // Cycle through 1-field, 4-field, 4-field, 5-field segments.
                    let segment = match seg % 4 {
                        0 => Segment::one((seg * 10) as i64),
                        3 => Segment::five(
                            (seg * 10) as i64,
                            (seg % 3) as i64,
                            line as i64,
                            (seg * 5) as i64,
                            (seg % 2) as i64,
                        ),
                        _ => Segment::four(
                            (seg * 10) as i64,
                            (seg % 3) as i64,
                            line as i64,
                            (seg * 5) as i64,
                        ),
                    };
                    row.push(segment);
                }
                mappings.push(row);
            }
            assert_parallel_matches_sequential(&mappings);
        }

        #[test]
        fn parallel_roundtrip() {
            let original = build_large_mappings(2000, 10);
            let text = encode_parallel(&original);
            let reparsed = decode(&text).unwrap();
            assert_eq!(reparsed, original);
        }

        #[test]
        fn parallel_fallback_for_small_maps() {
            // A small map must still encode identically to the sequential encoder.
            let mappings = build_large_mappings(10, 5);
            assert_parallel_matches_sequential(&mappings);
        }
    }

    // --- `Display` output of `DecodeError` ---

    #[test]
    fn decode_error_display_invalid_base64() {
        let message = DecodeError::InvalidBase64 {
            byte: b'!',
            offset: 2,
        }
        .to_string();
        assert_eq!(message, "invalid base64 character 0x21 at offset 2");
    }

    #[test]
    fn decode_error_display_unexpected_eof() {
        let message = DecodeError::UnexpectedEof { offset: 5 }.to_string();
        assert_eq!(message, "unexpected end of input at offset 5");
    }

    #[test]
    fn decode_error_display_overflow() {
        let message = DecodeError::VlqOverflow { offset: 10 }.to_string();
        assert_eq!(message, "VLQ value overflow at offset 10");
    }

    #[test]
    fn decode_error_display_invalid_segment_length() {
        let message = DecodeError::InvalidSegmentLength {
            fields: 2,
            offset: 3,
        }
        .to_string();
        assert_eq!(
            message,
            "invalid segment with 2 fields at offset 3 (expected 1, 4, or 5)"
        );
    }

    // --- Five-field segment: the name index must survive decoding ---

    #[test]
    fn decode_five_field_with_name_index() {
        // "AAAAC" decodes to 0,0,0,0,1 — the trailing 1 is the name field.
        let lines = decode("AAAAC").unwrap();
        assert_eq!(lines[0][0], vec![0, 0, 0, 0, 1]);
    }

    // --- Single one-field segment on a single line ---

    #[test]
    fn encode_single_segment_one_field() {
        let original = vec![vec![Segment::one(5)]];
        let text = encode(&original);
        let reparsed = decode(&text).unwrap();
        assert_eq!(reparsed, original);
    }
}