Skip to main content

srcmap_codec/
lib.rs

1//! High-performance VLQ source map codec.
2//!
3//! Encodes and decodes source map mappings using the Base64 VLQ format
4//! as specified in the Source Map v3 specification (ECMA-426).
5//!
6//! # Features
7//!
8//! - **`parallel`** — enables `encode_parallel` for multi-threaded encoding via rayon.
9//!   ~1.5x faster for large maps (5K+ lines).
10//!
11//! # Examples
12//!
13//! Decode and re-encode a mappings string:
14//!
15//! ```
16//! use srcmap_codec::{decode, encode};
17//!
18//! let mappings = decode("AAAA;AACA,EAAE").unwrap();
19//! assert_eq!(mappings.len(), 2); // 2 lines
20//! assert_eq!(mappings[0][0], vec![0, 0, 0, 0]); // first segment
21//!
22//! let encoded = encode(&mappings);
23//! assert_eq!(encoded, "AAAA;AACA,EAAE");
24//! ```
25//!
26//! Low-level VLQ primitives:
27//!
28//! ```
29//! use srcmap_codec::{vlq_decode, vlq_encode};
30//!
31//! let mut buf = Vec::new();
32//! vlq_encode(&mut buf, 42);
33//!
34//! let (value, bytes_read) = vlq_decode(&buf, 0).unwrap();
35//! assert_eq!(value, 42);
36//! ```
37
38mod decode;
39mod encode;
40mod vlq;
41
42pub use decode::decode;
43pub use encode::encode;
44#[cfg(feature = "parallel")]
45pub use encode::encode_parallel;
46pub use vlq::{
47    vlq_decode, vlq_decode_unsigned, vlq_encode, vlq_encode_unchecked, vlq_encode_unsigned,
48    vlq_encode_unsigned_unchecked,
49};
50
51use std::fmt;
52
/// A source map segment kept entirely inline, so building one never touches the heap.
///
/// A well-formed segment has 1, 4, or 5 fields:
/// - 1 field:  `[generated_column]`
/// - 4 fields: `[generated_column, source_index, original_line, original_column]`
/// - 5 fields: `[generated_column, source_index, original_line, original_column, name_index]`
///
/// `Segment` implements `Deref<Target=[i64]>`, so indexing, `len()`, `is_empty()`, and
/// iteration behave exactly as they would on a `Vec<i64>`. Equality, ordering, and hashing
/// look only at the populated fields, never at the padding.
#[derive(Clone, Copy, Debug)]
pub struct Segment {
    /// Backing storage; every slot at index `len` or beyond is zero padding.
    data: [i64; 5],
    /// Count of populated leading slots in `data` (never more than 5).
    len: u8,
}

impl Segment {
    /// Build a segment holding only the generated column.
    #[inline]
    pub fn one(a: i64) -> Self {
        Self { len: 1, ..Self::five(a, 0, 0, 0, 0) }
    }

    /// Build a segment with source information but no name index.
    #[inline]
    pub fn four(a: i64, b: i64, c: i64, d: i64) -> Self {
        Self { len: 4, ..Self::five(a, b, c, d, 0) }
    }

    /// Build a segment with source information and a name index.
    #[inline]
    pub fn five(a: i64, b: i64, c: i64, d: i64, e: i64) -> Self {
        Self { data: [a, b, c, d, e], len: 5 }
    }

    /// Copy the populated fields into a freshly allocated `Vec<i64>`
    /// (handy for APIs that insist on a `Vec`).
    pub fn to_vec(&self) -> Vec<i64> {
        Vec::from(self.populated())
    }

    /// The populated fields as a slice; padding slots are excluded.
    #[inline]
    fn populated(&self) -> &[i64] {
        &self.data[..usize::from(self.len)]
    }

    /// Build a segment from the leading elements of `src`.
    ///
    /// At most five elements are copied; anything after the fifth is ignored.
    /// The resulting field count is not checked against the 1/4/5 rule.
    fn from_prefix(src: &[i64]) -> Self {
        let mut segment = Self { data: [0; 5], len: 0 };
        // `zip` stops at the shorter side, which caps the copy at five slots.
        for (slot, &value) in segment.data.iter_mut().zip(src) {
            *slot = value;
            segment.len += 1;
        }
        segment
    }
}

/// Exposes the populated fields as a plain `[i64]` slice.
impl std::ops::Deref for Segment {
    type Target = [i64];

    #[inline]
    fn deref(&self) -> &[i64] {
        self.populated()
    }
}

/// Lets `for value in &segment` walk the populated fields.
impl<'a> IntoIterator for &'a Segment {
    type Item = &'a i64;
    type IntoIter = std::slice::Iter<'a, i64>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.populated().iter()
    }
}

/// Hashes the populated slice, keeping `Hash` in agreement with `Eq`.
impl std::hash::Hash for Segment {
    #[inline]
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.populated().hash(state);
    }
}

/// Two segments are equal when their populated fields are equal.
impl PartialEq for Segment {
    fn eq(&self, other: &Self) -> bool {
        self.populated() == other.populated()
    }
}

impl Eq for Segment {}

/// Always defers to the total order defined by `Ord`.
impl PartialOrd for Segment {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

/// Lexicographic ordering over the populated fields (slice ordering).
impl Ord for Segment {
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.populated().cmp(other.populated())
    }
}

/// Allows `segment == vec![..]` comparisons.
impl PartialEq<Vec<i64>> for Segment {
    fn eq(&self, other: &Vec<i64>) -> bool {
        self.populated() == other.as_slice()
    }
}

/// Allows `vec![..] == segment` comparisons.
impl PartialEq<Segment> for Vec<i64> {
    fn eq(&self, other: &Segment) -> bool {
        self.as_slice() == other.populated()
    }
}

/// Converts from an owned vector, keeping at most its first five elements.
impl From<Vec<i64>> for Segment {
    fn from(v: Vec<i64>) -> Self {
        Self::from_prefix(&v)
    }
}

/// Converts from a slice, keeping at most its first five elements.
impl From<&[i64]> for Segment {
    fn from(s: &[i64]) -> Self {
        Self::from_prefix(s)
    }
}
170
171/// A source map line is a list of segments.
172pub type Line = Vec<Segment>;
173
174/// Decoded source map mappings: a list of lines, each containing segments.
175pub type SourceMapMappings = Vec<Line>;
176
/// Failures that decoding a VLQ-encoded mappings string can report.
///
/// Every variant carries the `offset` at which the problem was detected.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DecodeError {
    /// The input contained a byte that is not a valid base64 character.
    InvalidBase64 { byte: u8, offset: usize },
    /// The input stopped part-way through a VLQ sequence (continuation bit still set).
    UnexpectedEof { offset: usize },
    /// A VLQ value went beyond the maximum representable range.
    VlqOverflow { offset: usize },
    /// A segment had a field count other than 1, 4, or 5 (the only counts ECMA-426 allows).
    InvalidSegmentLength { fields: u8, offset: usize },
}

/// Renders a one-line, human-readable description of the error.
impl fmt::Display for DecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // All payload fields are `Copy`, so matching on `*self` binds them by value.
        match *self {
            Self::InvalidBase64 { byte, offset } => write!(
                f,
                "invalid base64 character 0x{byte:02x} at offset {offset}"
            ),
            Self::UnexpectedEof { offset } => write!(
                f,
                "unexpected end of input at offset {offset}"
            ),
            Self::VlqOverflow { offset } => write!(
                f,
                "VLQ value overflow at offset {offset}"
            ),
            Self::InvalidSegmentLength { fields, offset } => write!(
                f,
                "invalid segment with {fields} fields at offset {offset} (expected 1, 4, or 5)"
            ),
        }
    }
}

impl std::error::Error for DecodeError {}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that decoding `input` and encoding the result reproduces `input`.
    fn assert_string_roundtrip(input: &str) {
        let mappings = decode(input).unwrap();
        assert_eq!(encode(&mappings), input);
    }

    /// Asserts that encoding `mappings` and decoding the result reproduces `mappings`.
    fn assert_mappings_roundtrip(mappings: SourceMapMappings) {
        let text = encode(&mappings);
        assert_eq!(decode(&text).unwrap(), mappings);
    }

    /// Returns the reported field count when decoding `input` fails with
    /// `InvalidSegmentLength`, and `None` for any other outcome.
    fn invalid_field_count(input: &str) -> Option<u8> {
        match decode(input) {
            Err(DecodeError::InvalidSegmentLength { fields, .. }) => Some(fields),
            _ => None,
        }
    }

    // --- Roundtrip tests ---

    #[test]
    fn roundtrip_empty() {
        let mappings = decode("").unwrap();
        assert!(mappings.is_empty());
        assert_eq!(encode(&mappings), "");
    }

    #[test]
    fn roundtrip_simple() {
        assert_string_roundtrip("AAAA;AACA");
    }

    #[test]
    fn roundtrip_multiple_segments() {
        assert_string_roundtrip("AAAA,GAAG,EAAE;AACA");
    }

    #[test]
    fn roundtrip_large_values() {
        assert_mappings_roundtrip(vec![vec![Segment::five(1000, 50, 999, 500, 100)]]);
    }

    #[test]
    fn roundtrip_negative_deltas() {
        // The second segment's original line/column are lower than the first's,
        // so the encoded deltas are negative.
        assert_mappings_roundtrip(vec![vec![
            Segment::four(10, 0, 10, 10),
            Segment::four(20, 0, 5, 5),
        ]]);
    }

    // --- Decode structure tests ---

    #[test]
    fn decode_single_field_segment() {
        let lines = decode("A").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0]);
    }

    #[test]
    fn decode_four_field_segment() {
        let lines = decode("AAAA").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0, 0, 0, 0]);
    }

    #[test]
    fn decode_five_field_segment() {
        let lines = decode("AAAAA").unwrap();
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].len(), 1);
        assert_eq!(lines[0][0], vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn decode_negative_values() {
        let lines = decode("DADD").unwrap();
        assert_eq!(lines[0][0], vec![-1, 0, -1, -1]);
    }

    #[test]
    fn decode_multiple_lines() {
        assert_eq!(decode("AAAA;AACA;AACA").unwrap().len(), 3);
    }

    #[test]
    fn decode_empty_lines() {
        let lines = decode("AAAA;;;AACA").unwrap();
        assert_eq!(lines.len(), 4);
        // The two lines between the populated ones carry no segments.
        assert!(lines[1..3].iter().all(|line| line.is_empty()));
    }

    #[test]
    fn decode_trailing_semicolon() {
        // A trailing `;` opens one more (empty) line.
        let lines = decode("AAAA;").unwrap();
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 1);
        assert!(lines[1].is_empty());
    }

    #[test]
    fn decode_only_semicolons() {
        let lines = decode(";;;").unwrap();
        assert_eq!(lines.len(), 4);
        assert!(lines.iter().all(|line| line.is_empty()));
    }

    // --- Malformed input tests ---

    #[test]
    fn decode_invalid_ascii_char() {
        let expected = DecodeError::InvalidBase64 { byte: b'!', offset: 2 };
        assert_eq!(decode("AA!A").unwrap_err(), expected);
    }

    #[test]
    fn decode_non_ascii_byte() {
        // 'À' is UTF-8 bytes [0xC3, 0x80]; the first of them is the byte reported.
        let expected = DecodeError::InvalidBase64 { byte: 0xC3, offset: 2 };
        assert_eq!(decode("AAÀ").unwrap_err(), expected);
    }

    #[test]
    fn decode_truncated_vlq() {
        // 'g' has value 32, i.e. the continuation bit is set, so more input is required.
        let expected = DecodeError::UnexpectedEof { offset: 1 };
        assert_eq!(decode("g").unwrap_err(), expected);
    }

    #[test]
    fn decode_vlq_overflow() {
        // 14 continuation characters: each 'g' = value 32 (continuation bit set).
        // After 12 digits, shift reaches 60 which exceeds the VLQ_MAX_SHIFT limit.
        let overflowed = matches!(
            decode("gggggggggggggg"),
            Err(DecodeError::VlqOverflow { .. })
        );
        assert!(overflowed);
    }

    #[test]
    fn decode_truncated_segment_two_fields() {
        // "AC" = two VLQ values (0, 1); a 2-field segment is invalid per ECMA-426.
        assert_eq!(invalid_field_count("AC"), Some(2));
    }

    #[test]
    fn decode_truncated_segment_three_fields() {
        // "ACA" = three VLQ values (0, 1, 0); a 3-field segment is invalid per ECMA-426.
        assert_eq!(invalid_field_count("ACA"), Some(3));
    }

    #[test]
    fn decode_two_field_segment_followed_by_separator() {
        // "AC,AAAA": the first segment has 2 fields, which is invalid.
        assert_eq!(invalid_field_count("AC,AAAA"), Some(2));
    }

    #[test]
    fn decode_three_field_segment_followed_by_separator() {
        // "ACA;AAAA": the first segment has 3 fields, which is invalid.
        assert_eq!(invalid_field_count("ACA;AAAA"), Some(3));
    }

    // --- Encode edge cases ---

    #[test]
    fn encode_empty_segments_no_dangling_comma() {
        // Zero-field segments must be skipped without leaving stray commas behind.
        let blank = Segment::from(&[] as &[i64]);
        let first = Segment::four(0, 0, 0, 0);
        let second = Segment::four(2, 0, 0, 1);

        let sparse = vec![vec![blank, first, blank, second]];
        let dense = vec![vec![first, second]];

        let encoded = encode(&sparse);
        assert!(!encoded.contains(",,"), "should not contain dangling commas");
        // The output must match what the same line yields with the blanks removed.
        assert_eq!(encoded, encode(&dense));
    }

    #[test]
    fn encode_all_empty_segments() {
        let blank = Segment::from(&[] as &[i64]);
        let mappings = vec![vec![blank; 3]];
        assert_eq!(encode(&mappings), "");
    }

    // --- Parallel encoding tests ---

    #[cfg(feature = "parallel")]
    mod parallel_tests {
        use super::*;

        /// Builds `lines` lines of `segments_per_line` five-field segments with
        /// deterministic, position-derived values.
        fn build_large_mappings(lines: usize, segments_per_line: usize) -> SourceMapMappings {
            (0..lines)
                .map(|line| {
                    (0..segments_per_line)
                        .map(|seg| {
                            Segment::five(
                                (seg * 10) as i64, // generated column
                                (seg % 5) as i64,  // source index
                                line as i64,       // original line
                                (seg * 5) as i64,  // original column
                                (seg % 3) as i64,  // name index
                            )
                        })
                        .collect()
                })
                .collect()
        }

        #[test]
        fn parallel_matches_sequential_large() {
            let mappings = build_large_mappings(2000, 10);
            assert_eq!(encode(&mappings), encode_parallel(&mappings));
        }

        #[test]
        fn parallel_matches_sequential_with_empty_lines() {
            let mut mappings = build_large_mappings(1500, 8);
            // Blank out every third line, starting with the first.
            for line in mappings.iter_mut().step_by(3) {
                *line = Vec::new();
            }
            assert_eq!(encode(&mappings), encode_parallel(&mappings));
        }

        #[test]
        fn parallel_matches_sequential_mixed_segments() {
            // Each line cycles through 1-, 4-, 4-, and 5-field segments.
            let mappings: SourceMapMappings = (0..2000i64)
                .map(|line| {
                    (0..8i64)
                        .map(|seg| match seg % 4 {
                            0 => Segment::one(seg * 10),
                            3 => Segment::five(seg * 10, seg % 3, line, seg * 5, seg % 2),
                            _ => Segment::four(seg * 10, seg % 3, line, seg * 5),
                        })
                        .collect()
                })
                .collect();
            assert_eq!(encode(&mappings), encode_parallel(&mappings));
        }

        #[test]
        fn parallel_roundtrip() {
            let mappings = build_large_mappings(2000, 10);
            let text = encode_parallel(&mappings);
            assert_eq!(decode(&text).unwrap(), mappings);
        }

        #[test]
        fn parallel_fallback_for_small_maps() {
            // Below threshold: the output must still be correct.
            let mappings = build_large_mappings(10, 5);
            assert_eq!(encode(&mappings), encode_parallel(&mappings));
        }
    }

    // --- DecodeError Display tests ---

    #[test]
    fn decode_error_display_invalid_base64() {
        let rendered = DecodeError::InvalidBase64 { byte: b'!', offset: 2 }.to_string();
        assert_eq!(rendered, "invalid base64 character 0x21 at offset 2");
    }

    #[test]
    fn decode_error_display_unexpected_eof() {
        let rendered = DecodeError::UnexpectedEof { offset: 5 }.to_string();
        assert_eq!(rendered, "unexpected end of input at offset 5");
    }

    #[test]
    fn decode_error_display_overflow() {
        let rendered = DecodeError::VlqOverflow { offset: 10 }.to_string();
        assert_eq!(rendered, "VLQ value overflow at offset 10");
    }

    #[test]
    fn decode_error_display_invalid_segment_length() {
        let rendered = DecodeError::InvalidSegmentLength { fields: 2, offset: 3 }.to_string();
        assert_eq!(
            rendered,
            "invalid segment with 2 fields at offset 3 (expected 1, 4, or 5)"
        );
    }

    // --- Decode edge case: 5-field segment with name ---

    #[test]
    fn decode_five_field_with_name_index() {
        // "AAAAC" decodes to 0,0,0,0,1: the fifth (name) field must come through intact.
        let lines = decode("AAAAC").unwrap();
        assert_eq!(lines[0][0], vec![0, 0, 0, 0, 1]);
    }

    // --- Encode edge case: a single 1-field segment ---

    #[test]
    fn encode_single_segment_one_field() {
        assert_mappings_roundtrip(vec![vec![Segment::one(5)]]);
    }
}