Skip to main content

srcmap_sourcemap/
offset_lookup.rs

1use crate::{GeneratedLocation, LazySourceMap, OriginalLocation, SourceMap};
2
/// Line feed byte (`\n`); ends a line on its own or as the second half of CRLF.
const NEWLINE_LF: u8 = b'\n';
/// Carriage return byte (`\r`); ends a line on its own or as the first half of CRLF.
const NEWLINE_CR: u8 = b'\r';
5
6/// Source map lookup shape used by [`GeneratedOffsetLookup`].
7///
8/// Implemented for [`SourceMap`] and [`LazySourceMap`], so runtime coverage
9/// callers can normalize generated byte offsets once and use either eager or
10/// lazy source-map decoding.
11pub trait OriginalPositionLookup {
12    /// Look up an original source location from a generated position.
13    ///
14    /// Lines and columns are 0-based. Columns are UTF-16 code units, matching
15    /// JavaScript source-map semantics.
16    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation>;
17}
18
19impl OriginalPositionLookup for SourceMap {
20    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation> {
21        SourceMap::original_position_for(self, line, column)
22    }
23}
24
25impl OriginalPositionLookup for LazySourceMap {
26    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation> {
27        LazySourceMap::original_position_for(self, line, column)
28    }
29}
30
31/// Converts generated UTF-8 byte offsets into source-map generated positions.
32///
33/// V8 coverage ranges are often reported as byte offsets in generated code,
34/// while source maps use generated line and UTF-16 column pairs. This helper
35/// builds a reusable line index for one generated asset and converts offsets
36/// before forwarding them to a source map lookup.
37#[derive(Debug, Clone)]
38pub struct GeneratedOffsetLookup<'a> {
39    generated_source: &'a str,
40    line_start_bytes: Vec<usize>,
41    line_end_bytes: Vec<usize>,
42    line_ascii_only: Vec<bool>,
43}
44
45impl<'a> GeneratedOffsetLookup<'a> {
46    /// Create a lookup for one generated source asset.
47    pub fn new(generated_source: &'a str) -> Self {
48        let bytes = generated_source.as_bytes();
49        let mut line_start_bytes = vec![0];
50        let mut line_end_bytes = Vec::new();
51        let mut line_ascii_only = Vec::new();
52        let mut current_line_ascii_only = true;
53        let mut offset = 0;
54
55        while offset < bytes.len() {
56            match bytes[offset] {
57                NEWLINE_LF => {
58                    line_end_bytes.push(offset);
59                    line_ascii_only.push(current_line_ascii_only);
60                    offset += 1;
61                    line_start_bytes.push(offset);
62                    current_line_ascii_only = true;
63                }
64                NEWLINE_CR => {
65                    line_end_bytes.push(offset);
66                    line_ascii_only.push(current_line_ascii_only);
67                    offset += 1;
68                    if offset < bytes.len() && bytes[offset] == NEWLINE_LF {
69                        offset += 1;
70                    }
71                    line_start_bytes.push(offset);
72                    current_line_ascii_only = true;
73                }
74                byte if byte.is_ascii() => {
75                    offset += 1;
76                }
77                _ => {
78                    current_line_ascii_only = false;
79                    let ch = generated_source[offset..]
80                        .chars()
81                        .next()
82                        .expect("offset is always on a UTF-8 character boundary");
83                    offset += ch.len_utf8();
84                }
85            }
86        }
87
88        line_end_bytes.push(bytes.len());
89        line_ascii_only.push(current_line_ascii_only);
90
91        Self { generated_source, line_start_bytes, line_end_bytes, line_ascii_only }
92    }
93
94    /// Number of generated lines tracked by this lookup.
95    #[inline]
96    pub fn line_count(&self) -> usize {
97        self.line_start_bytes.len()
98    }
99
100    /// Total byte length of the generated source.
101    #[inline]
102    pub fn total_bytes(&self) -> usize {
103        self.generated_source.len()
104    }
105
106    /// Convert a generated UTF-8 byte offset to a 0-based line and UTF-16 column.
107    ///
108    /// Returns `None` when the offset is out of bounds, does not fit in `u32`,
109    /// or points into the middle of a UTF-8 code point. Offsets that point at a
110    /// line terminator resolve to the end column of that line. Offsets after a
111    /// line terminator resolve to column 0 of the next line.
112    pub fn byte_offset_to_position(&self, byte_offset: u32) -> Option<GeneratedLocation> {
113        let offset = byte_offset as usize;
114        if offset > self.generated_source.len() || !self.generated_source.is_char_boundary(offset) {
115            return None;
116        }
117
118        let line = self
119            .line_start_bytes
120            .partition_point(|line_start| *line_start <= offset)
121            .saturating_sub(1);
122        let line_start = self.line_start_bytes[line];
123        let line_end = self.line_end_bytes[line];
124        let column = if offset <= line_end {
125            self.byte_offset_to_utf16_column(line_start, offset, line)?
126        } else {
127            self.byte_offset_to_utf16_column(line_start, line_end, line)?
128        };
129
130        Some(GeneratedLocation { line: u32::try_from(line).ok()?, column })
131    }
132
133    /// Look up the original source position for a generated byte offset.
134    pub fn original_position_for_offset<M>(
135        &self,
136        source_map: &M,
137        byte_offset: u32,
138    ) -> Option<OriginalLocation>
139    where
140        M: OriginalPositionLookup + ?Sized,
141    {
142        let position = self.byte_offset_to_position(byte_offset)?;
143        source_map.original_position_for(position.line, position.column)
144    }
145
146    /// Convert multiple generated byte offsets to generated positions.
147    ///
148    /// Returns `None` if any offset is invalid.
149    pub fn byte_offsets_to_positions(
150        &self,
151        byte_offsets: &[u32],
152    ) -> Option<Vec<GeneratedLocation>> {
153        byte_offsets.iter().map(|byte_offset| self.byte_offset_to_position(*byte_offset)).collect()
154    }
155
156    /// Look up original source positions for multiple generated byte offsets.
157    pub fn original_positions_for_offsets<M>(
158        &self,
159        source_map: &M,
160        byte_offsets: &[u32],
161    ) -> Vec<Option<OriginalLocation>>
162    where
163        M: OriginalPositionLookup + ?Sized,
164    {
165        byte_offsets
166            .iter()
167            .map(|byte_offset| self.original_position_for_offset(source_map, *byte_offset))
168            .collect()
169    }
170
171    fn byte_offset_to_utf16_column(
172        &self,
173        line_start: usize,
174        offset: usize,
175        line: usize,
176    ) -> Option<u32> {
177        let byte_column = offset.checked_sub(line_start)?;
178        if self.line_ascii_only[line] {
179            return u32::try_from(byte_column).ok();
180        }
181
182        let line_source = self.generated_source.get(line_start..offset)?;
183        line_source.chars().try_fold(0u32, |column, ch| column.checked_add(ch.len_utf16() as u32))
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::GeneratedOffsetLookup;
    use crate::{LazySourceMap, SourceMap};

    /// Source map with two generated lines, each carrying one named mapping
    /// at column 0 back to `input.ts`.
    const TWO_LINE_MAP: &str = r#"{
        "version": 3,
        "sources": ["input.ts"],
        "names": ["lineOne", "lineTwo"],
        "mappings": "AAAAA;AACAC"
    }"#;

    /// Resolve `byte_offset` and flatten the result to `(line, column)`.
    ///
    /// Panics when the lookup rejects the offset, which fails the calling test.
    fn line_and_column(lookup: &GeneratedOffsetLookup<'_>, byte_offset: usize) -> (u32, u32) {
        let offset = u32::try_from(byte_offset).unwrap();
        let position = lookup.byte_offset_to_position(offset).unwrap();
        (position.line, position.column)
    }

    #[test]
    fn generated_offset_lookup_maps_ascii_offsets() {
        let first_line = "const value = 1;\n";
        let lookup = GeneratedOffsetLookup::new("const value = 1;\nvalue;\n");

        assert_eq!(lookup.line_count(), 3);
        assert_eq!(lookup.total_bytes(), 24);

        assert_eq!(line_and_column(&lookup, 0), (0, 0));
        assert_eq!(line_and_column(&lookup, first_line.len()), (1, 0));
    }

    #[test]
    fn generated_offset_lookup_converts_utf8_offsets_to_utf16_columns() {
        let source = "const letter = 'é';\nconst emoji = '😀';\n";
        let lookup = GeneratedOffsetLookup::new(source);

        // Locate each non-ASCII character by byte offset, then probe the
        // boundary immediately before and after it.
        let accent_at = source.find('é').unwrap();
        let emoji_at = source.find('😀').unwrap();

        // `é` is two UTF-8 bytes but a single UTF-16 unit.
        assert_eq!(line_and_column(&lookup, accent_at), (0, 16));
        assert_eq!(line_and_column(&lookup, accent_at + 'é'.len_utf8()), (0, 17));
        // `😀` is four UTF-8 bytes and a surrogate pair (two UTF-16 units).
        assert_eq!(line_and_column(&lookup, emoji_at), (1, 15));
        assert_eq!(line_and_column(&lookup, emoji_at + '😀'.len_utf8()), (1, 17));
    }

    #[test]
    fn generated_offset_lookup_handles_crlf_and_line_boundaries() {
        let source = "alpha();\r\nbeta();\r\ngamma();";
        let lookup = GeneratedOffsetLookup::new(source);
        let cr_at = source.find('\r').unwrap();

        // Both halves of the CRLF pair report the end column of line 0.
        assert_eq!(line_and_column(&lookup, cr_at), (0, 8));
        assert_eq!(line_and_column(&lookup, cr_at + 1), (0, 8));
        // The first byte after the pair starts line 1.
        assert_eq!(line_and_column(&lookup, cr_at + 2), (1, 0));
    }

    #[test]
    fn generated_offset_lookup_rejects_invalid_offsets() {
        let lookup = GeneratedOffsetLookup::new("é");

        // Offset 1 splits the two-byte character; offset 3 is past the end.
        for invalid in [1, 3] {
            assert!(lookup.byte_offset_to_position(invalid).is_none());
        }
    }

    #[test]
    fn generated_offset_lookup_resolves_original_positions_for_eager_and_lazy_maps() {
        let generated = "alpha();\nbeta();\n";
        let second_line_offset = u32::try_from("alpha();\n".len()).unwrap();
        let lookup = GeneratedOffsetLookup::new(generated);

        let eager = SourceMap::from_json(TWO_LINE_MAP).unwrap();
        let lazy = LazySourceMap::from_json(TWO_LINE_MAP).unwrap();
        let eager_loc = lookup.original_position_for_offset(&eager, second_line_offset).unwrap();
        let lazy_loc = lookup.original_position_for_offset(&lazy, second_line_offset).unwrap();

        // The eager map resolves to the second mapping.
        assert_eq!(eager.source(eager_loc.source), "input.ts");
        assert_eq!(eager_loc.line, 1);
        assert_eq!(eager_loc.column, 0);
        assert_eq!(eager.name(eager_loc.name.unwrap()), "lineTwo");

        // The lazy map must agree with the eager one.
        assert_eq!(lazy.source(lazy_loc.source), "input.ts");
        assert_eq!(lazy_loc.line, eager_loc.line);
        assert_eq!(lazy_loc.column, eager_loc.column);
        assert_eq!(lazy_loc.name, eager_loc.name);
    }
}