// symbolic_sourcemapcache/writer.rs

1use std::io::Write;
2use std::ops::Range;
3
4use itertools::Itertools;
5use js_source_scopes::{
6    extract_scope_names, NameResolver, ScopeIndex, ScopeIndexError, SourceContext,
7    SourceContextError,
8};
9use sourcemap::DecodedMap;
10use watto::{Pod, StringTable, Writer};
11
12use super::raw;
13use super::{ScopeLookupResult, SourcePosition};
14
15/// A structure that allows quick resolution of minified source position
16/// to the original source position it maps to.
17pub struct SourceMapCacheWriter {
18    string_table: StringTable,
19    files: Vec<raw::File>,
20    line_offsets: Vec<raw::LineOffset>,
21    mappings: Vec<(raw::MinifiedSourcePosition, raw::OriginalSourceLocation)>,
22}
23
24impl SourceMapCacheWriter {
25    /// Constructs a new Cache from a minified source file and its corresponding SourceMap.
26    #[tracing::instrument(level = "trace", name = "SourceMapCacheWriter::new", skip_all)]
27    pub fn new(source: &str, sourcemap: &str) -> Result<Self, SourceMapCacheWriterError> {
28        let sm = tracing::trace_span!("decode sourcemap").in_scope(
29            || -> Result<DecodedMap, SourceMapCacheWriterError> {
30                let sm = sourcemap::decode_slice(sourcemap.as_bytes())
31                    .map_err(SourceMapCacheErrorInner::SourceMap)?;
32                // flatten the `SourceMapIndex`, as we want to iterate tokens
33                Ok(match sm {
34                    DecodedMap::Regular(sm) => DecodedMap::Regular(sm),
35                    DecodedMap::Index(smi) => DecodedMap::Regular(
36                        smi.flatten().map_err(SourceMapCacheErrorInner::SourceMap)?,
37                    ),
38                    DecodedMap::Hermes(smh) => DecodedMap::Hermes(smh),
39                })
40            },
41        )?;
42
43        let tokens = match &sm {
44            DecodedMap::Regular(sm) => sm.tokens(),
45            DecodedMap::Hermes(smh) => smh.tokens(),
46            DecodedMap::Index(_smi) => unreachable!(),
47        };
48
49        // Hermes/Metro SourceMaps have scope information embedded in them which we can use.
50        // In that case, we can skip parsing the minified source, which in most cases is empty / non-existent
51        // as Hermes ships bytecode that we are not able to parse anyway.
52        // Skipping this whole code would be nice, but that gets us into borrow-checker hell, so
53        // just clearing the minified source skips the whole code there anyways.
54        let source = if matches!(&sm, DecodedMap::Hermes(_)) {
55            ""
56        } else {
57            source
58        };
59
60        // parse scopes out of the minified source
61        let scopes = match extract_scope_names(source) {
62            Ok(scopes) => scopes,
63            Err(err) => {
64                let err: &dyn std::error::Error = &err;
65                tracing::error!(error = err, "failed parsing minified source");
66                // even if the minified source failed parsing, we can still use the information
67                // from the sourcemap itself.
68                vec![]
69            }
70        };
71
72        // resolve scopes to original names
73        let ctx = SourceContext::new(source).map_err(SourceMapCacheErrorInner::SourceContext)?;
74        let resolver = NameResolver::new(&ctx, &sm);
75
76        let scopes: Vec<_> = tracing::trace_span!("resolve original names").in_scope(|| {
77            scopes
78                .into_iter()
79                .map(|(range, name)| {
80                    let orig_name = name.as_ref().map(|name| name.to_string());
81                    let resolved_name = name
82                        .map(|n| resolver.resolve_name(&n))
83                        .filter(|s| !s.is_empty());
84
85                    // A hack specifically for Flutter. If the resolved scope name is the same as the original name,
86                    // that indicates that we probably couldn't resolve the scope. In that case, we find the name
87                    // at the very end of the scope, if it exists, and use it instead of the "conventionally"
88                    // resolved scope.
89                    let name_at_end_of_scope = if orig_name == resolved_name {
90                        Self::try_resolve_closing_name(&ctx, &sm, range.clone())
91                    } else {
92                        None
93                    };
94
95                    (range, name_at_end_of_scope.or(resolved_name))
96                })
97                .collect()
98        });
99
100        // convert our offset index to a source position index
101        let scope_index = ScopeIndex::new(scopes).map_err(SourceMapCacheErrorInner::ScopeIndex)?;
102        let scope_index: Vec<_> = tracing::trace_span!("convert scope index").in_scope(|| {
103            scope_index
104                .iter()
105                .filter_map(|(offset, result)| {
106                    let pos = ctx.offset_to_position(offset);
107                    pos.map(|pos| (pos, result))
108                })
109                .collect()
110        });
111        let lookup_scope = |sp: &SourcePosition| {
112            if let DecodedMap::Hermes(smh) = &sm {
113                let token = smh.lookup_token(sp.line, sp.column);
114                return match token.and_then(|token| smh.get_scope_for_token(token)) {
115                    Some(name) => ScopeLookupResult::NamedScope(name),
116                    None => ScopeLookupResult::Unknown,
117                };
118            }
119
120            let idx = match scope_index.binary_search_by_key(&sp, |idx| &idx.0) {
121                Ok(idx) => idx,
122                Err(0) => 0,
123                Err(idx) => idx - 1,
124            };
125            match scope_index.get(idx) {
126                Some(r) => r.1,
127                None => ScopeLookupResult::Unknown,
128            }
129        };
130
131        let orig_files = match &sm {
132            DecodedMap::Regular(sm) => sm.sources().zip_longest(sm.source_contents()),
133            DecodedMap::Hermes(smh) => smh.sources().zip_longest(smh.source_contents()),
134            DecodedMap::Index(_smi) => unreachable!(),
135        };
136
137        let mut string_table = StringTable::new();
138        let mut mappings = Vec::new();
139
140        let mut line_offsets = vec![];
141        let mut files = vec![];
142        tracing::trace_span!("extract original files").in_scope(|| {
143            for orig_file in orig_files {
144                let (name, source) = orig_file.or_default();
145
146                let name_offset = string_table.insert(name) as u32;
147
148                let source_offset =
149                    source.map_or(u32::MAX, |source| string_table.insert(source) as u32);
150
151                let line_offsets_start = line_offsets.len() as u32;
152                Self::append_line_offsets(source.unwrap_or_default(), &mut line_offsets);
153                let line_offsets_end = line_offsets.len() as u32;
154
155                files.push((
156                    name,
157                    raw::File {
158                        name_offset,
159                        source_offset,
160                        line_offsets_start,
161                        line_offsets_end,
162                    },
163                ));
164            }
165        });
166
167        // iterate over the tokens and create our index
168        let mut last = None;
169        tracing::trace_span!("create index").in_scope(|| {
170            for token in tokens {
171                let (min_line, min_col) = token.get_dst();
172                let sp = SourcePosition::new(min_line, min_col);
173                let line = token.get_src_line();
174                let column = token.get_src_col();
175                let scope = lookup_scope(&sp);
176                let mut file_idx = token.get_src_id();
177
178                if file_idx >= files.len() as u32 {
179                    file_idx = raw::NO_FILE_SENTINEL;
180                }
181
182                let scope_idx = match scope {
183                    ScopeLookupResult::NamedScope(name) => {
184                        std::cmp::min(string_table.insert(name) as u32, raw::GLOBAL_SCOPE_SENTINEL)
185                    }
186                    ScopeLookupResult::AnonymousScope => raw::ANONYMOUS_SCOPE_SENTINEL,
187                    ScopeLookupResult::Unknown => raw::GLOBAL_SCOPE_SENTINEL,
188                };
189
190                let name = token.get_name();
191                let name_idx = match name {
192                    Some(name) => string_table.insert(name) as u32,
193                    None => raw::NO_NAME_SENTINEL,
194                };
195
196                let sl = raw::OriginalSourceLocation {
197                    file_idx,
198                    line,
199                    column,
200                    name_idx,
201                    scope_idx,
202                };
203
204                if last == Some(sl) {
205                    continue;
206                }
207                mappings.push((
208                    raw::MinifiedSourcePosition {
209                        line: sp.line,
210                        column: sp.column,
211                    },
212                    sl,
213                ));
214                last = Some(sl);
215            }
216        });
217
218        let files = files.into_iter().map(|(_name, file)| file).collect();
219
220        Ok(Self {
221            string_table,
222            files,
223            line_offsets,
224            mappings,
225        })
226    }
227
228    /// Returns the name attached to the token at the given range's end, if any.
229    fn try_resolve_closing_name(
230        ctx: &SourceContext<&str>,
231        sourcemap: &DecodedMap,
232        range: Range<u32>,
233    ) -> Option<String> {
234        let sp = ctx.offset_to_position(range.end - 1)?;
235        let token = sourcemap.lookup_token(sp.line, sp.column)?;
236
237        // Validate that the token really is exactly at the scope's end
238        if token.get_dst() != (sp.line, sp.column) {
239            return None;
240        }
241
242        let sp_past_end = ctx.offset_to_position(range.end);
243        let token_past_end = sp_past_end.and_then(|sp| sourcemap.lookup_token(sp.line, sp.column));
244
245        // Validate that the token one past the scope's end (if it exists) is different
246        if token_past_end == Some(token) {
247            return None;
248        }
249
250        let token_name = token.get_name()?;
251        Some(token_name.to_owned())
252    }
253
254    /// Serialize the converted data.
255    ///
256    /// This writes the SourceMapCache binary format into the given [`Write`].
257    #[tracing::instrument(level = "trace", name = "SourceMapCacheWriter::serialize", skip_all)]
258    pub fn serialize<W: Write>(self, writer: &mut W) -> std::io::Result<()> {
259        let mut writer = Writer::new(writer);
260        let string_bytes = self.string_table.into_bytes();
261
262        let header = raw::Header {
263            magic: raw::SOURCEMAPCACHE_MAGIC,
264            version: raw::SOURCEMAPCACHE_VERSION,
265            num_mappings: self.mappings.len() as u32,
266            num_files: self.files.len() as u32,
267            num_line_offsets: self.line_offsets.len() as u32,
268            string_bytes: string_bytes.len() as u32,
269            _reserved: [0; 8],
270        };
271
272        writer.write_all(header.as_bytes())?;
273        writer.align_to(8)?;
274
275        for (min_sp, _) in &self.mappings {
276            writer.write_all(min_sp.as_bytes())?;
277        }
278        writer.align_to(8)?;
279
280        for (_, orig_sl) in self.mappings {
281            writer.write_all(orig_sl.as_bytes())?;
282        }
283        writer.align_to(8)?;
284
285        writer.write_all(self.files.as_bytes())?;
286        writer.align_to(8)?;
287
288        writer.write_all(self.line_offsets.as_bytes())?;
289        writer.align_to(8)?;
290
291        writer.write_all(&string_bytes)?;
292
293        Ok(())
294    }
295
296    /// Compute line offsets for a source file and append them to the given  vector.
297    ///
298    /// There is always one line offset at the start of the file (even if the file is empty)
299    /// and then another one after every newline (even if the file ends on a newline).
300    pub(crate) fn append_line_offsets(source: &str, out: &mut Vec<raw::LineOffset>) {
301        // The empty file has only one line offset for the start.
302        if source.is_empty() {
303            out.push(raw::LineOffset(0));
304            return;
305        }
306
307        let buf_ptr = source.as_ptr();
308        out.extend(source.lines().map(move |line| {
309            raw::LineOffset(unsafe { line.as_ptr().offset_from(buf_ptr) as usize } as u32)
310        }));
311
312        // If the file ends with a line break, add another line offset for the empty last line
313        // (the lines iterator skips it).
314        if source.ends_with('\n') {
315            out.push(raw::LineOffset(source.len() as u32));
316        }
317    }
318}
319
320/// An Error that can happen when building a [`SourceMapCache`](super::SourceMapCache).
321#[derive(Debug)]
322pub struct SourceMapCacheWriterError(SourceMapCacheErrorInner);
323
324impl From<SourceMapCacheErrorInner> for SourceMapCacheWriterError {
325    fn from(inner: SourceMapCacheErrorInner) -> Self {
326        SourceMapCacheWriterError(inner)
327    }
328}
329
330#[derive(Debug)]
331pub(crate) enum SourceMapCacheErrorInner {
332    SourceMap(sourcemap::Error),
333    ScopeIndex(ScopeIndexError),
334    SourceContext(SourceContextError),
335}
336
337impl std::error::Error for SourceMapCacheWriterError {}
338
339impl std::fmt::Display for SourceMapCacheWriterError {
340    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
341        match &self.0 {
342            SourceMapCacheErrorInner::SourceMap(e) => e.fmt(f),
343            SourceMapCacheErrorInner::ScopeIndex(e) => e.fmt(f),
344            SourceMapCacheErrorInner::SourceContext(e) => e.fmt(f),
345        }
346    }
347}
348
#[cfg(test)]
mod tests {

    use super::*;
    use crate::raw::LineOffset;

    /// Runs `append_line_offsets` on `source` with a fresh output vector and returns it.
    fn offsets_for(source: &str) -> Vec<LineOffset> {
        let mut collected = Vec::new();
        SourceMapCacheWriter::append_line_offsets(source, &mut collected);
        collected
    }

    // An empty file still has a single line starting at offset 0.
    #[test]
    fn line_offsets_empty_file() {
        assert_eq!(offsets_for(""), [LineOffset(0)]);
    }

    // A lone newline yields the first line plus the empty line after it.
    #[test]
    fn line_offsets_almost_empty_file() {
        assert_eq!(offsets_for("\n"), [LineOffset(0), LineOffset(1)]);
    }

    // Empty lines in the middle of the file get their own offset.
    #[test]
    fn line_offsets_several_lines() {
        assert_eq!(
            offsets_for("a\n\nb\nc"),
            [LineOffset(0), LineOffset(2), LineOffset(3), LineOffset(5),]
        );
    }

    // A trailing newline adds one more offset for the empty last line.
    #[test]
    fn line_offsets_several_lines_trailing_newline() {
        assert_eq!(
            offsets_for("a\n\nb\nc\n"),
            [
                LineOffset(0),
                LineOffset(2),
                LineOffset(3),
                LineOffset(5),
                LineOffset(7),
            ]
        );
    }
}