sourcemap/
sourceview.rs

1use std::fmt;
2use std::str;
3use std::sync::Arc;
4use std::sync::Mutex;
5
6use if_chain::if_chain;
7
8use crate::detector::{locate_sourcemap_reference_slice, SourceMapRef};
9use crate::errors::Result;
10use crate::js_identifiers::{get_javascript_token, is_valid_javascript_identifier};
11use crate::types::Token;
12
/// An iterator that iterates over tokens in reverse.
///
/// Each step yields the current token together with whatever
/// `get_javascript_token` extracts from the minified source at the token's
/// destination position (or `None` if nothing could be extracted).
pub struct RevTokenIter<'view, 'map> {
    /// The source view the minified lines are read from.
    sv: &'view SourceView,
    /// The token that the next call to `next` will yield; `None` once the
    /// iterator is exhausted.
    token: Option<Token<'map>>,
    /// Cache of the most recently visited line, stored as
    /// `(line text, destination line, destination column, byte offset)`.
    source_line: Option<(&'view str, usize, usize, usize)>,
}
19
20impl<'view, 'map> Iterator for RevTokenIter<'view, 'map> {
21    type Item = (Token<'map>, Option<&'view str>);
22
23    fn next(&mut self) -> Option<(Token<'map>, Option<&'view str>)> {
24        let token = self.token.take()?;
25        let idx = token.idx;
26
27        if idx > 0 {
28            self.token = token.sm.get_token(idx - 1);
29        }
30
31        // if we are going to the same line as we did last iteration, we don't have to scan
32        // up to it again.  For normal sourcemaps this should mean we only ever go to the
33        // line once.
34        let (source_line, last_char_offset, last_byte_offset) = if_chain! {
35            if let Some((source_line, dst_line, last_char_offset,
36                         last_byte_offset)) = self.source_line;
37
38            if dst_line == token.get_dst_line() as usize;
39            then {
40                (source_line, last_char_offset, last_byte_offset)
41            } else {
42                if let Some(source_line) = self.sv.get_line(token.get_dst_line()) {
43                    (source_line, !0, !0)
44                } else {
45                    // if we can't find the line, return am empty one
46                    ("", !0, !0)
47                }
48            }
49        };
50
51        // find the byte offset where our token starts
52        let byte_offset = if last_byte_offset == !0 {
53            let mut off = 0;
54            let mut idx = 0;
55            for c in source_line.chars() {
56                if idx >= token.get_dst_col() as usize {
57                    break;
58                }
59                off += c.len_utf8();
60                idx += c.len_utf16();
61            }
62            off
63        } else {
64            let chars_to_move = last_char_offset - token.get_dst_col() as usize;
65            let mut new_offset = last_byte_offset;
66            let mut idx = 0;
67            for c in source_line
68                .get(..last_byte_offset)
69                .unwrap_or("")
70                .chars()
71                .rev()
72            {
73                if idx >= chars_to_move {
74                    break;
75                }
76                new_offset -= c.len_utf8();
77                idx += c.len_utf16();
78            }
79            new_offset
80        };
81
82        // remember where we were
83        self.source_line = Some((
84            source_line,
85            token.get_dst_line() as usize,
86            token.get_dst_col() as usize,
87            byte_offset,
88        ));
89
90        // in case we run out of bounds here we reset the cache
91        if byte_offset >= source_line.len() {
92            self.source_line = None;
93            Some((token, None))
94        } else {
95            Some((
96                token,
97                source_line
98                    .get(byte_offset..)
99                    .and_then(get_javascript_token),
100            ))
101        }
102    }
103}
104
/// An iterator over the lines of a [`SourceView`].
///
/// Created by [`SourceView::lines`].
pub struct Lines<'a> {
    /// The view whose lines are yielded.
    sv: &'a SourceView,
    /// Index of the line that the next call to `next` will request.
    idx: u32,
}
109
110impl<'a> Iterator for Lines<'a> {
111    type Item = &'a str;
112
113    fn next(&mut self) -> Option<&'a str> {
114        if let Some(line) = self.sv.get_line(self.idx) {
115            self.idx += 1;
116            Some(line)
117        } else {
118            None
119        }
120    }
121}
122
/// Provides efficient access to minified sources.
///
/// This type is used to implement fairly efficient source mapping
/// operations.
pub struct SourceView {
    /// The complete source text.
    source: Arc<str>,
    /// Line boundaries discovered so far.  Filled in on demand by
    /// `get_line`; kept behind a mutex because lookups only take `&self`.
    line_end_offsets: Mutex<Vec<LineEndOffset>>,
}
131
132impl Clone for SourceView {
133    fn clone(&self) -> SourceView {
134        SourceView {
135            source: self.source.clone(),
136            line_end_offsets: Mutex::new(vec![]),
137        }
138    }
139}
140
141impl fmt::Debug for SourceView {
142    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
143        f.debug_struct("SourceView")
144            .field("source", &self.source())
145            .finish()
146    }
147}
148
149impl PartialEq for SourceView {
150    fn eq(&self, other: &Self) -> bool {
151        self.source == other.source
152    }
153}
154
155impl SourceView {
156    /// Creates an optimized view of a given source.
157    pub fn new(source: Arc<str>) -> SourceView {
158        SourceView {
159            source,
160            line_end_offsets: Mutex::new(vec![]),
161        }
162    }
163
164    /// Creates an optimized view from a given source string
165    pub fn from_string(source: String) -> SourceView {
166        SourceView {
167            source: source.into(),
168            line_end_offsets: Mutex::new(vec![]),
169        }
170    }
171
172    /// Returns a requested minified line.
173    pub fn get_line(&self, idx: u32) -> Option<&str> {
174        let idx = idx as usize;
175
176        let get_from_line_ends = |line_ends: &[LineEndOffset]| {
177            let end = line_ends.get(idx)?.to_end_index();
178            let start = if idx == 0 {
179                0
180            } else {
181                line_ends[idx - 1].to_start_index()
182            };
183            Some(&self.source[start..end])
184        };
185
186        let mut line_ends = self
187            .line_end_offsets
188            .lock()
189            .unwrap_or_else(|e| e.into_inner());
190
191        if let Some(line) = get_from_line_ends(&line_ends) {
192            return Some(line);
193        }
194
195        // check whether we've processed the entire string - the end of the
196        // last-processed line would be the same as the end of the string
197        if line_ends
198            .last()
199            .is_some_and(|i| i.to_end_index() == self.source.len())
200        {
201            return None;
202        }
203
204        let mut rest_offset = line_ends.last().map_or(0, |i| i.to_start_index());
205        let mut rest = &self.source[rest_offset..];
206        let mut done = false;
207
208        while !done {
209            let line_term = if let Some(idx) = rest.find(['\n', '\r']) {
210                rest_offset += idx;
211                rest = &rest[idx..];
212                if rest.starts_with("\r\n") {
213                    LineTerminator::CrLf
214                } else {
215                    LineTerminator::LfOrCr
216                }
217            } else {
218                rest_offset += rest.len();
219                rest = &rest[rest.len()..];
220                done = true;
221                LineTerminator::Eof
222            };
223
224            line_ends.push(LineEndOffset::new(rest_offset, line_term));
225            rest_offset += line_term as usize;
226            rest = &rest[line_term as usize..];
227            if let Some(line) = get_from_line_ends(&line_ends) {
228                return Some(line);
229            }
230        }
231
232        None
233    }
234
235    /// Returns a line slice.
236    ///
237    /// Note that columns are indexed as JavaScript WTF-16 columns.
238    pub fn get_line_slice(&self, line: u32, col: u32, span: u32) -> Option<&str> {
239        self.get_line(line).and_then(|line| {
240            let mut off = 0;
241            let mut idx = 0;
242            let mut char_iter = line.chars().peekable();
243
244            while let Some(&c) = char_iter.peek() {
245                if idx >= col as usize {
246                    break;
247                }
248                char_iter.next();
249                off += c.len_utf8();
250                idx += c.len_utf16();
251            }
252
253            let mut off_end = off;
254            for c in char_iter {
255                if idx >= (col + span) as usize {
256                    break;
257                }
258                off_end += c.len_utf8();
259                idx += c.len_utf16();
260            }
261
262            if idx < ((col + span) as usize) {
263                None
264            } else {
265                line.get(off..off_end)
266            }
267        })
268    }
269
270    /// Returns an iterator over all lines.
271    pub fn lines(&self) -> Lines<'_> {
272        Lines { sv: self, idx: 0 }
273    }
274
275    /// Returns the source.
276    pub fn source(&self) -> &str {
277        &self.source
278    }
279
280    fn rev_token_iter<'this, 'map>(&'this self, token: Token<'map>) -> RevTokenIter<'this, 'map> {
281        RevTokenIter {
282            sv: self,
283            token: Some(token),
284            source_line: None,
285        }
286    }
287
288    /// Given a token and minified function name this attemps to resolve the
289    /// name to an original function name.
290    ///
291    /// This invokes some guesswork and requires access to the original minified
292    /// source.  This will not yield proper results for anonymous functions or
293    /// functions that do not have clear function names.  (For instance it's
294    /// recommended that dotted function names are not passed to this
295    /// function).
296    pub fn get_original_function_name<'map>(
297        &self,
298        token: Token<'map>,
299        minified_name: &str,
300    ) -> Option<&'map str> {
301        if !is_valid_javascript_identifier(minified_name) {
302            return None;
303        }
304
305        let mut iter = self.rev_token_iter(token).take(128).peekable();
306
307        while let Some((token, original_identifier)) = iter.next() {
308            if_chain! {
309                if original_identifier == Some(minified_name);
310                if let Some(item) = iter.peek();
311                if item.1 == Some("function");
312                then {
313                    return token.get_name();
314                }
315            }
316        }
317
318        None
319    }
320
321    /// Returns the number of lines.
322    pub fn line_count(&self) -> usize {
323        self.get_line(!0);
324        self.line_end_offsets.lock().unwrap().len()
325    }
326
327    /// Returns the source map reference in the source view.
328    pub fn sourcemap_reference(&self) -> Result<Option<SourceMapRef>> {
329        locate_sourcemap_reference_slice(self.source.as_bytes())
330    }
331}
332
/// A wrapper around an index that stores a [`LineTerminator`] in its 2 lowest bits.
///
/// The remaining upper bits hold the byte index at which the line ends.
// We use `u64` instead of `usize` in order to not lose data when bit-packing
// on 32-bit targets.
#[derive(Clone, Copy)]
struct LineEndOffset(u64);
338
/// The kind of terminator that ends a line.
///
/// Each discriminant equals the terminator's length in bytes, so a variant
/// can be cast to an integer and added to a line's end index to obtain the
/// start index of the following line.
#[derive(Clone, Copy)]
enum LineTerminator {
    /// The line runs to the end of the source; no terminator bytes.
    Eof = 0,
    /// A single `\n` or `\r`.
    LfOrCr = 1,
    /// The two-byte sequence `\r\n`.
    CrLf = 2,
}
345
346impl LineEndOffset {
347    fn new(index: usize, line_end: LineTerminator) -> Self {
348        let shifted = (index as u64) << 2;
349
350        Self(shifted | line_end as u64)
351    }
352
353    /// Return the index of the end of this line.
354    fn to_end_index(self) -> usize {
355        (self.0 >> 2) as usize
356    }
357
358    /// Return the index of the start of the next line.
359    fn to_start_index(self) -> usize {
360        self.to_end_index() + (self.0 & 0b11) as usize
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks out-of-order lookups on a view expected to hold the three
    /// lines `a`, `b` and `c`.
    fn assert_abc_lines(view: &SourceView) {
        assert_eq!(view.get_line(0), Some("a"));
        assert_eq!(view.get_line(0), Some("a"));
        assert_eq!(view.get_line(2), Some("c"));
        assert_eq!(view.get_line(1), Some("b"));
        assert_eq!(view.get_line(3), None);

        assert_eq!(view.line_count(), 3);
    }

    #[test]
    fn test_lines_with_lf() {
        let view = SourceView::new("a\nb\nc".into());
        assert_abc_lines(&view);
    }

    #[test]
    fn test_lines_with_crlf() {
        let view = SourceView::new("a\r\nb\r\nc".into());
        assert_abc_lines(&view);
    }

    #[test]
    fn test_line_slices_use_utf16_columns() {
        let view = SourceView::new("abc👌def\nblah".into());

        // (line, col, span) -> expected slice
        let cases = [
            ((0, 0, 3), Some("abc")),
            ((0, 3, 1), Some("👌")),
            ((0, 3, 2), Some("👌")),
            ((0, 3, 3), Some("👌d")),
            ((0, 0, 4), Some("abc👌")),
            ((0, 0, 5), Some("abc👌")),
            ((0, 0, 6), Some("abc👌d")),
            ((1, 0, 4), Some("blah")),
            ((1, 0, 5), None),
            ((1, 0, 12), None),
        ];

        for ((line, col, span), expected) in cases {
            assert_eq!(view.get_line_slice(line, col, span), expected);
        }
    }

    #[test]
    fn test_trailing_newline_yields_empty_line() {
        let view = SourceView::new("a\nb\nc\n".into());
        assert_eq!(view.get_line(0), Some("a"));
        assert_eq!(view.get_line(1), Some("b"));
        assert_eq!(view.get_line(2), Some("c"));
        assert_eq!(view.get_line(3), Some(""));
        assert_eq!(view.get_line(4), None);
    }

    #[test]
    fn test_source_view_is_send_and_sync() {
        fn is_send<T: Send>() {}
        fn is_sync<T: Sync>() {}
        is_send::<SourceView>();
        is_sync::<SourceView>();
    }
}