Skip to main content

swc_common/
input.rs

1use std::str;
2
3use crate::syntax_pos::{BytePos, SourceFile};
4
/// Alias for [StringInput].
pub type SourceFileInput<'a> = StringInput<'a>;
6
7/// Implementation of [Input].
8#[derive(Clone)]
9pub struct StringInput<'a> {
10    last_pos: BytePos,
11    /// Remaining input as str - we slice this as we consume bytes
12    remaining: &'a str,
13    orig: &'a str,
14    /// Original start position.
15    orig_start: BytePos,
16    orig_end: BytePos,
17}
18
19impl<'a> StringInput<'a> {
20    /// `start` and `end` can be arbitrary value, but start should be less than
21    /// or equal to end.
22    ///
23    ///
24    /// `swc` get this value from [SourceMap] because code generator depends on
25    /// some methods of [SourceMap].
26    /// If you are not going to use methods from
27    /// [SourceMap], you may use any value.
28    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
29        assert!(start <= end);
30
31        StringInput {
32            last_pos: start,
33            orig: src,
34            remaining: src,
35            orig_start: start,
36            orig_end: end,
37        }
38    }
39
40    #[inline(always)]
41    pub fn as_str(&self) -> &str {
42        self.remaining
43    }
44
45    #[inline(always)]
46    /// Compared to [StringInput::slice], this function doesn't set
47    /// `self.last_pos = end` because in most cases this property has been
48    /// satisfied but the compiler cannot optimize it.
49    ///
50    /// Caution: This function should only be used internally and will be
51    /// changed in the future.
52    ///
53    /// # Safety
54    /// - start should be less than or equal to end.
55    /// - start and end should be in the valid range of input.
56    pub unsafe fn slice_str(&self, start: BytePos, end: BytePos) -> &'a str {
57        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
58        let s = self.orig;
59
60        let start_idx = (start - self.orig_start).0 as usize;
61        let end_idx = (end - self.orig_start).0 as usize;
62
63        debug_assert!(end_idx <= s.len());
64
65        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
66
67        ret
68    }
69
70    pub fn start_pos(&self) -> BytePos {
71        self.orig_start
72    }
73
74    #[inline(always)]
75    pub fn end_pos(&self) -> BytePos {
76        self.orig_end
77    }
78}
79
80/// Creates an [Input] from [SourceFile]. This is an alias for
81///
82/// ```ignore
83///    StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
84/// ```
85impl<'a> From<&'a SourceFile> for StringInput<'a> {
86    fn from(fm: &'a SourceFile) -> Self {
87        StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
88    }
89}
90
91impl<'a> Input<'a> for StringInput<'a> {
92    #[inline]
93    fn cur(&self) -> Option<u8> {
94        self.remaining.as_bytes().first().copied()
95    }
96
97    #[inline]
98    fn peek(&self) -> Option<u8> {
99        self.remaining.as_bytes().get(1).copied()
100    }
101
102    #[inline]
103    fn peek_ahead(&self) -> Option<u8> {
104        self.remaining.as_bytes().get(2).copied()
105    }
106
107    #[inline]
108    unsafe fn bump_bytes(&mut self, n: usize) {
109        debug_assert!(n <= self.remaining.len());
110        self.remaining = unsafe { self.remaining.get_unchecked(n..) };
111        self.last_pos.0 += n as u32;
112    }
113
114    #[inline]
115    fn cur_as_ascii(&self) -> Option<u8> {
116        let first_byte = *self.remaining.as_bytes().first()?;
117        if first_byte <= 0x7f {
118            Some(first_byte)
119        } else {
120            None
121        }
122    }
123
124    #[inline]
125    fn cur_as_char(&self) -> Option<char> {
126        self.remaining.chars().next()
127    }
128
129    #[inline]
130    fn is_at_start(&self) -> bool {
131        self.orig_start == self.last_pos
132    }
133
134    /// TODO(kdy1): Remove this?
135    #[inline]
136    fn cur_pos(&self) -> BytePos {
137        self.last_pos
138    }
139
140    #[inline]
141    fn last_pos(&self) -> BytePos {
142        self.last_pos
143    }
144
145    #[inline]
146    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
147        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
148        let s = self.orig;
149
150        let start_idx = (start - self.orig_start).0 as usize;
151        let end_idx = (end - self.orig_start).0 as usize;
152
153        debug_assert!(end_idx <= s.len());
154
155        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
156
157        self.remaining = unsafe { s.get_unchecked(end_idx..) };
158        self.last_pos = end;
159
160        ret
161    }
162
163    #[inline]
164    fn uncons_while<F>(&mut self, mut pred: F) -> &'a str
165    where
166        F: FnMut(char) -> bool,
167    {
168        let last = {
169            let mut last = 0;
170            for c in self.remaining.chars() {
171                if pred(c) {
172                    last += c.len_utf8();
173                } else {
174                    break;
175                }
176            }
177            last
178        };
179
180        debug_assert!(last <= self.remaining.len());
181        let ret = unsafe { self.remaining.get_unchecked(..last) };
182
183        self.last_pos = self.last_pos + BytePos(last as _);
184        self.remaining = unsafe { self.remaining.get_unchecked(last..) };
185
186        ret
187    }
188
189    #[inline]
190    unsafe fn reset_to(&mut self, to: BytePos) {
191        if self.last_pos == to {
192            // No need to reset.
193            return;
194        }
195
196        let orig = self.orig;
197        let idx = (to - self.orig_start).0 as usize;
198
199        debug_assert!(idx <= orig.len());
200        self.remaining = unsafe { orig.get_unchecked(idx..) };
201        self.last_pos = to;
202    }
203
204    #[inline]
205    fn is_byte(&self, c: u8) -> bool {
206        self.remaining
207            .as_bytes()
208            .first()
209            .map(|b| *b == c)
210            .unwrap_or(false)
211    }
212
213    #[inline]
214    fn is_str(&self, s: &str) -> bool {
215        self.remaining.starts_with(s)
216    }
217
218    #[inline]
219    fn eat_byte(&mut self, c: u8) -> bool {
220        if self.is_byte(c) {
221            self.remaining = unsafe { self.remaining.get_unchecked(1..) };
222            self.last_pos = self.last_pos + BytePos(1_u32);
223            true
224        } else {
225            false
226        }
227    }
228}
229
230pub trait Input<'a>: Clone {
231    /// Returns the current byte. Returns [None] if at end of input.
232    fn cur(&self) -> Option<u8>;
233
234    /// Returns the next byte without consuming the current byte.
235    fn peek(&self) -> Option<u8>;
236
237    /// Returns the byte after the next byte without consuming anything.
238    fn peek_ahead(&self) -> Option<u8>;
239
240    /// Advances the input by exactly `n` bytes.
241    /// Unlike `bump()`, this does not calculate UTF-8 character boundaries.
242    ///
243    /// # Safety
244    ///
245    /// - This should be called only when `cur()` returns `Some`. i.e. when the
246    ///   Input is not empty.
247    /// - `n` should be the number of bytes of the current character.
248    unsafe fn bump_bytes(&mut self, n: usize);
249
250    /// Returns the current byte as ASCII if it's valid ASCII (0x00-0x7F).
251    /// Returns [None] if it's end of input or if the byte is not ASCII.
252    #[inline]
253    fn cur_as_ascii(&self) -> Option<u8> {
254        self.cur()
255            .and_then(|b| if b <= 0x7f { Some(b) } else { None })
256    }
257
258    /// Returns the current position as a UTF-8 char for cases where we need
259    /// full character processing (identifiers, strings, etc).
260    /// Returns [None] if at end of input or if the bytes don't form valid
261    /// UTF-8.
262    fn cur_as_char(&self) -> Option<char>;
263
264    fn is_at_start(&self) -> bool;
265
266    fn cur_pos(&self) -> BytePos;
267
268    fn last_pos(&self) -> BytePos;
269
270    /// # Safety
271    ///
272    /// - start should be less than or equal to end.
273    /// - start and end should be in the valid range of input.
274    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
275
276    /// Takes items from stream, testing each one with predicate. returns the
277    /// range of items which passed predicate.
278    fn uncons_while<F>(&mut self, f: F) -> &'a str
279    where
280        F: FnMut(char) -> bool;
281
282    /// # Safety
283    ///
284    /// - `to` be in the valid range of input.
285    unsafe fn reset_to(&mut self, to: BytePos);
286
287    /// Check if the current byte equals the given byte.
288    /// `c` should typically be an ASCII byte for performance.
289    #[inline]
290    #[allow(clippy::wrong_self_convention)]
291    fn is_byte(&self, c: u8) -> bool {
292        self.cur() == Some(c)
293    }
294
295    /// Implementors can override the method to make it faster.
296    ///
297    /// `s` must be ASCII only.
298    fn is_str(&self, s: &str) -> bool;
299
300    /// Implementors can override the method to make it faster.
301    ///
302    /// `c` must be ASCII.
303    #[inline]
304    fn eat_byte(&mut self, c: u8) -> bool {
305        if self.is_byte(c) {
306            unsafe {
307                // Safety: We are sure that the input is not empty
308                self.bump_bytes(1);
309            }
310            true
311        } else {
312            false
313        }
314    }
315}
316
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{sync::Lrc, FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a source file named "testing" in a fresh
    /// [SourceMap] and hands a [StringInput] over that file to `f`.
    ///
    /// The assertions below expect the first byte of the file to be at
    /// `BytePos(1)`.
    fn with_test_sess<F>(src: &'static str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let source_map = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let file = source_map.new_source_file(FileName::Real("testing".into()).into(), src);

        f(StringInput::from(&*file))
    }

    /// `slice` returns the requested range and leaves the input at `end`.
    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(unsafe { input.slice(BytePos(1), BytePos(2)) }, "f");
            assert_eq!(input.last_pos, BytePos(2));
            assert_eq!(input.cur(), Some(b'o'));

            assert_eq!(unsafe { input.slice(BytePos(2), BytePos(4)) }, "oo");
            // Slicing from before the current position is allowed as well.
            assert_eq!(unsafe { input.slice(BytePos(1), BytePos(4)) }, "foo");
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.cur(), Some(b'/'));
        });
    }

    /// `reset_to` rewinds the input to an earlier position.
    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut input| {
            assert_eq!(unsafe { input.slice(BytePos(1), BytePos(3)) }, "lo");
            assert_eq!(input.last_pos, BytePos(3));
            assert_eq!(input.cur(), Some(b'a'));
            unsafe { input.reset_to(BytePos(1)) };

            assert_eq!(input.cur(), Some(b'l'));
            assert_eq!(input.last_pos, BytePos(1));
        });
    }

    /// Walks the whole input with `uncons_while` and `bump_bytes`.
    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(input.cur_pos(), BytePos(1));
            assert_eq!(input.last_pos, BytePos(1));
            assert_eq!(input.uncons_while(|c| c.is_alphabetic()), "foo");

            // assert_eq!(i.cur_pos(), BytePos(4));
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.cur(), Some(b'/'));

            // Step over '/'.
            unsafe {
                input.bump_bytes(1);
            }
            assert_eq!(input.last_pos, BytePos(5));
            assert_eq!(input.cur(), Some(b'd'));

            // Step over 'd', reaching the end of the input.
            unsafe {
                input.bump_bytes(1);
            }
            assert_eq!(input.last_pos, BytePos(6));
            assert_eq!(input.cur(), None);
        });
    }

    // The tests below are disabled. They call `find`, `bump` and `iter` and
    // read a `start_pos` field, none of which exist on `StringInput` as
    // defined in this file, so they cannot compile as written.

    // #[test]
    // fn src_input_find_01() {
    //     with_test_sess("foo/d", |mut i| {
    //         assert_eq!(i.cur_pos(), BytePos(1));
    //         assert_eq!(i.last_pos, BytePos(1));

    //         assert_eq!(i.find(|c| c == '/'), Some(BytePos(5)));
    //         assert_eq!(i.last_pos, BytePos(5));
    //         assert_eq!(i.cur(), Some('d'));
    //     });
    // }

    //    #[test]
    //    fn src_input_smoke_02() {
    //        let _ = crate::with_test_sess("℘℘/℘℘", | mut i| {
    //            assert_eq!(i.iter.as_str(), "℘℘/℘℘");
    //            assert_eq!(i.cur_pos(), BytePos(0));
    //            assert_eq!(i.last_pos, BytePos(0));
    //            assert_eq!(i.start_pos, BytePos(0));
    //            assert_eq!(i.uncons_while(|c| c.is_ident_part()), "℘℘");
    //
    //            assert_eq!(i.iter.as_str(), "/℘℘");
    //            assert_eq!(i.last_pos, BytePos(6));
    //            assert_eq!(i.start_pos, BytePos(6));
    //            assert_eq!(i.cur(), Some('/'));
    //            i.bump();
    //            assert_eq!(i.last_pos, BytePos(7));
    //            assert_eq!(i.start_pos, BytePos(6));
    //
    //            assert_eq!(i.iter.as_str(), "℘℘");
    //            assert_eq!(i.uncons_while(|c| c.is_ident_part()), "℘℘");
    //            assert_eq!(i.last_pos, BytePos(13));
    //            assert_eq!(i.start_pos, BytePos(13));
    //
    //            Ok(())
    //        });
    //    }
}