//! Token buffer (`Buffer`) and the lexer-facing `Tokens` trait for the
//! `swc_ecma_parser` parser input (`parser/input.rs`).

1use swc_atoms::{Atom, Wtf8Atom};
2use swc_common::{BytePos, Span};
3use swc_ecma_ast::EsVersion;
4
5use crate::{
6    error::Error,
7    lexer::{LexResult, NextTokenAndSpan, Token, TokenAndSpan, TokenFlags, TokenValue},
8    syntax::SyntaxFlags,
9    Context,
10};
11
/// Clone should be cheap if you are parsing typescript because typescript
/// syntax requires backtracking.
///
/// Abstraction over the parser's token source (implemented by the lexer).
pub trait Tokens: Clone {
    /// Opaque snapshot of the token-stream state, used to rewind after a
    /// speculative (backtracking) parse.
    type Checkpoint;

    fn set_ctx(&mut self, ctx: Context);
    fn ctx(&self) -> Context;
    fn ctx_mut(&mut self) -> &mut Context;
    fn syntax(&self) -> SyntaxFlags;
    fn target(&self) -> EsVersion;

    /// Captures the current state for a later `checkpoint_load`.
    fn checkpoint_save(&self) -> Self::Checkpoint;
    /// Rewinds the token stream to a previously saved checkpoint.
    fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint);

    /// Returns the source text covered by `span`.
    fn read_string(&self, span: Span) -> &str;

    /// Byte position where the input starts; defaults to 0.
    fn start_pos(&self) -> BytePos {
        BytePos(0)
    }

    fn set_expr_allowed(&mut self, allow: bool);
    fn set_next_regexp(&mut self, start: Option<BytePos>);

    /// Implementors should use Rc<RefCell<Vec<Error>>>.
    ///
    /// It is required because parser should backtrack while parsing typescript
    /// code.
    fn add_error(&mut self, error: Error);

    /// Add an error which is valid syntax in script mode.
    ///
    /// This errors should be dropped if it's not a module.
    ///
    /// Implementor should check for if [Context].module, and buffer errors if
    /// module is false. Also, implementors should move errors to the error
    /// buffer on set_ctx if the parser mode become module mode.
    fn add_module_mode_error(&mut self, error: Error);

    fn end_pos(&self) -> BytePos;

    fn take_errors(&mut self) -> Vec<Error>;

    /// If the program was parsed as a script, this contains the module
    /// errors should the program be identified as a module in the future.
    fn take_script_module_errors(&mut self) -> Vec<Error>;
    fn update_token_flags(&mut self, f: impl FnOnce(&mut TokenFlags));
    fn token_flags(&self) -> TokenFlags;

    /// Clones the value attached to the current token, if any.
    fn clone_token_value(&self) -> Option<TokenValue>;
    /// Moves the value attached to the current token out of the lexer.
    fn take_token_value(&mut self) -> Option<TokenValue>;
    /// Borrows the value attached to the current token, if any.
    fn get_token_value(&self) -> Option<&TokenValue>;
    /// Installs (or clears) the current token value.
    fn set_token_value(&mut self, token_value: Option<TokenValue>);

    /// Returns the first token in the file.
    ///
    /// This function should only be called at the first time of bump.
    /// It's mainly used for shebang.
    fn first_token(&mut self) -> TokenAndSpan;
    /// Returns the next token from the input stream.
    ///
    /// This method always returns a `TokenAndSpan`. When the end of input is
    /// reached, it returns `Token::Eof`, and subsequent calls will continue
    /// returning `Token::Eof`.
    fn next_token(&mut self) -> TokenAndSpan;
    /// Scans the next token in JSX child/text position.
    fn scan_jsx_token(&mut self) -> TokenAndSpan;
    fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan;
    /// Re-scans from `reset` so a multi-char `>`-starting token is split
    /// into a single `>` (see `Buffer::rescan_jsx_open_el_terminal_token`).
    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan;
    /// Re-scans from `reset` using JSX text rules.
    fn rescan_jsx_token(&mut self, reset: BytePos) -> TokenAndSpan;
    /// Scans a JSX identifier beginning at `start`.
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan;
    fn scan_jsx_attribute_value(&mut self) -> TokenAndSpan;
    /// Re-scans a template token starting at `start`.
    fn rescan_template_token(&mut self, start: BytePos, start_with_back_tick: bool)
        -> TokenAndSpan;
}
85
/// This struct is responsible for managing current token and peeked token.
#[derive(Clone)]
pub struct Buffer<I> {
    /// Underlying token source (usually the lexer).
    pub iter: I,
    /// Span of the previous token.
    pub prev_span: Span,
    /// Current token together with its span and line-break flag.
    pub cur: TokenAndSpan,
    /// Peeked token
    pub next: Option<NextTokenAndSpan>,
}
96
97impl<I: Tokens> Buffer<I> {
98    pub fn expect_word_token_value(&mut self) -> Atom {
99        let Some(crate::lexer::TokenValue::Word(word)) = self.iter.take_token_value() else {
100            unreachable!()
101        };
102        word
103    }
104
105    pub fn expect_word_token_value_ref(&self) -> &Atom {
106        let Some(crate::lexer::TokenValue::Word(word)) = self.iter.get_token_value() else {
107            unreachable!("token_value: {:?}", self.iter.get_token_value())
108        };
109        word
110    }
111
112    pub fn expect_number_token_value(&mut self) -> f64 {
113        let Some(crate::lexer::TokenValue::Num(value)) = self.iter.take_token_value() else {
114            unreachable!()
115        };
116        value
117    }
118
119    pub fn expect_string_token_value(&mut self) -> Wtf8Atom {
120        let Some(crate::lexer::TokenValue::Str(value)) = self.iter.take_token_value() else {
121            unreachable!()
122        };
123        value
124    }
125
126    pub fn expect_jsx_text_token_value(&mut self) -> Atom {
127        let Some(crate::lexer::TokenValue::JsxText(value)) = self.iter.take_token_value() else {
128            unreachable!()
129        };
130        value
131    }
132
133    pub fn expect_bigint_token_value(&mut self) -> Box<num_bigint::BigInt> {
134        let Some(crate::lexer::TokenValue::BigInt(value)) = self.iter.take_token_value() else {
135            unreachable!()
136        };
137        value
138    }
139
140    pub fn expect_regex_token_value(&mut self) -> BytePos {
141        let Some(crate::lexer::TokenValue::Regex(exp_end)) = self.iter.take_token_value() else {
142            unreachable!()
143        };
144        exp_end
145    }
146
147    pub fn expect_template_token_value(&mut self) -> LexResult<Wtf8Atom> {
148        let Some(crate::lexer::TokenValue::Template(cooked)) = self.iter.take_token_value() else {
149            unreachable!()
150        };
151        cooked
152    }
153
154    pub fn expect_error_token_value(&mut self) -> Error {
155        let Some(crate::lexer::TokenValue::Error(error)) = self.iter.take_token_value() else {
156            unreachable!()
157        };
158        error
159    }
160
161    pub fn get_token_value(&self) -> Option<&TokenValue> {
162        self.iter.get_token_value()
163    }
164
165    pub fn scan_jsx_token(&mut self) {
166        let prev = self.cur;
167        let t = self.iter.scan_jsx_token();
168        self.prev_span = prev.span;
169        self.set_cur(t);
170    }
171
172    #[allow(unused)]
173    fn scan_jsx_open_el_terminal_token(&mut self) {
174        let prev = self.cur;
175        let t = self.iter.scan_jsx_open_el_terminal_token();
176        self.prev_span = prev.span;
177        self.set_cur(t);
178    }
179
180    pub fn rescan_jsx_open_el_terminal_token(&mut self) {
181        if !self.cur().should_rescan_into_gt_in_jsx() {
182            return;
183        }
184        // rescan `>=`, `>>`, `>>=`, `>>>`, `>>>=` into `>`
185        let start = self.cur.span.lo;
186        let t = self.iter.rescan_jsx_open_el_terminal_token(start);
187        self.set_cur(t);
188    }
189
190    pub fn rescan_jsx_token(&mut self) {
191        let start = self.cur.span.lo;
192        let t = self.iter.rescan_jsx_token(start);
193        self.set_cur(t);
194    }
195
196    pub fn scan_jsx_identifier(&mut self) {
197        if !self.cur().is_word() {
198            return;
199        }
200        let start = self.cur.span.lo;
201        let cur = self.iter.scan_jsx_identifier(start);
202        debug_assert!(cur.token == Token::JSXName);
203        self.set_cur(cur);
204    }
205
206    pub fn scan_jsx_attribute_value(&mut self) {
207        self.cur = self.iter.scan_jsx_attribute_value();
208    }
209
210    pub fn rescan_template_token(&mut self, start_with_back_tick: bool) {
211        let start = self.cur_pos();
212        self.cur = self.iter.rescan_template_token(start, start_with_back_tick);
213    }
214}
215
impl<I: Tokens> Buffer<I> {
    /// Creates a buffer whose current token is a placeholder `Eof` spanning
    /// the input start; `first_bump` installs the real first token.
    pub fn new(lexer: I) -> Self {
        let start_pos = lexer.start_pos();
        let prev_span = Span::new_with_checked(start_pos, start_pos);
        Buffer {
            iter: lexer,
            cur: TokenAndSpan::new(Token::Eof, prev_span, false),
            prev_span,
            next: None,
        }
    }

    /// Replaces the current token. Does NOT update `prev_span`.
    #[inline(always)]
    pub fn set_cur(&mut self, token: TokenAndSpan) {
        self.cur = token
    }

    /// The peeked token, if one has been fetched via `peek`.
    #[inline(always)]
    pub fn next(&self) -> Option<&NextTokenAndSpan> {
        self.next.as_ref()
    }

    #[inline(always)]
    pub fn set_next(&mut self, token: Option<NextTokenAndSpan>) {
        self.next = token;
    }

    #[inline(always)]
    pub fn next_mut(&mut self) -> &mut Option<NextTokenAndSpan> {
        &mut self.next
    }

    /// The current token (without its span).
    #[inline(always)]
    pub fn cur(&self) -> Token {
        self.cur.token
    }

    /// The current token together with its span and line-break flag.
    #[inline(always)]
    pub fn get_cur(&self) -> &TokenAndSpan {
        &self.cur
    }

    /// Span of the token before the current one.
    #[inline(always)]
    pub fn prev_span(&self) -> Span {
        self.prev_span
    }

    #[inline(always)]
    pub fn iter(&self) -> &I {
        &self.iter
    }

    #[inline(always)]
    pub fn iter_mut(&mut self) -> &mut I {
        &mut self.iter
    }

    /// Returns the token after the current one, fetching (and caching) it
    /// from the lexer on first call.
    ///
    /// The lexer's token value belongs to the *current* token, so it is
    /// saved before the lookahead scan and restored afterwards; the peeked
    /// token's own value is stashed inside `self.next`.
    pub fn peek(&mut self) -> Option<Token> {
        debug_assert!(
            self.cur.token != Token::Eof,
            "parser should not call peek() without knowing current token"
        );

        if self.next.is_none() {
            // Preserve the current token's value across the lookahead scan.
            let old = self.iter.take_token_value();
            let next_token = self.iter.next_token();
            self.next = Some(NextTokenAndSpan {
                token_and_span: next_token,
                value: self.iter.take_token_value(),
            });
            self.iter.set_token_value(old);
        }

        self.next.as_ref().map(|ts| ts.token_and_span.token)
    }

    /// Replaces the current token with `token`, reusing the previous token's
    /// span. Only valid when nothing has been peeked and we are not at eof.
    pub fn store(&mut self, token: Token) {
        debug_assert!(self.next().is_none());
        debug_assert!(self.cur() != Token::Eof);
        let span = self.prev_span();
        let token = TokenAndSpan::new(token, span, false);
        self.set_cur(token);
    }

    /// Installs the very first token of the file (see `Tokens::first_token`).
    pub fn first_bump(&mut self) {
        let first_token = self.iter.first_token();
        self.prev_span = self.cur.span;
        self.set_cur(first_token);
    }

    /// Advances to the next token, consuming the peeked token if present
    /// (restoring its stashed value into the lexer).
    pub fn bump(&mut self) {
        let next = match self.next.take() {
            Some(next) => {
                self.iter.set_token_value(next.value);
                next.token_and_span
            }
            None => self.iter.next_token(),
        };
        self.prev_span = self.cur.span;
        self.set_cur(next);
    }

    /// Takes the current token's word value and advances.
    pub fn expect_word_token_and_bump(&mut self) -> Atom {
        let cur = self.cur();
        let word = cur.take_word(self);
        self.bump();
        word
    }

    /// Takes the current shebang token's text and advances.
    pub fn expect_shebang_token_and_bump(&mut self) -> swc_atoms::Atom {
        let cur = self.cur();
        let ret = cur.take_shebang(self);
        self.bump();
        ret
    }

    /// Takes the current JSX name token's text and advances.
    pub fn expect_jsx_name_token_and_bump(&mut self) -> Atom {
        let cur = self.cur();
        let word = cur.take_jsx_name(self);
        self.bump();
        word
    }

    /// Takes the current error token's error and advances.
    pub fn expect_error_token_and_bump(&mut self) -> Error {
        let cur = self.cur();
        let ret = cur.take_error(self);
        self.bump();
        ret
    }

    /// Debug helper: a printable form of the current token.
    #[cold]
    #[inline(never)]
    pub fn dump_cur(&self) -> String {
        let cur = self.cur();
        cur.to_string()
    }
}
353
354impl<I: Tokens> Buffer<I> {
355    pub fn had_line_break_before_cur(&self) -> bool {
356        self.get_cur().had_line_break
357    }
358
359    /// This returns true on eof.
360    pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool {
361        let _ = self.peek();
362        self.next().map(|item| item.had_line_break()).unwrap_or({
363            // return true on eof.
364            true
365        })
366    }
367
368    pub fn cut_lshift(&mut self) {
369        debug_assert!(
370            self.is(Token::LShift),
371            "parser should only call cut_lshift when encountering LShift token"
372        );
373        let span = self.cur_span().with_lo(self.cur_span().lo + BytePos(1));
374        let token = TokenAndSpan::new(Token::Lt, span, false);
375        self.set_cur(token);
376    }
377
378    pub fn merge_lt_gt(&mut self) {
379        debug_assert!(
380            self.is(Token::Lt) || self.is(Token::Gt),
381            "parser should only call merge_lt_gt when encountering Less token"
382        );
383        if self.peek().is_none() {
384            return;
385        }
386        let span = self.cur_span();
387        let next = self.next().unwrap();
388        if span.hi != next.span().lo {
389            return;
390        }
391        let next = self.next_mut().take().unwrap();
392        let cur = self.get_cur();
393        let cur_token = cur.token;
394        let token = if cur_token == Token::Gt {
395            let next_token = next.token();
396            if next_token == Token::Gt {
397                // >>
398                Token::RShift
399            } else if next_token == Token::Eq {
400                // >=
401                Token::GtEq
402            } else if next_token == Token::RShift {
403                // >>>
404                Token::ZeroFillRShift
405            } else if next_token == Token::GtEq {
406                // >>=
407                Token::RShiftEq
408            } else if next_token == Token::RShiftEq {
409                // >>>=
410                Token::ZeroFillRShiftEq
411            } else {
412                self.set_next(Some(next));
413                return;
414            }
415        } else if cur_token == Token::Lt {
416            let next_token = next.token();
417            if next_token == Token::Lt {
418                // <<
419                Token::LShift
420            } else if next_token == Token::Eq {
421                // <=
422                Token::LtEq
423            } else if next_token == Token::LtEq {
424                // <<=
425                Token::LShiftEq
426            } else {
427                self.set_next(Some(next));
428                return;
429            }
430        } else {
431            self.set_next(Some(next));
432            return;
433        };
434        let span = span.with_hi(next.span().hi);
435        let token = TokenAndSpan::new(token, span, cur.had_line_break);
436        self.set_cur(token);
437    }
438
439    #[inline(always)]
440    pub fn is(&self, expected: Token) -> bool {
441        self.cur() == expected
442    }
443
444    #[inline(always)]
445    pub fn eat(&mut self, expected: Token) -> bool {
446        let v = self.is(expected);
447        if v {
448            self.bump();
449        }
450        v
451    }
452
453    /// Returns start of current token.
454    #[inline]
455    pub fn cur_pos(&self) -> BytePos {
456        self.get_cur().span.lo
457    }
458
459    #[inline]
460    pub fn cur_span(&self) -> Span {
461        self.get_cur().span
462    }
463
464    #[inline]
465    pub fn cur_string(&self) -> &str {
466        let token_span = self.cur_span();
467        self.iter.read_string(token_span)
468    }
469
470    /// Returns last byte position of previous token.
471    #[inline]
472    pub fn last_pos(&self) -> BytePos {
473        self.prev_span().hi
474    }
475
476    #[inline]
477    pub fn get_ctx(&self) -> Context {
478        self.iter().ctx()
479    }
480
481    #[inline]
482    pub fn set_ctx(&mut self, ctx: Context) {
483        self.iter_mut().set_ctx(ctx);
484    }
485
486    #[inline]
487    pub fn syntax(&self) -> SyntaxFlags {
488        self.iter().syntax()
489    }
490
491    #[inline]
492    pub fn target(&self) -> EsVersion {
493        self.iter().target()
494    }
495
496    #[inline]
497    pub fn set_expr_allowed(&mut self, allow: bool) {
498        self.iter_mut().set_expr_allowed(allow)
499    }
500
501    #[inline]
502    pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
503        self.iter_mut().set_next_regexp(start);
504    }
505
506    #[inline]
507    pub fn end_pos(&self) -> BytePos {
508        self.iter().end_pos()
509    }
510
511    #[inline]
512    pub fn token_flags(&self) -> crate::lexer::TokenFlags {
513        self.iter().token_flags()
514    }
515}