parse_js/parse/
mod.rs

1use self::pattern::ParsePatternRules;
2use crate::error::SyntaxError;
3use crate::error::SyntaxErrorType;
4use crate::error::SyntaxResult;
5use crate::lex::lex_next;
6use crate::lex::LexMode;
7use crate::lex::Lexer;
8use crate::lex::LexerCheckpoint;
9use crate::loc::Loc;
10use crate::token::Token;
11use crate::token::TokenType;
12
13pub mod class_or_object;
14pub mod decl;
15pub mod expr;
16pub mod function;
17pub mod literal;
18pub mod operator;
19pub mod pattern;
20pub mod stmt;
21#[cfg(test)]
22mod tests;
23pub mod toplevel;
24
25// Almost every parse_* function takes these field values as parameters. Instead of having to enumerate them as parameters on every function and ordered unnamed arguments on every call, we simply pass this struct around. Fields are public to allow destructuring, but the value should be immutable; the with_* methods can be used to create an altered copy for passing into other functions, which is useful as most calls simply pass through the values unchanged. This struct should be received as a value, not a reference (i.e. `ctx: ParseCtx` not `ctx: &ParseCtx`) as the latter will require a separate lifetime.
26// All fields except `session` can (although not often) change between calls, so we don't simply put them in Parser, as otherwise we'd have to "unwind" (i.e. reset) those values after each call returns.
27#[derive(Clone, Copy)]
28pub struct ParseCtx {
29  pub rules: ParsePatternRules, // For simplicity, this is a copy, not a non-mutable reference, to avoid having a separate lifetime for it. The value is only two booleans, so a reference is probably slower, and it's supposed to be immutable (i.e. changes come from altered copying, not mutating the original single instance), so there shouldn't be any difference between a reference and a copy.
30}
31
32impl ParseCtx {
33  pub fn with_rules(&self, rules: ParsePatternRules) -> ParseCtx {
34    ParseCtx { rules, ..*self }
35  }
36}
37
38#[derive(Debug)]
39#[must_use]
40pub struct MaybeToken {
41  typ: TokenType,
42  loc: Loc,
43  matched: bool,
44}
45
46impl MaybeToken {
47  pub fn is_match(&self) -> bool {
48    self.matched
49  }
50
51  pub fn match_loc(&self) -> Option<Loc> {
52    if self.matched {
53      Some(self.loc)
54    } else {
55      None
56    }
57  }
58
59  pub fn error(&self, err: SyntaxErrorType) -> SyntaxError {
60    debug_assert!(!self.matched);
61    self.loc.error(err, Some(self.typ))
62  }
63
64  pub fn and_then<R, F: FnOnce() -> SyntaxResult<R>>(self, f: F) -> SyntaxResult<Option<R>> {
65    Ok(if self.matched { Some(f()?) } else { None })
66  }
67}
68
69pub struct ParserCheckpoint {
70  checkpoint: LexerCheckpoint,
71}
72
73struct BufferedToken {
74  token: Token,
75  lex_mode: LexMode,
76  after_checkpoint: LexerCheckpoint,
77}
78
79pub struct Parser<'a> {
80  lexer: Lexer<'a>,
81  buffered: Option<BufferedToken>,
82}
83
84// We extend this struct with added methods in the various submodules, instead of simply using free functions and passing `&mut Parser` around, for several reasons:
85// - Avoid needing to redeclare `<'a>` on every function.
86// - More lifetime elision is available for `self` than if it was just another reference parameter.
87// - `self` is shorter than `parser` but makes more sense than `p`.
88// - Don't need to import each function.
89// - Autocomplete is more specific since `self.*` narrows down the options instead of just listing all visible functions (although almost every function currently starts with `parse_` so this is not as significant).
90// - For general consistency; if there's no reason why it should be a free function (e.g. more than one ambiguous base type), it should be a method.
91// - Makes free functions truly separate independent utility functions.
92impl<'a> Parser<'a> {
93  pub fn new(lexer: Lexer<'a>) -> Parser<'a> {
94    Parser {
95      lexer,
96      buffered: None,
97    }
98  }
99
100  pub fn lexer_mut(&mut self) -> &mut Lexer<'a> {
101    &mut self.lexer
102  }
103
104  pub fn source_range(&self) -> Loc {
105    self.lexer.source_range()
106  }
107
108  pub fn bytes(&self, loc: Loc) -> &[u8] {
109    &self.lexer[loc]
110  }
111
112  pub fn str(&self, loc: Loc) -> &str {
113    unsafe { std::str::from_utf8_unchecked(self.bytes(loc)) }
114  }
115
116  pub fn string(&self, loc: Loc) -> String {
117    self.str(loc).to_string()
118  }
119
120  pub fn checkpoint(&self) -> ParserCheckpoint {
121    ParserCheckpoint {
122      checkpoint: self.lexer.checkpoint(),
123    }
124  }
125
126  pub fn since_checkpoint(&self, checkpoint: ParserCheckpoint) -> Loc {
127    self.lexer.since_checkpoint(checkpoint.checkpoint)
128  }
129
130  pub fn restore_checkpoint(&mut self, checkpoint: ParserCheckpoint) {
131    self.buffered = None;
132    self.lexer.apply_checkpoint(checkpoint.checkpoint);
133  }
134
135  // Useful if lexer was altered outside parser.
136  pub fn clear_buffered(&mut self) {
137    self.buffered = None;
138  }
139
140  fn forward<K: FnOnce(&Token) -> bool>(
141    &mut self,
142    mode: LexMode,
143    keep: K,
144  ) -> SyntaxResult<(bool, Token)> {
145    match self.buffered.as_ref() {
146      Some(b) if b.lex_mode == mode => Ok(if keep(&b.token) {
147        self.lexer.apply_checkpoint(b.after_checkpoint);
148        (true, self.buffered.take().unwrap().token)
149      } else {
150        (false, b.token.clone())
151      }),
152      _ => {
153        // Don't use self.checkpoint as self.backtrack will clear buffer.
154        let cp = self.lexer.checkpoint();
155        let t = lex_next(&mut self.lexer, mode)?;
156        let k = keep(&t);
157        self.buffered = if k {
158          None
159        } else {
160          let after_checkpoint = self.lexer.checkpoint();
161          self.lexer.apply_checkpoint(cp);
162          Some(BufferedToken {
163            token: t.clone(),
164            lex_mode: mode,
165            after_checkpoint,
166          })
167        };
168        Ok((k, t))
169      }
170    }
171  }
172
173  pub fn next_with_mode(&mut self, mode: LexMode) -> SyntaxResult<Token> {
174    self.forward(mode, |_| true).map(|r| r.1)
175  }
176
177  pub fn next(&mut self) -> SyntaxResult<Token> {
178    self.next_with_mode(LexMode::Standard)
179  }
180
181  pub fn peek_with_mode(&mut self, mode: LexMode) -> SyntaxResult<Token> {
182    self.forward(mode, |_| false).map(|r| r.1)
183  }
184
185  pub fn peek(&mut self) -> SyntaxResult<Token> {
186    self.peek_with_mode(LexMode::Standard)
187  }
188
189  pub fn consume_peeked(&mut self) -> Token {
190    let b = self.buffered.take().unwrap();
191    self.lexer.apply_checkpoint(b.after_checkpoint);
192    b.token
193  }
194
195  pub fn maybe_with_mode(&mut self, typ: TokenType, mode: LexMode) -> SyntaxResult<MaybeToken> {
196    let (matched, t) = self.forward(mode, |t| t.typ == typ)?;
197    Ok(MaybeToken {
198      typ,
199      matched,
200      loc: t.loc,
201    })
202  }
203
204  pub fn consume_if(&mut self, typ: TokenType) -> SyntaxResult<MaybeToken> {
205    self.maybe_with_mode(typ, LexMode::Standard)
206  }
207
208  pub fn consume_if_pred<F: FnOnce(&Token) -> bool>(
209    &mut self,
210    pred: F,
211  ) -> SyntaxResult<MaybeToken> {
212    let (matched, t) = self.forward(LexMode::Standard, pred)?;
213    Ok(MaybeToken {
214      typ: t.typ,
215      matched,
216      loc: t.loc,
217    })
218  }
219
220  pub fn require_with_mode(&mut self, typ: TokenType, mode: LexMode) -> SyntaxResult<Token> {
221    let t = self.next_with_mode(mode)?;
222    if t.typ != typ {
223      Err(t.error(SyntaxErrorType::RequiredTokenNotFound(typ)))
224    } else {
225      Ok(t)
226    }
227  }
228
229  pub fn require_predicate<P: FnOnce(TokenType) -> bool>(
230    &mut self,
231    pred: P,
232    expected: &'static str,
233  ) -> SyntaxResult<Token> {
234    let t = self.next_with_mode(LexMode::Standard)?;
235    if !pred(t.typ) {
236      Err(t.error(SyntaxErrorType::ExpectedSyntax(expected)))
237    } else {
238      Ok(t)
239    }
240  }
241
242  pub fn require(&mut self, typ: TokenType) -> SyntaxResult<Token> {
243    self.require_with_mode(typ, LexMode::Standard)
244  }
245}