ra_ap_parser/
parser.rs

1//! See [`Parser`].
2
3use std::cell::Cell;
4
5use drop_bomb::DropBomb;
6
7use crate::{
8    Edition,
9    SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
10    T, TokenSet,
11    event::Event,
12    input::Input,
13};
14
15/// `Parser` struct provides the low-level API for
16/// navigating through the stream of tokens and
17/// constructing the parse tree. The actual parsing
18/// happens in the [`grammar`](super::grammar) module.
19///
20/// However, the result of this `Parser` is not a real
21/// tree, but rather a flat stream of events of the form
22/// "start expression, consume number literal,
23/// finish expression". See `Event` docs for more.
24pub(crate) struct Parser<'t> {
25    inp: &'t Input,
26    pos: usize,
27    events: Vec<Event>,
28    steps: Cell<u32>,
29    edition: Edition,
30}
31
32const PARSER_STEP_LIMIT: usize = if cfg!(debug_assertions) { 150_000 } else { 15_000_000 };
33
34impl<'t> Parser<'t> {
35    pub(super) fn new(inp: &'t Input, edition: Edition) -> Parser<'t> {
36        Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0), edition }
37    }
38
39    pub(crate) fn finish(self) -> Vec<Event> {
40        self.events
41    }
42
43    /// Returns the kind of the current token.
44    /// If parser has already reached the end of input,
45    /// the special `EOF` kind is returned.
46    pub(crate) fn current(&self) -> SyntaxKind {
47        self.nth(0)
48    }
49
50    /// Lookahead operation: returns the kind of the next nth
51    /// token.
52    pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
53        assert!(n <= 3);
54
55        let steps = self.steps.get();
56        assert!((steps as usize) < PARSER_STEP_LIMIT, "the parser seems stuck");
57        self.steps.set(steps + 1);
58
59        self.inp.kind(self.pos + n)
60    }
61
62    /// Checks if the current token is `kind`.
63    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
64        self.nth_at(0, kind)
65    }
66
67    pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
68        match kind {
69            T![-=] => self.at_composite2(n, T![-], T![=]),
70            T![->] => self.at_composite2(n, T![-], T![>]),
71            T![::] => self.at_composite2(n, T![:], T![:]),
72            T![!=] => self.at_composite2(n, T![!], T![=]),
73            T![..] => self.at_composite2(n, T![.], T![.]),
74            T![*=] => self.at_composite2(n, T![*], T![=]),
75            T![/=] => self.at_composite2(n, T![/], T![=]),
76            T![&&] => self.at_composite2(n, T![&], T![&]),
77            T![&=] => self.at_composite2(n, T![&], T![=]),
78            T![%=] => self.at_composite2(n, T![%], T![=]),
79            T![^=] => self.at_composite2(n, T![^], T![=]),
80            T![+=] => self.at_composite2(n, T![+], T![=]),
81            T![<<] => self.at_composite2(n, T![<], T![<]),
82            T![<=] => self.at_composite2(n, T![<], T![=]),
83            T![==] => self.at_composite2(n, T![=], T![=]),
84            T![=>] => self.at_composite2(n, T![=], T![>]),
85            T![>=] => self.at_composite2(n, T![>], T![=]),
86            T![>>] => self.at_composite2(n, T![>], T![>]),
87            T![|=] => self.at_composite2(n, T![|], T![=]),
88            T![||] => self.at_composite2(n, T![|], T![|]),
89
90            T![...] => self.at_composite3(n, T![.], T![.], T![.]),
91            T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
92            T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
93            T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
94
95            _ => self.inp.kind(self.pos + n) == kind,
96        }
97    }
98
99    /// Consume the next token if `kind` matches.
100    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
101        if !self.at(kind) {
102            return false;
103        }
104        let n_raw_tokens = match kind {
105            T![-=]
106            | T![->]
107            | T![::]
108            | T![!=]
109            | T![..]
110            | T![*=]
111            | T![/=]
112            | T![&&]
113            | T![&=]
114            | T![%=]
115            | T![^=]
116            | T![+=]
117            | T![<<]
118            | T![<=]
119            | T![==]
120            | T![=>]
121            | T![>=]
122            | T![>>]
123            | T![|=]
124            | T![||] => 2,
125
126            T![...] | T![..=] | T![<<=] | T![>>=] => 3,
127            _ => 1,
128        };
129        self.do_bump(kind, n_raw_tokens);
130        true
131    }
132
133    pub(crate) fn eat_contextual_kw(&mut self, kind: SyntaxKind) -> bool {
134        if !self.at_contextual_kw(kind) {
135            return false;
136        }
137        self.bump_remap(kind);
138        true
139    }
140
141    fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
142        self.inp.kind(self.pos + n) == k1
143            && self.inp.kind(self.pos + n + 1) == k2
144            && self.inp.is_joint(self.pos + n)
145    }
146
147    fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
148        self.inp.kind(self.pos + n) == k1
149            && self.inp.kind(self.pos + n + 1) == k2
150            && self.inp.kind(self.pos + n + 2) == k3
151            && self.inp.is_joint(self.pos + n)
152            && self.inp.is_joint(self.pos + n + 1)
153    }
154
155    /// Checks if the current token is in `kinds`.
156    pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
157        kinds.contains(self.current())
158    }
159
160    /// Checks if the current token is contextual keyword `kw`.
161    pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
162        self.inp.contextual_kind(self.pos) == kw
163    }
164
165    /// Checks if the nth token is contextual keyword `kw`.
166    pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool {
167        self.inp.contextual_kind(self.pos + n) == kw
168    }
169
170    /// Starts a new node in the syntax tree. All nodes and tokens
171    /// consumed between the `start` and the corresponding `Marker::complete`
172    /// belong to the same node.
173    pub(crate) fn start(&mut self) -> Marker {
174        let pos = self.events.len() as u32;
175        self.push_event(Event::tombstone());
176        Marker::new(pos)
177    }
178
179    /// Consume the next token. Panics if the parser isn't currently at `kind`.
180    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
181        assert!(self.eat(kind));
182    }
183
184    /// Advances the parser by one token
185    pub(crate) fn bump_any(&mut self) {
186        let kind = self.nth(0);
187        if kind == EOF {
188            return;
189        }
190        self.do_bump(kind, 1);
191    }
192
193    /// Advances the parser by one token
194    pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) {
195        assert!(self.at(SyntaxKind::FLOAT_NUMBER));
196        // we have parse `<something>.`
197        // `<something>`.0.1
198        // here we need to insert an extra event
199        //
200        // `<something>`. 0. 1;
201        // here we need to change the follow up parse, the return value will cause us to emulate a dot
202        // the actual splitting happens later
203        let ends_in_dot = !self.inp.is_joint(self.pos);
204        if !ends_in_dot {
205            let new_marker = self.start();
206            let idx = marker.pos as usize;
207            match &mut self.events[idx] {
208                Event::Start { forward_parent, kind } => {
209                    *kind = SyntaxKind::FIELD_EXPR;
210                    *forward_parent = Some(new_marker.pos - marker.pos);
211                }
212                _ => unreachable!(),
213            }
214            marker.bomb.defuse();
215            marker = new_marker;
216        };
217        self.pos += 1;
218        self.push_event(Event::FloatSplitHack { ends_in_dot });
219        (ends_in_dot, marker)
220    }
221
222    /// Advances the parser by one token, remapping its kind.
223    /// This is useful to create contextual keywords from
224    /// identifiers. For example, the lexer creates a `union`
225    /// *identifier* token, but the parser remaps it to the
226    /// `union` keyword, and keyword is what ends up in the
227    /// final tree.
228    pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
229        if self.nth(0) == EOF {
230            // FIXME: panic!?
231            return;
232        }
233        self.do_bump(kind, 1);
234    }
235
236    /// Emit error with the `message`
237    /// FIXME: this should be much more fancy and support
238    /// structured errors with spans and notes, like rustc
239    /// does.
240    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
241        let msg = message.into();
242        self.push_event(Event::Error { msg });
243    }
244
245    /// Consume the next token if it is `kind` or emit an error
246    /// otherwise.
247    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
248        if self.eat(kind) {
249            return true;
250        }
251        self.error(format!("expected {kind:?}"));
252        false
253    }
254
255    /// Create an error node and consume the next token.
256    pub(crate) fn err_and_bump(&mut self, message: &str) {
257        let m = self.start();
258        self.error(message);
259        self.bump_any();
260        m.complete(self, ERROR);
261    }
262
263    /// Create an error node and consume the next token unless it is in the recovery set.
264    ///
265    /// Returns true if recovery kicked in.
266    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) -> bool {
267        if matches!(self.current(), T!['{'] | T!['}']) {
268            self.error(message);
269            return true;
270        }
271
272        if self.at_ts(recovery) {
273            self.error(message);
274            return true;
275        }
276
277        let m = self.start();
278        self.error(message);
279        self.bump_any();
280        m.complete(self, ERROR);
281        false
282    }
283
284    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
285        self.pos += n_raw_tokens as usize;
286        self.steps.set(0);
287        self.push_event(Event::Token { kind, n_raw_tokens });
288    }
289
290    fn push_event(&mut self, event: Event) {
291        self.events.push(event);
292    }
293
294    pub(crate) fn edition(&self) -> Edition {
295        self.edition
296    }
297}
298
299/// See [`Parser::start`].
300pub(crate) struct Marker {
301    pos: u32,
302    bomb: DropBomb,
303}
304
305impl Marker {
306    fn new(pos: u32) -> Marker {
307        Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
308    }
309
310    /// Finishes the syntax tree node and assigns `kind` to it,
311    /// and mark the create a `CompletedMarker` for possible future
312    /// operation like `.precede()` to deal with forward_parent.
313    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
314        self.bomb.defuse();
315        let idx = self.pos as usize;
316        match &mut p.events[idx] {
317            Event::Start { kind: slot, .. } => {
318                *slot = kind;
319            }
320            _ => unreachable!(),
321        }
322        p.push_event(Event::Finish);
323        let end_pos = p.events.len() as u32;
324        CompletedMarker::new(self.pos, end_pos, kind)
325    }
326
327    /// Abandons the syntax tree node. All its children
328    /// are attached to its parent instead.
329    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
330        self.bomb.defuse();
331        let idx = self.pos as usize;
332        if idx == p.events.len() - 1 {
333            assert!(matches!(
334                p.events.pop(),
335                Some(Event::Start { kind: TOMBSTONE, forward_parent: None })
336            ));
337        }
338    }
339}
340
341pub(crate) struct CompletedMarker {
342    start_pos: u32,
343    end_pos: u32,
344    kind: SyntaxKind,
345}
346
347impl CompletedMarker {
348    fn new(start_pos: u32, end_pos: u32, kind: SyntaxKind) -> Self {
349        CompletedMarker { start_pos, end_pos, kind }
350    }
351
352    /// This method allows to create a new node which starts
353    /// *before* the current one. That is, parser could start
354    /// node `A`, then complete it, and then after parsing the
355    /// whole `A`, decide that it should have started some node
356    /// `B` before starting `A`. `precede` allows to do exactly
357    /// that. See also docs about
358    /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
359    ///
360    /// Given completed events `[START, FINISH]` and its corresponding
361    /// `CompletedMarker(pos: 0, _)`.
362    /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
363    /// then mark `NEWSTART` as `START`'s parent with saving its relative
364    /// distance to `NEWSTART` into forward_parent(=2 in this case);
365    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
366        let new_pos = p.start();
367        let idx = self.start_pos as usize;
368        match &mut p.events[idx] {
369            Event::Start { forward_parent, .. } => {
370                *forward_parent = Some(new_pos.pos - self.start_pos);
371            }
372            _ => unreachable!(),
373        }
374        new_pos
375    }
376
377    /// Extends this completed marker *to the left* up to `m`.
378    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
379        m.bomb.defuse();
380        let idx = m.pos as usize;
381        match &mut p.events[idx] {
382            Event::Start { forward_parent, .. } => {
383                *forward_parent = Some(self.start_pos - m.pos);
384            }
385            _ => unreachable!(),
386        }
387        self
388    }
389
390    pub(crate) fn kind(&self) -> SyntaxKind {
391        self.kind
392    }
393
394    pub(crate) fn last_token(&self, p: &Parser<'_>) -> Option<SyntaxKind> {
395        let end_pos = self.end_pos as usize;
396        debug_assert_eq!(p.events[end_pos - 1], Event::Finish);
397        p.events[..end_pos].iter().rev().find_map(|event| match event {
398            Event::Token { kind, .. } => Some(*kind),
399            _ => None,
400        })
401    }
402}