plotnik_lib/parser/
core.rs

1//! Parser state machine and low-level operations.
2
3use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, TextRange, TextSize};
4
5use super::ast::Root;
6use super::cst::token_sets::ROOT_EXPR_FIRST;
7use super::cst::{SyntaxKind, SyntaxNode, TokenSet};
8use super::lexer::{Token, token_text};
9use crate::Error;
10use crate::diagnostics::{DiagnosticKind, Diagnostics};
11
12#[derive(Debug)]
13pub struct ParseResult {
14    pub root: Root,
15    pub diagnostics: Diagnostics,
16    pub exec_fuel_consumed: u32,
17}
18
19#[derive(Debug, Clone, Copy)]
20pub(super) struct OpenDelimiter {
21    #[allow(dead_code)] // for future mismatch detection
22    pub kind: SyntaxKind,
23    pub span: TextRange,
24}
25
26/// Trivia tokens are buffered and flushed when starting a new node.
27pub struct Parser<'src> {
28    pub(super) source: &'src str,
29    pub(super) tokens: Vec<Token>,
30    pub(super) pos: usize,
31    pub(super) trivia_buffer: Vec<Token>,
32    pub(super) builder: GreenNodeBuilder<'static>,
33    pub(super) diagnostics: Diagnostics,
34    pub(super) depth: u32,
35    pub(super) last_diagnostic_pos: Option<TextSize>,
36    pub(super) delimiter_stack: Vec<OpenDelimiter>,
37    pub(super) debug_fuel: std::cell::Cell<u32>,
38    exec_fuel_initial: Option<u32>,
39    exec_fuel_remaining: Option<u32>,
40    recursion_fuel_limit: Option<u32>,
41    fatal_error: Option<Error>,
42}
43
44impl<'src> Parser<'src> {
45    pub fn new(source: &'src str, tokens: Vec<Token>) -> Self {
46        Self {
47            source,
48            tokens,
49            pos: 0,
50            trivia_buffer: Vec::with_capacity(4),
51            builder: GreenNodeBuilder::new(),
52            diagnostics: Diagnostics::new(),
53            depth: 0,
54            last_diagnostic_pos: None,
55            delimiter_stack: Vec::with_capacity(8),
56            debug_fuel: std::cell::Cell::new(256),
57            exec_fuel_initial: None,
58            exec_fuel_remaining: None,
59            recursion_fuel_limit: None,
60            fatal_error: None,
61        }
62    }
63
64    pub fn with_exec_fuel(mut self, limit: Option<u32>) -> Self {
65        self.exec_fuel_initial = limit;
66        self.exec_fuel_remaining = limit;
67        self
68    }
69
70    pub fn with_recursion_fuel(mut self, limit: Option<u32>) -> Self {
71        self.recursion_fuel_limit = limit;
72        self
73    }
74
75    pub fn parse(mut self) -> Result<ParseResult, Error> {
76        self.parse_root();
77        let (cst, diagnostics, exec_fuel_consumed) = self.finish()?;
78        let root = Root::cast(SyntaxNode::new_root(cst)).expect("parser always produces Root");
79        Ok(ParseResult {
80            root,
81            diagnostics,
82            exec_fuel_consumed,
83        })
84    }
85
86    fn finish(mut self) -> Result<(GreenNode, Diagnostics, u32), Error> {
87        self.drain_trivia();
88        if let Some(err) = self.fatal_error {
89            return Err(err);
90        }
91        let exec_fuel_consumed = match (self.exec_fuel_initial, self.exec_fuel_remaining) {
92            (Some(initial), Some(remaining)) => initial.saturating_sub(remaining),
93            _ => 0,
94        };
95        Ok((self.builder.finish(), self.diagnostics, exec_fuel_consumed))
96    }
97
98    pub(super) fn has_fatal_error(&self) -> bool {
99        self.fatal_error.is_some()
100    }
101
102    pub(super) fn current(&self) -> SyntaxKind {
103        self.nth(0)
104    }
105
106    fn reset_debug_fuel(&self) {
107        self.debug_fuel.set(256);
108    }
109
110    pub(super) fn nth(&self, lookahead: usize) -> SyntaxKind {
111        self.ensure_progress();
112        self.tokens
113            .get(self.pos + lookahead)
114            .map_or(SyntaxKind::Error, |t| t.kind)
115    }
116
117    fn consume_exec_fuel(&mut self) {
118        if let Some(ref mut remaining) = self.exec_fuel_remaining {
119            if *remaining == 0 {
120                if self.fatal_error.is_none() {
121                    self.fatal_error = Some(Error::ExecFuelExhausted);
122                }
123                return;
124            }
125            *remaining -= 1;
126        }
127    }
128
129    pub(super) fn current_span(&self) -> TextRange {
130        self.tokens
131            .get(self.pos)
132            .map_or_else(|| TextRange::empty(self.eof_offset()), |t| t.span)
133    }
134
135    pub(super) fn eof_offset(&self) -> TextSize {
136        TextSize::from(self.source.len() as u32)
137    }
138
139    pub(super) fn eof(&self) -> bool {
140        self.pos >= self.tokens.len()
141    }
142
143    pub(super) fn should_stop(&self) -> bool {
144        self.eof() || self.has_fatal_error()
145    }
146
147    pub(super) fn at(&self, kind: SyntaxKind) -> bool {
148        self.current() == kind
149    }
150
151    pub(super) fn at_set(&self, set: TokenSet) -> bool {
152        set.contains(self.current())
153    }
154
155    pub(super) fn peek(&mut self) -> SyntaxKind {
156        self.skip_trivia_to_buffer();
157        self.current()
158    }
159
160    /// LL(k) lookahead past trivia.
161    pub(super) fn peek_nth(&mut self, n: usize) -> SyntaxKind {
162        self.skip_trivia_to_buffer();
163        let mut count = 0;
164        let mut pos = self.pos;
165        while pos < self.tokens.len() {
166            let kind = self.tokens[pos].kind;
167            if !kind.is_trivia() {
168                if count == n {
169                    return kind;
170                }
171                count += 1;
172            }
173            pos += 1;
174        }
175        SyntaxKind::Error
176    }
177
178    pub(super) fn skip_trivia_to_buffer(&mut self) {
179        while self.pos < self.tokens.len() && self.tokens[self.pos].kind.is_trivia() {
180            self.trivia_buffer.push(self.tokens[self.pos]);
181            self.pos += 1;
182        }
183    }
184
185    pub(super) fn drain_trivia(&mut self) {
186        for token in self.trivia_buffer.drain(..) {
187            let text = token_text(self.source, &token);
188            self.builder.token(token.kind.into(), text);
189        }
190    }
191
192    pub(super) fn eat_trivia(&mut self) {
193        self.skip_trivia_to_buffer();
194        self.drain_trivia();
195    }
196
197    pub(super) fn start_node(&mut self, kind: SyntaxKind) {
198        self.drain_trivia();
199        self.builder.start_node(kind.into());
200    }
201
202    pub(super) fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
203        self.builder.start_node_at(checkpoint, kind.into());
204    }
205
206    pub(super) fn finish_node(&mut self) {
207        self.builder.finish_node();
208    }
209
210    pub(super) fn checkpoint(&mut self) -> Checkpoint {
211        self.drain_trivia();
212        self.builder.checkpoint()
213    }
214
215    pub(super) fn bump(&mut self) {
216        assert!(!self.eof(), "bump called at EOF");
217        self.reset_debug_fuel();
218        self.consume_exec_fuel();
219
220        let token = self.tokens[self.pos];
221        let text = token_text(self.source, &token);
222        self.builder.token(token.kind.into(), text);
223        self.pos += 1;
224    }
225
226    pub(super) fn skip_token(&mut self) {
227        assert!(!self.eof(), "skip_token called at EOF");
228        self.reset_debug_fuel();
229        self.consume_exec_fuel();
230        self.pos += 1;
231    }
232
233    pub(super) fn eat(&mut self, kind: SyntaxKind) -> bool {
234        if self.at(kind) {
235            self.bump();
236            true
237        } else {
238            false
239        }
240    }
241
242    /// On mismatch: emit diagnostic but don't consume.
243    pub(super) fn expect(&mut self, kind: SyntaxKind, what: &str) -> bool {
244        if self.eat(kind) {
245            return true;
246        }
247        self.error_msg(
248            DiagnosticKind::UnexpectedToken,
249            format!("expected {}", what),
250        );
251        false
252    }
253
254    pub(super) fn current_suppression_span(&self) -> TextRange {
255        self.delimiter_stack
256            .last()
257            .map(|d| TextRange::new(d.span.start(), TextSize::from(self.source.len() as u32)))
258            .unwrap_or_else(|| self.current_span())
259    }
260
261    fn should_report(&mut self, pos: TextSize) -> bool {
262        if self.last_diagnostic_pos == Some(pos) {
263            return false;
264        }
265        self.last_diagnostic_pos = Some(pos);
266        true
267    }
268
269    fn bump_as_error(&mut self) {
270        if !self.eof() {
271            self.start_node(SyntaxKind::Error);
272            self.bump();
273            self.finish_node();
274        }
275    }
276
277    fn get_error_ranges(&mut self) -> Option<(TextRange, TextRange)> {
278        let range = self.current_span();
279        if !self.should_report(range.start()) {
280            return None;
281        }
282        let suppression = self.current_suppression_span();
283        Some((range, suppression))
284    }
285
286    pub(super) fn error(&mut self, kind: DiagnosticKind) {
287        let Some((range, suppression)) = self.get_error_ranges() else {
288            return;
289        };
290        self.diagnostics
291            .report(kind, range)
292            .suppression_range(suppression)
293            .emit();
294    }
295
296    pub(super) fn error_msg(&mut self, kind: DiagnosticKind, message: impl Into<String>) {
297        let Some((range, suppression)) = self.get_error_ranges() else {
298            return;
299        };
300        self.diagnostics
301            .report(kind, range)
302            .message(message)
303            .suppression_range(suppression)
304            .emit();
305    }
306
307    pub(super) fn error_and_bump(&mut self, kind: DiagnosticKind) {
308        self.error(kind);
309        self.bump_as_error();
310    }
311
312    pub(super) fn error_and_bump_msg(&mut self, kind: DiagnosticKind, message: impl Into<String>) {
313        self.error_msg(kind, message);
314        self.bump_as_error();
315    }
316
317    #[allow(dead_code)]
318    pub(super) fn error_recover(
319        &mut self,
320        kind: DiagnosticKind,
321        message: &str,
322        recovery: TokenSet,
323    ) {
324        if self.at_set(recovery) || self.should_stop() {
325            self.error_msg(kind, message);
326            return;
327        }
328
329        self.start_node(SyntaxKind::Error);
330        self.error_msg(kind, message);
331        while !self.at_set(recovery) && !self.should_stop() {
332            self.bump();
333        }
334        self.finish_node();
335    }
336
337    pub(super) fn synchronize_to_def_start(&mut self) -> bool {
338        if self.should_stop() {
339            return false;
340        }
341
342        // Check if already at a sync point
343        if self.at_def_start() {
344            return false;
345        }
346
347        self.start_node(SyntaxKind::Error);
348        while !self.should_stop() && !self.at_def_start() {
349            self.bump();
350            self.skip_trivia_to_buffer();
351        }
352        self.finish_node();
353        true
354    }
355
356    pub(super) fn at_def_start(&mut self) -> bool {
357        let kind = self.peek();
358        if kind == SyntaxKind::Id && self.peek_nth(1) == SyntaxKind::Equals {
359            return true;
360        }
361        ROOT_EXPR_FIRST.contains(kind)
362    }
363
364    pub(super) fn enter_recursion(&mut self) -> bool {
365        if let Some(limit) = self.recursion_fuel_limit
366            && self.depth >= limit
367        {
368            if self.fatal_error.is_none() {
369                self.fatal_error = Some(Error::RecursionLimitExceeded);
370            }
371            return false;
372        }
373        self.depth += 1;
374        self.reset_debug_fuel();
375        true
376    }
377
378    pub(super) fn exit_recursion(&mut self) {
379        self.depth = self.depth.saturating_sub(1);
380        self.reset_debug_fuel();
381    }
382
383    pub(super) fn push_delimiter(&mut self, kind: SyntaxKind) {
384        self.delimiter_stack.push(OpenDelimiter {
385            kind,
386            span: self.current_span(),
387        });
388    }
389
390    pub(super) fn pop_delimiter(&mut self) -> Option<OpenDelimiter> {
391        self.delimiter_stack.pop()
392    }
393
394    pub(super) fn error_unclosed_delimiter(
395        &mut self,
396        kind: DiagnosticKind,
397        message: impl Into<String>,
398        related_msg: impl Into<String>,
399        open_range: TextRange,
400    ) {
401        let current = self.current_span();
402        if !self.should_report(current.start()) {
403            return;
404        }
405        // Use full range for easier downstream error suppression
406        let full_range = TextRange::new(open_range.start(), current.end());
407        self.diagnostics
408            .report(kind, full_range)
409            .message(message)
410            .related_to(related_msg, open_range)
411            .emit();
412    }
413
414    pub(super) fn last_non_trivia_end(&self) -> Option<TextSize> {
415        self.tokens[..self.pos]
416            .iter()
417            .rev()
418            .find(|t| !t.kind.is_trivia())
419            .map(|t| t.span.end())
420    }
421
422    pub(super) fn error_with_fix(
423        &mut self,
424        kind: DiagnosticKind,
425        range: TextRange,
426        message: impl Into<String>,
427        fix_description: impl Into<String>,
428        fix_replacement: impl Into<String>,
429    ) {
430        if !self.should_report(range.start()) {
431            return;
432        }
433        self.diagnostics
434            .report(kind, range)
435            .message(message)
436            .fix(fix_description, fix_replacement)
437            .emit();
438    }
439}