plotnik_compiler/parser/
core.rs

1//! Parser state machine and low-level operations.
2
3use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, TextRange, TextSize};
4
5use super::ast::Root;
6use super::cst::{SyntaxKind, SyntaxNode, TokenSet};
7use super::lexer::{Token, token_text};
8use crate::Error;
9use crate::diagnostics::{DiagnosticKind, Diagnostics, SourceId};
10
11#[derive(Debug)]
12pub struct ParseResult {
13    pub ast: Root,
14    pub fuel_consumed: u32,
15}
16
17#[derive(Debug, Clone, Copy)]
18pub(super) struct OpenDelimiter {
19    #[allow(dead_code)] // for future mismatch detection
20    pub kind: SyntaxKind,
21    pub span: TextRange,
22}
23
/// Fuel budget a `ParserBuilder` starts with unless `fuel` overrides it.
const DEFAULT_FUEL: u32 = 1_000_000;
/// Recursion depth limit a `ParserBuilder` starts with unless `max_depth` overrides it.
const DEFAULT_MAX_DEPTH: u32 = 4096;
28
29/// Trivia tokens are buffered and flushed when starting a new node.
30pub struct Parser<'q, 'd> {
31    pub(super) source: &'q str,
32    pub(super) source_id: SourceId,
33    pub(super) tokens: Vec<Token>,
34    pub(super) pos: usize,
35    pub(super) trivia_buffer: Vec<Token>,
36    pub(super) builder: GreenNodeBuilder<'static>,
37    pub(super) diagnostics: &'d mut Diagnostics,
38    pub(super) depth: u32,
39    pub(super) last_diagnostic_pos: Option<TextSize>,
40    pub(super) delimiter_stack: Vec<OpenDelimiter>,
41    pub(super) debug_fuel: std::cell::Cell<u32>,
42    pub(crate) fuel_initial: u32,
43    pub(crate) fuel_remaining: u32,
44    pub(crate) max_depth: u32,
45    pub(crate) fatal_error: Option<Error>,
46}
47
48/// Builder for `Parser`.
49pub struct ParserBuilder<'q, 'd> {
50    source: &'q str,
51    source_id: SourceId,
52    tokens: Vec<Token>,
53    diagnostics: &'d mut Diagnostics,
54    fuel: u32,
55    max_depth: u32,
56}
57
58impl<'q, 'd> ParserBuilder<'q, 'd> {
59    /// Create a new builder with required parameters.
60    pub fn new(
61        source: &'q str,
62        source_id: SourceId,
63        tokens: Vec<Token>,
64        diagnostics: &'d mut Diagnostics,
65    ) -> Self {
66        Self {
67            source,
68            source_id,
69            tokens,
70            diagnostics,
71            fuel: DEFAULT_FUEL,
72            max_depth: DEFAULT_MAX_DEPTH,
73        }
74    }
75
76    /// Set the fuel limit.
77    pub fn fuel(mut self, fuel: u32) -> Self {
78        self.fuel = fuel;
79        self
80    }
81
82    /// Set the maximum recursion depth.
83    pub fn max_depth(mut self, depth: u32) -> Self {
84        self.max_depth = depth;
85        self
86    }
87
88    /// Build the Parser.
89    pub fn build(self) -> Parser<'q, 'd> {
90        Parser {
91            source: self.source,
92            source_id: self.source_id,
93            tokens: self.tokens,
94            pos: 0,
95            trivia_buffer: Vec::with_capacity(4),
96            builder: GreenNodeBuilder::new(),
97            diagnostics: self.diagnostics,
98            depth: 0,
99            last_diagnostic_pos: None,
100            delimiter_stack: Vec::with_capacity(8),
101            debug_fuel: std::cell::Cell::new(256),
102            fuel_initial: self.fuel,
103            fuel_remaining: self.fuel,
104            max_depth: self.max_depth,
105            fatal_error: None,
106        }
107    }
108}
109
110impl<'q, 'd> Parser<'q, 'd> {
111    /// Create a builder for Parser.
112    pub fn builder(
113        source: &'q str,
114        source_id: SourceId,
115        tokens: Vec<Token>,
116        diagnostics: &'d mut Diagnostics,
117    ) -> ParserBuilder<'q, 'd> {
118        ParserBuilder::new(source, source_id, tokens, diagnostics)
119    }
120
121    /// Create a new parser with the specified parameters.
122    pub fn new(
123        source: &'q str,
124        source_id: SourceId,
125        tokens: Vec<Token>,
126        diagnostics: &'d mut Diagnostics,
127        fuel: u32,
128        max_depth: u32,
129    ) -> Self {
130        Parser::builder(source, source_id, tokens, diagnostics)
131            .fuel(fuel)
132            .max_depth(max_depth)
133            .build()
134    }
135
136    pub fn parse(mut self) -> Result<ParseResult, Error> {
137        self.parse_root();
138        let (cst, exec_fuel_consumed) = self.finish()?;
139        let root = Root::cast(SyntaxNode::new_root(cst)).expect("parser always produces Root");
140        Ok(ParseResult {
141            ast: root,
142            fuel_consumed: exec_fuel_consumed,
143        })
144    }
145
146    fn finish(mut self) -> Result<(GreenNode, u32), Error> {
147        self.drain_trivia();
148        if let Some(err) = self.fatal_error {
149            return Err(err);
150        }
151        let fuel_consumed = self.fuel_initial.saturating_sub(self.fuel_remaining);
152        Ok((self.builder.finish(), fuel_consumed))
153    }
154
155    pub(super) fn has_fatal_error(&self) -> bool {
156        self.fatal_error.is_some()
157    }
158
159    pub(super) fn current(&mut self) -> SyntaxKind {
160        self.skip_trivia_to_buffer();
161        self.nth_raw(0)
162    }
163
164    fn reset_debug_fuel(&self) {
165        self.debug_fuel.set(256);
166    }
167
168    pub(super) fn nth_raw(&self, lookahead: usize) -> SyntaxKind {
169        self.ensure_progress();
170        self.tokens
171            .get(self.pos + lookahead)
172            .map_or(SyntaxKind::Error, |t| t.kind)
173    }
174
175    fn consume_exec_fuel(&mut self) {
176        if self.fuel_remaining > 0 {
177            self.fuel_remaining -= 1;
178            return;
179        }
180
181        if self.fatal_error.is_none() {
182            self.fatal_error = Some(Error::ExecFuelExhausted);
183        }
184    }
185
186    pub(super) fn current_span(&mut self) -> TextRange {
187        self.skip_trivia_to_buffer();
188        self.tokens
189            .get(self.pos)
190            .map_or_else(|| TextRange::empty(self.eof_offset()), |t| t.span)
191    }
192
193    pub(super) fn eof_offset(&self) -> TextSize {
194        TextSize::from(self.source.len() as u32)
195    }
196
197    pub(super) fn eof(&self) -> bool {
198        self.pos >= self.tokens.len()
199    }
200
201    pub(super) fn should_stop(&self) -> bool {
202        self.eof() || self.has_fatal_error()
203    }
204
205    pub(super) fn currently_is(&mut self, kind: SyntaxKind) -> bool {
206        self.current() == kind
207    }
208
209    pub(super) fn currently_is_one_of(&mut self, set: TokenSet) -> bool {
210        set.contains(self.current())
211    }
212
213    /// LL(k) lookahead past trivia.
214    fn peek_nth(&mut self, n: usize) -> SyntaxKind {
215        self.skip_trivia_to_buffer();
216        let mut count = 0;
217        let mut pos = self.pos;
218        while pos < self.tokens.len() {
219            let kind = self.tokens[pos].kind;
220            if !kind.is_trivia() {
221                if count == n {
222                    return kind;
223                }
224                count += 1;
225            }
226            pos += 1;
227        }
228        SyntaxKind::Error
229    }
230
231    pub(super) fn next_is(&mut self, kind: SyntaxKind) -> bool {
232        self.peek_nth(1) == kind
233    }
234
235    pub(super) fn skip_trivia_to_buffer(&mut self) {
236        while self.pos < self.tokens.len() && self.tokens[self.pos].kind.is_trivia() {
237            self.trivia_buffer.push(self.tokens[self.pos]);
238            self.pos += 1;
239        }
240    }
241
242    pub(super) fn drain_trivia(&mut self) {
243        for token in self.trivia_buffer.drain(..) {
244            let text = token_text(self.source, &token);
245            self.builder.token(token.kind.into(), text);
246        }
247    }
248
249    pub(super) fn eat_trivia(&mut self) {
250        self.skip_trivia_to_buffer();
251        self.drain_trivia();
252    }
253
254    pub(super) fn start_node(&mut self, kind: SyntaxKind) {
255        self.drain_trivia();
256        self.builder.start_node(kind.into());
257    }
258
259    pub(super) fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
260        self.builder.start_node_at(checkpoint, kind.into());
261    }
262
263    pub(super) fn finish_node(&mut self) {
264        self.builder.finish_node();
265    }
266
267    pub(super) fn checkpoint(&mut self) -> Checkpoint {
268        self.drain_trivia();
269        self.builder.checkpoint()
270    }
271
272    pub(super) fn bump(&mut self) {
273        assert!(!self.eof(), "bump called at EOF");
274        self.reset_debug_fuel();
275        self.consume_exec_fuel();
276
277        self.drain_trivia();
278
279        let token = self.tokens[self.pos];
280        let text = token_text(self.source, &token);
281        self.builder.token(token.kind.into(), text);
282        self.pos += 1;
283    }
284
285    pub(super) fn skip_token(&mut self) {
286        assert!(!self.eof(), "skip_token called at EOF");
287        self.reset_debug_fuel();
288        self.consume_exec_fuel();
289        self.pos += 1;
290    }
291
292    pub(super) fn eat_token(&mut self, kind: SyntaxKind) -> bool {
293        if self.currently_is(kind) {
294            self.bump();
295            true
296        } else {
297            false
298        }
299    }
300
301    /// On mismatch: emit diagnostic but don't consume.
302    pub(super) fn expect(&mut self, kind: SyntaxKind, what: &str) -> bool {
303        if self.eat_token(kind) {
304            return true;
305        }
306        self.error_msg(
307            DiagnosticKind::UnexpectedToken,
308            format!("expected {}", what),
309        );
310        false
311    }
312
313    pub(super) fn current_suppression_span(&mut self) -> TextRange {
314        self.delimiter_stack
315            .last()
316            .map(|d| TextRange::new(d.span.start(), TextSize::from(self.source.len() as u32)))
317            .unwrap_or_else(|| self.current_span())
318    }
319
320    fn should_report(&mut self, pos: TextSize) -> bool {
321        if self.last_diagnostic_pos == Some(pos) {
322            return false;
323        }
324        self.last_diagnostic_pos = Some(pos);
325        true
326    }
327
328    pub(super) fn bump_as_error(&mut self) {
329        if !self.eof() {
330            self.start_node(SyntaxKind::Error);
331            self.bump();
332            self.finish_node();
333        }
334    }
335
336    fn get_error_ranges(&mut self) -> Option<(TextRange, TextRange)> {
337        let range = self.current_span();
338        if !self.should_report(range.start()) {
339            return None;
340        }
341        let suppression = self.current_suppression_span();
342        Some((range, suppression))
343    }
344
345    pub(super) fn error(&mut self, kind: DiagnosticKind) {
346        let Some((range, suppression)) = self.get_error_ranges() else {
347            return;
348        };
349        self.diagnostics
350            .report(self.source_id, kind, range)
351            .suppression_range(suppression)
352            .emit();
353    }
354
355    pub(super) fn error_msg(&mut self, kind: DiagnosticKind, message: impl Into<String>) {
356        let Some((range, suppression)) = self.get_error_ranges() else {
357            return;
358        };
359        self.diagnostics
360            .report(self.source_id, kind, range)
361            .message(message)
362            .suppression_range(suppression)
363            .emit();
364    }
365
366    pub(super) fn error_with_hint(&mut self, kind: DiagnosticKind, hint: impl Into<String>) {
367        let Some((range, suppression)) = self.get_error_ranges() else {
368            return;
369        };
370        self.diagnostics
371            .report(self.source_id, kind, range)
372            .hint(hint)
373            .suppression_range(suppression)
374            .emit();
375    }
376
377    pub(super) fn error_and_bump(&mut self, kind: DiagnosticKind) {
378        self.error(kind);
379        self.bump_as_error();
380    }
381
382    pub(super) fn error_and_bump_with_hint(
383        &mut self,
384        kind: DiagnosticKind,
385        hint: impl Into<String>,
386    ) {
387        self.error_with_hint(kind, hint);
388        self.bump_as_error();
389    }
390
391    #[allow(dead_code)]
392    pub(super) fn error_and_bump_msg(&mut self, kind: DiagnosticKind, message: impl Into<String>) {
393        self.error_msg(kind, message);
394        self.bump_as_error();
395    }
396
397    #[allow(dead_code)]
398    pub(super) fn error_recover(
399        &mut self,
400        kind: DiagnosticKind,
401        message: &str,
402        recovery: TokenSet,
403    ) {
404        if self.currently_is_one_of(recovery) || self.should_stop() {
405            self.error_msg(kind, message);
406            return;
407        }
408
409        self.start_node(SyntaxKind::Error);
410        self.error_msg(kind, message);
411        while !self.currently_is_one_of(recovery) && !self.should_stop() {
412            self.bump();
413        }
414        self.finish_node();
415    }
416
417    pub(super) fn enter_recursion(&mut self) -> bool {
418        if self.depth < self.max_depth {
419            self.depth += 1;
420            self.reset_debug_fuel();
421            return true;
422        }
423
424        if self.fatal_error.is_none() {
425            self.fatal_error = Some(Error::RecursionLimitExceeded);
426        }
427
428        false
429    }
430
431    pub(super) fn exit_recursion(&mut self) {
432        self.depth = self.depth.saturating_sub(1);
433        self.reset_debug_fuel();
434    }
435
436    pub(super) fn push_delimiter(&mut self, kind: SyntaxKind) {
437        let span = self.current_span();
438        self.delimiter_stack.push(OpenDelimiter { kind, span });
439    }
440
441    pub(super) fn pop_delimiter(&mut self) -> Option<OpenDelimiter> {
442        self.delimiter_stack.pop()
443    }
444
445    pub(super) fn error_unclosed_delimiter(
446        &mut self,
447        kind: DiagnosticKind,
448        related_msg: impl Into<String>,
449        open_range: TextRange,
450    ) {
451        let current = self.current_span();
452        if !self.should_report(current.start()) {
453            return;
454        }
455        // Use full range for easier downstream error suppression
456        let full_range = TextRange::new(open_range.start(), current.end());
457        self.diagnostics
458            .report(self.source_id, kind, full_range)
459            .related_to(self.source_id, open_range, related_msg)
460            .emit();
461    }
462
463    pub(super) fn last_non_trivia_end(&self) -> Option<TextSize> {
464        self.tokens[..self.pos]
465            .iter()
466            .rev()
467            .find(|t| !t.kind.is_trivia())
468            .map(|t| t.span.end())
469    }
470
471    pub(super) fn error_with_fix(
472        &mut self,
473        kind: DiagnosticKind,
474        range: TextRange,
475        message: impl Into<String>,
476        fix_description: impl Into<String>,
477        fix_replacement: impl Into<String>,
478    ) {
479        if !self.should_report(range.start()) {
480            return;
481        }
482        self.diagnostics
483            .report(self.source_id, kind, range)
484            .message(message)
485            .fix(fix_description, fix_replacement)
486            .emit();
487    }
488}