oak_core/parser/
mod.rs

1mod pratt_parser;
2pub use self::pratt_parser::{Associativity, OperatorInfo, PrattParser, Precedence};
3use crate::{GreenBuilder, GreenNode, IncrementalCache, Language, OakDiagnostics, Token, errors::OakError, source::Source};
4use triomphe::Arc;
5
/// Output type for parsing operations.
///
/// An [`OakDiagnostics`] whose `result` is either the root green node of the
/// parsed tree or an [`OakError`], accompanied by the diagnostics collected
/// while parsing.
pub type ParseOutput<L: Language> = OakDiagnostics<Arc<GreenNode<L::SyntaxKind>>>;
8
9/// Parser trait for converting tokens into kind trees.
10///
11/// This trait provides a unified interface for parsing source text into
12/// green kind trees, supporting both full parsing and incremental updates.
13pub trait Parser<L: Language> {
14    /// Parses source text into a kind tree.
15    ///
16    /// This method performs a full parse of the source text, creating a new
17    /// kind tree from scratch. It uses a default cache configuration.
18    ///
19    /// # Arguments
20    ///
21    /// * `text` - The source text to parse
22    ///
23    /// # Returns
24    ///
25    /// A parse output containing either the green kind tree or errors
26    fn parse(&self, text: impl Source) -> ParseOutput<L> {
27        let mut pool = GreenBuilder::new(0);
28        let cache = IncrementalCache::new(&mut pool);
29        self.parse_incremental(text, 0, cache)
30    }
31
32    /// Parses source text incrementally using an existing cache.
33    ///
34    /// This method enables efficient re-parsing by reusing information from
35    /// previous parsing operations, only processing the changed portions
36    /// of the source text.
37    ///
38    /// # Arguments
39    ///
40    /// * `text` - The source text to parse
41    /// * `changed` - The number of bytes that have changed since the last parse
42    /// * `cache` - The incremental cache containing previous parsing results
43    ///
44    /// # Returns
45    ///
46    /// A parse output containing either the green kind tree or errors
47    fn parse_incremental(&self, text: impl Source, changed: usize, cache: IncrementalCache<L>) -> ParseOutput<L>;
48}
49
/// Generic parsing state that bundles a token-stream cursor with error aggregation.
///
/// This struct tracks the current position in the token stream held by the
/// cache and provides utilities for consuming tokens and recording errors.
///
/// # Examples
///
/// NOTE(review): the example below calls `new_with_cache` with two arguments,
/// whereas the constructor defined in this module takes `(source, change, cache)`,
/// so it cannot compile as a doctest. It is marked `ignore` until it is rewritten
/// against the current constructor.
///
/// ```rust,ignore
/// use core::range::Range;
/// use oak_core::{SourceText, Token, parser::ParserState};
///
/// #[derive(Copy, Clone, PartialEq)]
/// enum K {
///     A,
///     B,
///     Eof,
/// }
///
/// let source = SourceText::new("ab");
/// let tokens = [
///     Token { kind: K::A, span: Range { start: 0, end: 1 } },
///     Token { kind: K::B, span: Range { start: 1, end: 2 } },
///     Token { kind: K::Eof, span: Range { start: 2, end: 2 } },
/// ];
/// let mut st = ParserState::new_with_cache(&source, &tokens);
/// assert!(st.match_kind(&[K::A]));
/// assert!(st.match_kind(&[K::B]));
/// let out = st.finish(Ok(()));
/// assert!(out.diagnostics.is_empty());
/// ```
pub struct ParserState<'a, S: Source, L: Language> {
    /// The source text being parsed; also used to build syntax errors
    pub source: S,
    /// The incremental cache that supplies the tokens and the last parse result
    pub cache: IncrementalCache<'a, L>,
    /// Index of the current token in the cache's token stream
    pub index: usize,
    /// Errors recorded during parsing; returned as diagnostics by `finish`
    pub errors: Vec<OakError>,
}
90
91impl<'a, S: Source, L: Language> ParserState<'a, S, L> {
92    /// Creates a new parser state with the given source text and tokens.
93    #[inline]
94    pub fn new_with_cache(source: S, change: usize, cache: IncrementalCache<'a, L>) -> Self {
95        Self { cache, source, index: 0, errors: Vec::new() }
96    }
97
98    /// Checks if there are more tokens to consume.
99    ///
100    /// # Returns
101    ///
102    /// `true` if there are more tokens to parse, `false` otherwise
103    pub fn not_at_end(&self) -> bool {
104        self.index < self.cache.count_tokens()
105    }
106
107    /// Returns the current token without consuming it.
108    ///
109    /// # Returns
110    ///
111    /// An optional reference to the current token, or `None` if at end of stream
112    #[inline]
113    pub fn current(&self) -> Option<&Token<L::SyntaxKind>> {
114        self.cache.get_token(self.index)
115    }
116
117    /// Returns the previous token (the one before the current position).
118    ///
119    /// # Returns
120    ///
121    /// An optional reference to the previous token, or `None` if at start of stream
122    #[inline]
123    pub fn previous(&self) -> Option<&Token<L::SyntaxKind>> {
124        if self.index > 0 { self.cache.get_token(self.index - 1) } else { None }
125    }
126
127    /// Advances to the next token and returns it.
128    ///
129    /// # Returns
130    ///
131    /// An optional reference to the consumed token, or `None` if at end of stream
132    #[inline]
133    pub fn advance(&mut self) -> Option<&Token<L::SyntaxKind>> {
134        if self.not_at_end() {
135            let i = self.index;
136            self.index += 1;
137            self.cache.get_token(i)
138        }
139        else {
140            None
141        }
142    }
143
144    /// Returns the kind of the current token without consuming it.
145    ///
146    /// # Returns
147    ///
148    /// An optional token kind, or `None` if at end of stream
149    #[inline]
150    pub fn peek_kind(&self) -> Option<L::SyntaxKind> {
151        self.current().map(|t| t.kind)
152    }
153
154    /// Checks if the current token matches any of the given kinds and consumes it if so.
155    ///
156    /// # Arguments
157    ///
158    /// * `kinds` - Array of token kinds to match against
159    ///
160    /// # Returns
161    ///
162    /// `true` if the current token was consumed (matched), `false` otherwise
163    #[inline]
164    pub fn match_kind(&mut self, kinds: &[L::SyntaxKind]) -> bool {
165        if let Some(t) = self.current() {
166            if kinds.iter().any(|k| *k == t.kind) {
167                self.advance();
168                return true;
169            }
170        }
171        false
172    }
173
174    /// Records a kind error at the specified byte position
175    ///
176    /// # Arguments
177    ///
178    /// * `position` - The byte position where the error occurred
179    /// * `msg` - The error message to record
180    pub fn record_error_at(&mut self, position: usize, msg: impl Into<String>) {
181        let err = self.source.syntax_error(msg, position);
182        self.errors.push(err);
183    }
184
185    /// Records an "unexpected current kind" error
186    ///
187    /// # Arguments
188    ///
189    /// * `msg` - The error message to record
190    pub fn record_unexpected(&mut self, msg: impl Into<String>) {
191        let pos = self.current().map(|t| t.span.start).unwrap_or(self.source.length());
192        self.record_error_at(pos, msg);
193    }
194
195    /// Consumes an expected kind; if it doesn't match, records an error and returns None (suitable for error recovery)
196    ///
197    /// # Arguments
198    ///
199    /// * `kind` - The expected token kind
200    /// * `msg` - The error message to record if the token doesn't match
201    ///
202    /// # Returns
203    ///
204    /// An optional token if the expected kind was found and consumed, `None` otherwise
205    pub fn consume(&mut self, kind: L::SyntaxKind, msg: impl Into<String>) -> Option<Token<L::SyntaxKind>> {
206        if let Some(t) = self.current() {
207            if t.kind == kind {
208                let tok = t.clone();
209                self.index += 1;
210                return Some(tok);
211            }
212        }
213        self.record_unexpected(msg);
214        None
215    }
216
217    /// Finishes parsing and returns the final parse output.
218    ///
219    /// This method consumes the parser state and returns a parse output containing
220    /// either the successfully parsed green tree or parsing errors.
221    ///
222    /// # Arguments
223    ///
224    /// * `result` - The parsing result (Ok for success, Err for failure)
225    ///
226    /// # Returns
227    ///
228    /// A parse output containing the green tree or errors
229    pub fn finish(self, result: Result<(), OakError>) -> ParseOutput<L> {
230        match result {
231            Ok(_) => {
232                if let Some(root) = self.cache.last_parse {
233                    OakDiagnostics { result: Ok(root), diagnostics: self.errors }
234                }
235                else {
236                    OakDiagnostics {
237                        result: Err(OakError::custom_error("Parser finished without building a root node")),
238                        diagnostics: self.errors,
239                    }
240                }
241            }
242            Err(e) => OakDiagnostics { result: Err(e), diagnostics: self.errors },
243        }
244    }
245}