oak_core/parser/mod.rs
1mod pratt_parser;
2pub use self::pratt_parser::{Associativity, OperatorInfo, PrattParser, Precedence};
3use crate::{GreenBuilder, GreenNode, IncrementalCache, Language, OakDiagnostics, Token, errors::OakError, source::Source};
4use triomphe::Arc;
5
6/// Output type for parsing operations, containing either a green kind tree or errors.
7pub type ParseOutput<L: Language> = OakDiagnostics<Arc<GreenNode<L::SyntaxKind>>>;
8
9/// Parser trait for converting tokens into kind trees.
10///
11/// This trait provides a unified interface for parsing source text into
12/// green kind trees, supporting both full parsing and incremental updates.
13pub trait Parser<L: Language> {
14 /// Parses source text into a kind tree.
15 ///
16 /// This method performs a full parse of the source text, creating a new
17 /// kind tree from scratch. It uses a default cache configuration.
18 ///
19 /// # Arguments
20 ///
21 /// * `text` - The source text to parse
22 ///
23 /// # Returns
24 ///
25 /// A parse output containing either the green kind tree or errors
26 fn parse(&self, text: impl Source) -> ParseOutput<L> {
27 let mut pool = GreenBuilder::new(0);
28 let cache = IncrementalCache::new(&mut pool);
29 self.parse_incremental(text, 0, cache)
30 }
31
32 /// Parses source text incrementally using an existing cache.
33 ///
34 /// This method enables efficient re-parsing by reusing information from
35 /// previous parsing operations, only processing the changed portions
36 /// of the source text.
37 ///
38 /// # Arguments
39 ///
40 /// * `text` - The source text to parse
41 /// * `changed` - The number of bytes that have changed since the last parse
42 /// * `cache` - The incremental cache containing previous parsing results
43 ///
44 /// # Returns
45 ///
46 /// A parse output containing either the green kind tree or errors
47 fn parse_incremental(&self, text: impl Source, changed: usize, cache: IncrementalCache<L>) -> ParseOutput<L>;
48}
49
50/// Generic parsing state that encapsulates cursor for token stream and error aggregation.
51///
52/// This struct maintains the current parsing position and provides utilities for
53/// consuming tokens, recording errors, and building kind trees incrementally.
54///
55/// # Examples
56///
57/// ```rust
58/// use core::range::Range;
59/// use oak_core::{SourceText, Token, parser::ParserState};
60///
61/// #[derive(Copy, Clone, PartialEq)]
62/// enum K {
63/// A,
64/// B,
65/// Eof,
66/// }
67///
68/// let source = SourceText::new("ab");
69/// let tokens = [
70/// Token { kind: K::A, span: Range { start: 0, end: 1 } },
71/// Token { kind: K::B, span: Range { start: 1, end: 2 } },
72/// Token { kind: K::Eof, span: Range { start: 2, end: 2 } },
73/// ];
74/// let mut st = ParserState::new_with_cache(&source, &tokens);
75/// assert!(st.match_kind(&[K::A]));
76/// assert!(st.match_kind(&[K::B]));
77/// let out = st.finish(Ok(()));
78/// assert!(out.diagnostics.is_empty());
79/// ```
80pub struct ParserState<'a, S: Source, L: Language> {
81 /// The source text being parsed
82 pub source: S,
83 /// The incremental cache containing tokens and previous parse results
84 pub cache: IncrementalCache<'a, L>,
85 /// Current position in the token stream
86 pub index: usize,
87 /// Collection of errors encountered during parsing
88 pub errors: Vec<OakError>,
89}
90
91impl<'a, S: Source, L: Language> ParserState<'a, S, L> {
92 /// Creates a new parser state with the given source text and tokens.
93 #[inline]
94 pub fn new_with_cache(source: S, change: usize, cache: IncrementalCache<'a, L>) -> Self {
95 Self { cache, source, index: 0, errors: Vec::new() }
96 }
97
98 /// Checks if there are more tokens to consume.
99 ///
100 /// # Returns
101 ///
102 /// `true` if there are more tokens to parse, `false` otherwise
103 pub fn not_at_end(&self) -> bool {
104 self.index < self.cache.count_tokens()
105 }
106
107 /// Returns the current token without consuming it.
108 ///
109 /// # Returns
110 ///
111 /// An optional reference to the current token, or `None` if at end of stream
112 #[inline]
113 pub fn current(&self) -> Option<&Token<L::SyntaxKind>> {
114 self.cache.get_token(self.index)
115 }
116
117 /// Returns the previous token (the one before the current position).
118 ///
119 /// # Returns
120 ///
121 /// An optional reference to the previous token, or `None` if at start of stream
122 #[inline]
123 pub fn previous(&self) -> Option<&Token<L::SyntaxKind>> {
124 if self.index > 0 { self.cache.get_token(self.index - 1) } else { None }
125 }
126
127 /// Advances to the next token and returns it.
128 ///
129 /// # Returns
130 ///
131 /// An optional reference to the consumed token, or `None` if at end of stream
132 #[inline]
133 pub fn advance(&mut self) -> Option<&Token<L::SyntaxKind>> {
134 if self.not_at_end() {
135 let i = self.index;
136 self.index += 1;
137 self.cache.get_token(i)
138 }
139 else {
140 None
141 }
142 }
143
144 /// Returns the kind of the current token without consuming it.
145 ///
146 /// # Returns
147 ///
148 /// An optional token kind, or `None` if at end of stream
149 #[inline]
150 pub fn peek_kind(&self) -> Option<L::SyntaxKind> {
151 self.current().map(|t| t.kind)
152 }
153
154 /// Checks if the current token matches any of the given kinds and consumes it if so.
155 ///
156 /// # Arguments
157 ///
158 /// * `kinds` - Array of token kinds to match against
159 ///
160 /// # Returns
161 ///
162 /// `true` if the current token was consumed (matched), `false` otherwise
163 #[inline]
164 pub fn match_kind(&mut self, kinds: &[L::SyntaxKind]) -> bool {
165 if let Some(t) = self.current() {
166 if kinds.iter().any(|k| *k == t.kind) {
167 self.advance();
168 return true;
169 }
170 }
171 false
172 }
173
174 /// Records a kind error at the specified byte position
175 ///
176 /// # Arguments
177 ///
178 /// * `position` - The byte position where the error occurred
179 /// * `msg` - The error message to record
180 pub fn record_error_at(&mut self, position: usize, msg: impl Into<String>) {
181 let err = self.source.syntax_error(msg, position);
182 self.errors.push(err);
183 }
184
185 /// Records an "unexpected current kind" error
186 ///
187 /// # Arguments
188 ///
189 /// * `msg` - The error message to record
190 pub fn record_unexpected(&mut self, msg: impl Into<String>) {
191 let pos = self.current().map(|t| t.span.start).unwrap_or(self.source.length());
192 self.record_error_at(pos, msg);
193 }
194
195 /// Consumes an expected kind; if it doesn't match, records an error and returns None (suitable for error recovery)
196 ///
197 /// # Arguments
198 ///
199 /// * `kind` - The expected token kind
200 /// * `msg` - The error message to record if the token doesn't match
201 ///
202 /// # Returns
203 ///
204 /// An optional token if the expected kind was found and consumed, `None` otherwise
205 pub fn consume(&mut self, kind: L::SyntaxKind, msg: impl Into<String>) -> Option<Token<L::SyntaxKind>> {
206 if let Some(t) = self.current() {
207 if t.kind == kind {
208 let tok = t.clone();
209 self.index += 1;
210 return Some(tok);
211 }
212 }
213 self.record_unexpected(msg);
214 None
215 }
216
217 /// Finishes parsing and returns the final parse output.
218 ///
219 /// This method consumes the parser state and returns a parse output containing
220 /// either the successfully parsed green tree or parsing errors.
221 ///
222 /// # Arguments
223 ///
224 /// * `result` - The parsing result (Ok for success, Err for failure)
225 ///
226 /// # Returns
227 ///
228 /// A parse output containing the green tree or errors
229 pub fn finish(self, result: Result<(), OakError>) -> ParseOutput<L> {
230 match result {
231 Ok(_) => {
232 if let Some(root) = self.cache.last_parse {
233 OakDiagnostics { result: Ok(root), diagnostics: self.errors }
234 }
235 else {
236 OakDiagnostics {
237 result: Err(OakError::custom_error("Parser finished without building a root node")),
238 diagnostics: self.errors,
239 }
240 }
241 }
242 Err(e) => OakDiagnostics { result: Err(e), diagnostics: self.errors },
243 }
244 }
245}