oak_core/parser/pratt.rs
1use crate::{Language, ParserState, source::Source, tree::GreenNode};
2
3/// Associativity of an operator.
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5pub enum Associativity {
6 /// Left-associative (e.g., a + b + c is (a + b) + c).
7 Left,
8 /// Right-associative (e.g., a = b = c is a = (b = c)).
9 Right,
10 /// Non-associative.
11 None,
12}
13
14/// Precedence and associativity of an operator.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub struct OperatorInfo {
17 /// Precedence of the operator.
18 pub precedence: u8,
19 /// Associativity of the operator.
20 pub associativity: Associativity,
21}
22
23impl OperatorInfo {
24 /// Creates a left-associative operator info.
25 pub fn left(precedence: u8) -> Self {
26 Self { precedence, associativity: Associativity::Left }
27 }
28
29 /// Creates a right-associative operator info.
30 pub fn right(precedence: u8) -> Self {
31 Self { precedence, associativity: Associativity::Right }
32 }
33
34 /// Creates a non-associative operator info.
35 pub fn none(precedence: u8) -> Self {
36 Self { precedence, associativity: Associativity::None }
37 }
38}
39
40/// A specification for a Pratt parser.
41///
42/// Users implement this trait to define the grammar rules for expressions.
43/// Using a trait allows the compiler to optimize operator lookups using match statements.
44///
45/// # Example
46///
47/// ```rust
48/// # use oak_core::{Language, ParserState, Pratt, binary, unary, OperatorInfo, Associativity, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
49/// # use oak_core::tree::GreenNode;
50/// # use oak_core::source::Source;
51/// #
52/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
53/// # #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54/// # enum MyToken { End, Plus, Star, Int }
55/// # impl TokenType for MyToken {
56/// # const END_OF_STREAM: Self = MyToken::End;
57/// # type Role = UniversalTokenRole;
58/// # fn role(&self) -> Self::Role { UniversalTokenRole::None }
59/// # }
60/// #
61/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
62/// # #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
63/// # enum MyElement { Root }
64/// # impl ElementType for MyElement {
65/// # type Role = UniversalElementRole;
66/// # fn role(&self) -> Self::Role { UniversalElementRole::None }
67/// # }
68/// # impl From<MyToken> for MyElement {
69/// # fn from(_: MyToken) -> Self { MyElement::Root }
70/// # }
71/// #
72/// # struct MyLanguage;
73/// # impl Language for MyLanguage {
74/// # const NAME: &'static str = "test";
75/// # type TokenType = MyToken;
76/// # type ElementType = MyElement;
77/// # type TypedRoot = ();
78/// # }
79/// # #[derive(Clone, Copy)]
80/// # struct MyPratt;
81/// # impl Pratt<MyLanguage> for MyPratt {
82/// # fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>) -> &'a GreenNode<'a, MyLanguage> {
83/// # // Parse literals, identifiers, or parenthesized expressions
84/// # let cp = state.checkpoint();
85/// # state.finish_at(cp, MyElement::Root)
86/// # }
87/// #
88/// # fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, left: &'a GreenNode<'a, MyLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, MyLanguage>> {
89/// # let token = state.peek_at(0)?;
90/// # let info = match token.kind {
91/// # MyToken::Plus => Some(OperatorInfo::left(10)), // '+'
92/// # MyToken::Star => Some(OperatorInfo::left(20)), // '*'
93/// # _ => None,
94/// # }?;
95/// #
96/// # if info.precedence < min_precedence { return None }
97/// #
98/// # Some(binary(state, left, token.kind, info.precedence, info.associativity, MyElement::Root, |s, p| self.parse(s, p)))
99/// # }
100/// # }
101/// # impl MyPratt {
102/// # fn parse<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, min_precedence: u8) -> &'a GreenNode<'a, MyLanguage> {
103/// # oak_core::parser::PrattParser::new(*self).parse_expr(state, min_precedence)
104/// # }
105/// # }
106/// ```
107pub trait Pratt<L: Language> {
108 /// Parses a primary expression (e.g., literals, identifiers, group).
109 fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L>;
110
111 /// Handles prefix operators and primary expressions.
112 ///
113 /// Default implementation just calls `primary`.
114 fn prefix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L> {
115 self.primary(state)
116 }
117
118 /// Handles infix and postfix operators.
119 ///
120 /// Should return `Some(new_node)` if an operator was parsed, or `None` if no operator matches
121 /// or its precedence is lower than `min_precedence`.
122 fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, min_precedence: u8) -> Option<&'a GreenNode<'a, L>>;
123}
124
125/// A helper for parsing binary (infix) expressions.
126#[inline(always)]
127pub fn binary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, op_precedence: u8, assoc: Associativity, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
128where
129 L: Language,
130 S: Source + ?Sized,
131 F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
132 L::ElementType: From<L::TokenType>,
133{
134 let cp = state.checkpoint_before(left);
135 state.expect(op_kind).ok();
136
137 let next_prec = match assoc {
138 Associativity::Left => op_precedence + 1,
139 Associativity::Right => op_precedence,
140 Associativity::None => op_precedence + 1,
141 };
142
143 let _right = parse_expr(state, next_prec);
144 state.finish_at(cp, result_kind)
145}
146
147/// A helper for parsing unary (prefix) expressions.
148#[inline(always)]
149pub fn unary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, op_kind: L::TokenType, op_precedence: u8, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
150where
151 L: Language,
152 S: Source + ?Sized,
153 F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
154 L::ElementType: From<L::TokenType>,
155{
156 let cp = state.checkpoint();
157 state.expect(op_kind).ok();
158 let _right = parse_expr(state, op_precedence);
159 state.finish_at(cp, result_kind)
160}
161
162/// A helper for parsing postfix expressions.
163#[inline(always)]
164pub fn postfix<'a, L, S>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, result_kind: L::ElementType) -> &'a GreenNode<'a, L>
165where
166 L: Language,
167 S: Source + ?Sized,
168 L::ElementType: From<L::TokenType>,
169{
170 let cp = state.checkpoint_before(left);
171 state.expect(op_kind).ok();
172 state.finish_at(cp, result_kind)
173}
174
175/// A Pratt parser implementation.
176///
177/// # Examples
178///
179/// ```rust
180/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole, ParserState, Pratt, PrattParser, binary, unary, SyntaxArena, LexOutput, SourceText};
181/// # use triomphe::Arc;
182/// #
183/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
184/// #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
185/// enum Token { Number, Plus, Minus, Star, Slash, Eof }
186/// impl TokenType for Token {
187/// const END_OF_STREAM: Self = Token::Eof;
188/// type Role = UniversalTokenRole;
189/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
190/// }
191///
192/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
193/// #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
194/// enum Element { Expr, Number, Plus, Minus, Star, Slash }
195/// impl ElementType for Element {
196/// type Role = UniversalElementRole;
197/// fn role(&self) -> Self::Role { UniversalElementRole::None }
198/// }
199/// impl From<Token> for Element {
200/// fn from(t: Token) -> Self {
201/// match t {
202/// Token::Number => Element::Number,
203/// Token::Plus => Element::Plus,
204/// Token::Minus => Element::Minus,
205/// Token::Star => Element::Star,
206/// Token::Slash => Element::Slash,
207/// Token::Eof => unreachable!(),
208/// }
209/// }
210/// }
211///
212/// #[derive(Clone, Copy)]
213/// struct Lang;
214/// impl Language for Lang {
215/// const NAME: &'static str = "test";
216/// type TokenType = Token;
217/// type ElementType = Element;
218/// type TypedRoot = ();
219/// }
220///
221/// struct ExprParser;
222/// impl Pratt<Lang> for ExprParser {
223/// fn primary<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>) -> &'a oak_core::GreenNode<'a, Lang> {
224/// let cp = state.checkpoint();
225/// state.bump(); // bump number
226/// state.finish_at(cp, Element::Number)
227/// }
228///
229/// fn infix<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, left: &'a oak_core::GreenNode<'a, Lang>, min_prec: u8) -> Option<&'a oak_core::GreenNode<'a, Lang>> {
230/// let kind = state.peek_kind()?;
231/// let (prec, assoc) = match kind {
232/// Token::Plus | Token::Minus => (1, oak_core::Associativity::Left),
233/// Token::Star | Token::Slash => (2, oak_core::Associativity::Left),
234/// _ => return None,
235/// };
236/// if prec < min_prec { return None }
237/// Some(binary(state, left, kind, prec, assoc, Element::Expr, |s, p| self.parse_expr(s, p)))
238/// }
239/// }
240///
241/// impl ExprParser {
242/// fn parse_expr<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, min_prec: u8) -> &'a oak_core::GreenNode<'a, Lang> {
243/// PrattParser::new(ExprParser).parse_expr(state, min_prec)
244/// }
245/// }
246/// ```
247pub struct PrattParser<L: Language, T: Pratt<L>> {
248 spec: T,
249 _marker: core::marker::PhantomData<L>,
250}
251
252impl<L: Language, T: Pratt<L>> PrattParser<L, T> {
253 /// Creates a new Pratt parser with the given specification.
254 pub const fn new(spec: T) -> Self {
255 Self { spec, _marker: core::marker::PhantomData }
256 }
257
258 /// Parses an expression with the given minimum precedence.
259 pub fn parse_expr<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, min_precedence: u8) -> &'a GreenNode<'a, L> {
260 Self::parse(state, min_precedence, &self.spec)
261 }
262
263 /// Static version of parse_expr that takes a specification reference.
264 pub fn parse<'a, S: Source + ?Sized>(state: &mut ParserState<'a, L, S>, min_precedence: u8, spec: &T) -> &'a GreenNode<'a, L> {
265 let mut left = spec.prefix(state);
266 while let Some(node) = spec.infix(state, left, min_precedence) {
267 left = node
268 }
269 left
270 }
271}