oak_core/parser/pratt.rs
1use crate::{Language, ParserState, source::Source, tree::GreenNode};
2
3/// Associativity of an operator.
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5pub enum Associativity {
6 /// Left-associative (e.g., a + b + c is (a + b) + c).
7 Left,
8 /// Right-associative (e.g., a = b = c is a = (b = c)).
9 Right,
10 /// Non-associative.
11 None,
12}
13
14/// Precedence and associativity of an operator.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub struct OperatorInfo {
17 /// Precedence of the operator.
18 pub precedence: u8,
19 /// Associativity of the operator.
20 pub associativity: Associativity,
21}
22
23impl OperatorInfo {
24 /// Creates a left-associative operator info.
25 pub fn left(precedence: u8) -> Self {
26 Self { precedence, associativity: Associativity::Left }
27 }
28
29 /// Creates a right-associative operator info.
30 pub fn right(precedence: u8) -> Self {
31 Self { precedence, associativity: Associativity::Right }
32 }
33
34 /// Creates a non-associative operator info.
35 pub fn none(precedence: u8) -> Self {
36 Self { precedence, associativity: Associativity::None }
37 }
38}
39
40/// A specification for a Pratt parser.
41///
42/// Users implement this trait to define the grammar rules for expressions.
43/// Using a trait allows the compiler to optimize operator lookups using match statements.
44///
45/// # Example
46///
47/// ```rust
48/// # use oak_core::{Language, ParserState, Pratt, binary, unary, OperatorInfo, Associativity};
49/// # use oak_core::tree::GreenNode;
50/// # use oak_core::source::Source;
51/// # struct MyLanguage;
52/// # impl Language for MyLanguage {
53/// # type TokenType = u16;
54/// # type ElementType = u16;
55/// # }
56/// # struct MyPratt;
57/// # impl Pratt<MyLanguage> for MyPratt {
58/// # fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>) -> &'a GreenNode<'a, MyLanguage> {
59/// # // Parse literals, identifiers, or parenthesized expressions
60/// # state.checkpoint().finish(1)
61/// # }
62/// #
63/// # fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, left: &'a GreenNode<'a, MyLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, MyLanguage>> {
64/// # let token = state.peek()?;
65/// # let info = match token.kind {
66/// # 1 => Some(OperatorInfo::left(10)), // '+'
67/// # 2 => Some(OperatorInfo::left(20)), // '*'
68/// # _ => None,
69/// # }?;
70/// #
71/// # if info.precedence < min_precedence { return None }
72/// #
73/// # Some(binary(state, left, token.kind, info.precedence, info.associativity, 100, |s, p| self.parse(s, p)))
74/// # }
75/// # }
76/// # impl MyPratt {
77/// # fn parse<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, min_precedence: u8) -> &'a GreenNode<'a, MyLanguage> {
78/// # oak_core::parser::PrattParser::new(self).parse(state, min_precedence)
79/// # }
80/// # }
81/// ```
82pub trait Pratt<L: Language> {
83 /// Parses a primary expression (e.g., literals, identifiers, group).
84 fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L>;
85
86 /// Handles prefix operators and primary expressions.
87 ///
88 /// Default implementation just calls `primary`.
89 fn prefix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L> {
90 self.primary(state)
91 }
92
93 /// Handles infix and postfix operators.
94 ///
95 /// Should return `Some(new_node)` if an operator was parsed, or `None` if no operator matches
96 /// or its precedence is lower than `min_precedence`.
97 fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, min_precedence: u8) -> Option<&'a GreenNode<'a, L>>;
98}
99
100/// A helper for parsing binary (infix) expressions.
101#[inline(always)]
102pub fn binary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, op_precedence: u8, assoc: Associativity, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
103where
104 L: Language,
105 S: Source + ?Sized,
106 F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
107 L::ElementType: From<L::TokenType>,
108{
109 let cp = state.checkpoint_before(left);
110 state.expect(op_kind).ok();
111
112 let next_prec = match assoc {
113 Associativity::Left => op_precedence + 1,
114 Associativity::Right => op_precedence,
115 Associativity::None => op_precedence + 1,
116 };
117
118 let _right = parse_expr(state, next_prec);
119 state.finish_at(cp, result_kind)
120}
121
122/// A helper for parsing unary (prefix) expressions.
123#[inline(always)]
124pub fn unary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, op_kind: L::TokenType, op_precedence: u8, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
125where
126 L: Language,
127 S: Source + ?Sized,
128 F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
129 L::ElementType: From<L::TokenType>,
130{
131 let cp = state.checkpoint();
132 state.expect(op_kind).ok();
133 let _right = parse_expr(state, op_precedence);
134 state.finish_at(cp, result_kind)
135}
136
137/// A helper for parsing postfix expressions.
138#[inline(always)]
139pub fn postfix<'a, L, S>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, result_kind: L::ElementType) -> &'a GreenNode<'a, L>
140where
141 L: Language,
142 S: Source + ?Sized,
143 L::ElementType: From<L::TokenType>,
144{
145 let cp = state.checkpoint_before(left);
146 state.expect(op_kind).ok();
147 state.finish_at(cp, result_kind)
148}
149
150/// A Pratt parser implementation.
151///
152/// # Examples
153///
154/// ```rust
155/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole, ParserState, Pratt, PrattParser, binary, unary, SyntaxArena, LexOutput, SourceText};
156/// # use triomphe::Arc;
157/// #
158/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
159/// enum Token { Number, Plus, Minus, Star, Slash, Eof }
160/// impl TokenType for Token {
161/// const END_OF_STREAM: Self = Token::Eof;
162/// type Role = UniversalTokenRole;
163/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
164/// }
165///
166/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
167/// enum Element { Expr, Number, Plus, Minus, Star, Slash }
168/// impl ElementType for Element {
169/// type Role = UniversalElementRole;
170/// fn role(&self) -> Self::Role { UniversalElementRole::None }
171/// }
172/// impl From<Token> for Element {
173/// fn from(t: Token) -> Self {
174/// match t {
175/// Token::Number => Element::Number,
176/// Token::Plus => Element::Plus,
177/// Token::Minus => Element::Minus,
178/// Token::Star => Element::Star,
179/// Token::Slash => Element::Slash,
180/// Token::Eof => unreachable!(),
181/// }
182/// }
183/// }
184///
185/// #[derive(Clone, Copy)]
186/// struct Lang;
187/// impl Language for Lang {
188/// const NAME: &'static str = "test";
189/// type TokenType = Token;
190/// type ElementType = Element;
191/// type TypedRoot = ();
192/// }
193///
194/// struct ExprParser;
195/// impl Pratt<Lang> for ExprParser {
196/// fn primary<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>) -> &'a oak_core::GreenNode<'a, Lang> {
197/// let cp = state.checkpoint();
198/// state.bump(); // bump number
199/// state.finish_at(cp, Element::Number)
200/// }
201///
202/// fn infix<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, left: &'a oak_core::GreenNode<'a, Lang>, min_prec: u8) -> Option<&'a oak_core::GreenNode<'a, Lang>> {
203/// let kind = state.peek_kind()?;
204/// let (prec, assoc) = match kind {
205/// Token::Plus | Token::Minus => (1, oak_core::Associativity::Left),
206/// Token::Star | Token::Slash => (2, oak_core::Associativity::Left),
207/// _ => return None,
208/// }
209/// if prec < min_prec { return None }
210/// Some(binary(state, left, kind, prec, assoc, Element::Expr, |s, p| self.parse_expr(s, p)))
211/// }
212/// }
213///
214/// impl ExprParser {
215/// fn parse_expr<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, min_prec: u8) -> &'a oak_core::GreenNode<'a, Lang> {
216/// PrattParser::new(ExprParser).parse_expr(state, min_prec)
217/// }
218/// }
219/// ```
220pub struct PrattParser<L: Language, T: Pratt<L>> {
221 spec: T,
222 _marker: core::marker::PhantomData<L>,
223}
224
225impl<L: Language, T: Pratt<L>> PrattParser<L, T> {
226 /// Creates a new Pratt parser with the given specification.
227 pub const fn new(spec: T) -> Self {
228 Self { spec, _marker: core::marker::PhantomData }
229 }
230
231 /// Parses an expression with the given minimum precedence.
232 pub fn parse_expr<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, min_precedence: u8) -> &'a GreenNode<'a, L> {
233 Self::parse(state, min_precedence, &self.spec)
234 }
235
236 /// Static version of parse_expr that takes a specification reference.
237 pub fn parse<'a, S: Source + ?Sized>(state: &mut ParserState<'a, L, S>, min_precedence: u8, spec: &T) -> &'a GreenNode<'a, L> {
238 let mut left = spec.prefix(state);
239 while let Some(node) = spec.infix(state, left, min_precedence) {
240 left = node
241 }
242 left
243 }
244}