Skip to main content

oak_core/parser/
pratt.rs

1use crate::{Language, ParserState, source::Source, tree::GreenNode};
2
3/// Associativity of an operator.
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5pub enum Associativity {
6    /// Left-associative (e.g., a + b + c is (a + b) + c).
7    Left,
8    /// Right-associative (e.g., a = b = c is a = (b = c)).
9    Right,
10    /// Non-associative.
11    None,
12}
13
14/// Precedence and associativity of an operator.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub struct OperatorInfo {
17    /// Precedence of the operator.
18    pub precedence: u8,
19    /// Associativity of the operator.
20    pub associativity: Associativity,
21}
22
23impl OperatorInfo {
24    /// Creates a left-associative operator info.
25    pub fn left(precedence: u8) -> Self {
26        Self { precedence, associativity: Associativity::Left }
27    }
28
29    /// Creates a right-associative operator info.
30    pub fn right(precedence: u8) -> Self {
31        Self { precedence, associativity: Associativity::Right }
32    }
33
34    /// Creates a non-associative operator info.
35    pub fn none(precedence: u8) -> Self {
36        Self { precedence, associativity: Associativity::None }
37    }
38}
39
40/// A specification for a Pratt parser.
41///
42/// Users implement this trait to define the grammar rules for expressions.
43/// Using a trait allows the compiler to optimize operator lookups using match statements.
44///
45/// # Example
46///
47/// ```rust
48/// # use oak_core::{Language, ParserState, Pratt, binary, unary, OperatorInfo, Associativity, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
49/// # use oak_core::tree::GreenNode;
50/// # use oak_core::source::Source;
51/// #
52/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
53/// # #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54/// # enum MyToken { End, Plus, Star, Int }
55/// # impl TokenType for MyToken {
56/// #     const END_OF_STREAM: Self = MyToken::End;
57/// #     type Role = UniversalTokenRole;
58/// #     fn role(&self) -> Self::Role { UniversalTokenRole::None }
59/// # }
60/// #
61/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
62/// # #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
63/// # enum MyElement { Root }
64/// # impl ElementType for MyElement {
65/// #     type Role = UniversalElementRole;
66/// #     fn role(&self) -> Self::Role { UniversalElementRole::None }
67/// # }
68/// # impl From<MyToken> for MyElement {
69/// #     fn from(_: MyToken) -> Self { MyElement::Root }
70/// # }
71/// #
72/// # struct MyLanguage;
73/// # impl Language for MyLanguage {
74/// #     const NAME: &'static str = "test";
75/// #     type TokenType = MyToken;
76/// #     type ElementType = MyElement;
77/// #     type TypedRoot = ();
78/// # }
79/// # #[derive(Clone, Copy)]
80/// # struct MyPratt;
81/// # impl Pratt<MyLanguage> for MyPratt {
82/// #     fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>) -> &'a GreenNode<'a, MyLanguage> {
83/// #         // Parse literals, identifiers, or parenthesized expressions
84/// #         let cp = state.checkpoint();
85/// #         state.finish_at(cp, MyElement::Root)
86/// #     }
87/// #
88/// #     fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, left: &'a GreenNode<'a, MyLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, MyLanguage>> {
89/// #         let token = state.peek_at(0)?;
90/// #         let info = match token.kind {
91/// #             MyToken::Plus => Some(OperatorInfo::left(10)), // '+'
92/// #             MyToken::Star => Some(OperatorInfo::left(20)), // '*'
93/// #             _ => None,
94/// #         }?;
95/// #
96/// #         if info.precedence < min_precedence { return None }
97/// #
98/// #         Some(binary(state, left, token.kind, info.precedence, info.associativity, MyElement::Root, |s, p| self.parse(s, p)))
99/// #     }
100/// # }
101/// # impl MyPratt {
102/// #     fn parse<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, min_precedence: u8) -> &'a GreenNode<'a, MyLanguage> {
103/// #         oak_core::parser::PrattParser::new(*self).parse_expr(state, min_precedence)
104/// #     }
105/// # }
106/// ```
107pub trait Pratt<L: Language> {
108    /// Parses a primary expression (e.g., literals, identifiers, group).
109    fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L>;
110
111    /// Handles prefix operators and primary expressions.
112    ///
113    /// Default implementation just calls `primary`.
114    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L> {
115        self.primary(state)
116    }
117
118    /// Handles infix and postfix operators.
119    ///
120    /// Should return `Some(new_node)` if an operator was parsed, or `None` if no operator matches
121    /// or its precedence is lower than `min_precedence`.
122    fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, min_precedence: u8) -> Option<&'a GreenNode<'a, L>>;
123}
124
125/// A helper for parsing binary (infix) expressions.
126#[inline(always)]
127pub fn binary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, op_precedence: u8, assoc: Associativity, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
128where
129    L: Language,
130    S: Source + ?Sized,
131    F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
132    L::ElementType: From<L::TokenType>,
133{
134    let cp = state.checkpoint_before(left);
135    state.expect(op_kind).ok();
136
137    let next_prec = match assoc {
138        Associativity::Left => op_precedence + 1,
139        Associativity::Right => op_precedence,
140        Associativity::None => op_precedence + 1,
141    };
142
143    let _right = parse_expr(state, next_prec);
144    state.finish_at(cp, result_kind)
145}
146
147/// A helper for parsing unary (prefix) expressions.
148#[inline(always)]
149pub fn unary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, op_kind: L::TokenType, op_precedence: u8, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
150where
151    L: Language,
152    S: Source + ?Sized,
153    F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
154    L::ElementType: From<L::TokenType>,
155{
156    let cp = state.checkpoint();
157    state.expect(op_kind).ok();
158    let _right = parse_expr(state, op_precedence);
159    state.finish_at(cp, result_kind)
160}
161
162/// A helper for parsing postfix expressions.
163#[inline(always)]
164pub fn postfix<'a, L, S>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, result_kind: L::ElementType) -> &'a GreenNode<'a, L>
165where
166    L: Language,
167    S: Source + ?Sized,
168    L::ElementType: From<L::TokenType>,
169{
170    let cp = state.checkpoint_before(left);
171    state.expect(op_kind).ok();
172    state.finish_at(cp, result_kind)
173}
174
175/// A Pratt parser implementation.
176///
177/// # Examples
178///
179/// ```rust
180/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole, ParserState, Pratt, PrattParser, binary, unary, SyntaxArena, LexOutput, SourceText};
181/// # use triomphe::Arc;
182/// #
183/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
184/// #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
185/// enum Token { Number, Plus, Minus, Star, Slash, Eof }
186/// impl TokenType for Token {
187///     const END_OF_STREAM: Self = Token::Eof;
188///     type Role = UniversalTokenRole;
189///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
190/// }
191///
192/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
193/// #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
194/// enum Element { Expr, Number, Plus, Minus, Star, Slash }
195/// impl ElementType for Element {
196///     type Role = UniversalElementRole;
197///     fn role(&self) -> Self::Role { UniversalElementRole::None }
198/// }
199/// impl From<Token> for Element {
200///     fn from(t: Token) -> Self {
201///         match t {
202///             Token::Number => Element::Number,
203///             Token::Plus => Element::Plus,
204///             Token::Minus => Element::Minus,
205///             Token::Star => Element::Star,
206///             Token::Slash => Element::Slash,
207///             Token::Eof => unreachable!(),
208///         }
209///     }
210/// }
211///
212/// #[derive(Clone, Copy)]
213/// struct Lang;
214/// impl Language for Lang {
215///     const NAME: &'static str = "test";
216///     type TokenType = Token;
217///     type ElementType = Element;
218///     type TypedRoot = ();
219/// }
220///
221/// struct ExprParser;
222/// impl Pratt<Lang> for ExprParser {
223///     fn primary<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>) -> &'a oak_core::GreenNode<'a, Lang> {
224///         let cp = state.checkpoint();
225///         state.bump(); // bump number
226///         state.finish_at(cp, Element::Number)
227///     }
228///
229///     fn infix<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, left: &'a oak_core::GreenNode<'a, Lang>, min_prec: u8) -> Option<&'a oak_core::GreenNode<'a, Lang>> {
230///         let kind = state.peek_kind()?;
231///         let (prec, assoc) = match kind {
232///             Token::Plus | Token::Minus => (1, oak_core::Associativity::Left),
233///             Token::Star | Token::Slash => (2, oak_core::Associativity::Left),
234///             _ => return None,
235///         };
236///         if prec < min_prec { return None }
237///         Some(binary(state, left, kind, prec, assoc, Element::Expr, |s, p| self.parse_expr(s, p)))
238///     }
239/// }
240///
241/// impl ExprParser {
242///     fn parse_expr<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, min_prec: u8) -> &'a oak_core::GreenNode<'a, Lang> {
243///         PrattParser::new(ExprParser).parse_expr(state, min_prec)
244///     }
245/// }
246/// ```
247pub struct PrattParser<L: Language, T: Pratt<L>> {
248    spec: T,
249    _marker: core::marker::PhantomData<L>,
250}
251
252impl<L: Language, T: Pratt<L>> PrattParser<L, T> {
253    /// Creates a new Pratt parser with the given specification.
254    pub const fn new(spec: T) -> Self {
255        Self { spec, _marker: core::marker::PhantomData }
256    }
257
258    /// Parses an expression with the given minimum precedence.
259    pub fn parse_expr<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, min_precedence: u8) -> &'a GreenNode<'a, L> {
260        Self::parse(state, min_precedence, &self.spec)
261    }
262
263    /// Static version of parse_expr that takes a specification reference.
264    pub fn parse<'a, S: Source + ?Sized>(state: &mut ParserState<'a, L, S>, min_precedence: u8, spec: &T) -> &'a GreenNode<'a, L> {
265        let mut left = spec.prefix(state);
266        while let Some(node) = spec.infix(state, left, min_precedence) {
267            left = node
268        }
269        left
270    }
271}