Skip to main content

oak_core/parser/
pratt.rs

1use crate::{Language, ParserState, source::Source, tree::GreenNode};
2
3/// Associativity of an operator.
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5pub enum Associativity {
6    /// Left-associative (e.g., a + b + c is (a + b) + c).
7    Left,
8    /// Right-associative (e.g., a = b = c is a = (b = c)).
9    Right,
10    /// Non-associative.
11    None,
12}
13
14/// Precedence and associativity of an operator.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub struct OperatorInfo {
17    /// Precedence of the operator.
18    pub precedence: u8,
19    /// Associativity of the operator.
20    pub associativity: Associativity,
21}
22
23impl OperatorInfo {
24    /// Creates a left-associative operator info.
25    pub fn left(precedence: u8) -> Self {
26        Self { precedence, associativity: Associativity::Left }
27    }
28
29    /// Creates a right-associative operator info.
30    pub fn right(precedence: u8) -> Self {
31        Self { precedence, associativity: Associativity::Right }
32    }
33
34    /// Creates a non-associative operator info.
35    pub fn none(precedence: u8) -> Self {
36        Self { precedence, associativity: Associativity::None }
37    }
38}
39
40/// A specification for a Pratt parser.
41///
42/// Users implement this trait to define the grammar rules for expressions.
43/// Using a trait allows the compiler to optimize operator lookups using match statements.
44///
45/// # Example
46///
47/// ```rust
48/// # use oak_core::{Language, ParserState, Pratt, binary, unary, OperatorInfo, Associativity};
49/// # use oak_core::tree::GreenNode;
50/// # use oak_core::source::Source;
51/// # struct MyLanguage;
52/// # impl Language for MyLanguage {
53/// #     type TokenType = u16;
54/// #     type ElementType = u16;
55/// # }
56/// # struct MyPratt;
57/// # impl Pratt<MyLanguage> for MyPratt {
58/// #     fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>) -> &'a GreenNode<'a, MyLanguage> {
59/// #         // Parse literals, identifiers, or parenthesized expressions
60/// #         state.checkpoint().finish(1)
61/// #     }
62/// #
63/// #     fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, left: &'a GreenNode<'a, MyLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, MyLanguage>> {
64/// #         let token = state.peek()?;
65/// #         let info = match token.kind {
66/// #             1 => Some(OperatorInfo::left(10)), // '+'
67/// #             2 => Some(OperatorInfo::left(20)), // '*'
68/// #             _ => None,
69/// #         }?;
70/// #
71/// #         if info.precedence < min_precedence { return None }
72/// #
73/// #         Some(binary(state, left, token.kind, info.precedence, info.associativity, 100, |s, p| self.parse(s, p)))
74/// #     }
75/// # }
76/// # impl MyPratt {
77/// #     fn parse<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MyLanguage, S>, min_precedence: u8) -> &'a GreenNode<'a, MyLanguage> {
78/// #         oak_core::parser::PrattParser::new(self).parse(state, min_precedence)
79/// #     }
80/// # }
81/// ```
82pub trait Pratt<L: Language> {
83    /// Parses a primary expression (e.g., literals, identifiers, group).
84    fn primary<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L>;
85
86    /// Handles prefix operators and primary expressions.
87    ///
88    /// Default implementation just calls `primary`.
89    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>) -> &'a GreenNode<'a, L> {
90        self.primary(state)
91    }
92
93    /// Handles infix and postfix operators.
94    ///
95    /// Should return `Some(new_node)` if an operator was parsed, or `None` if no operator matches
96    /// or its precedence is lower than `min_precedence`.
97    fn infix<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, min_precedence: u8) -> Option<&'a GreenNode<'a, L>>;
98}
99
100/// A helper for parsing binary (infix) expressions.
101#[inline(always)]
102pub fn binary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, op_precedence: u8, assoc: Associativity, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
103where
104    L: Language,
105    S: Source + ?Sized,
106    F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
107    L::ElementType: From<L::TokenType>,
108{
109    let cp = state.checkpoint_before(left);
110    state.expect(op_kind).ok();
111
112    let next_prec = match assoc {
113        Associativity::Left => op_precedence + 1,
114        Associativity::Right => op_precedence,
115        Associativity::None => op_precedence + 1,
116    };
117
118    let _right = parse_expr(state, next_prec);
119    state.finish_at(cp, result_kind)
120}
121
122/// A helper for parsing unary (prefix) expressions.
123#[inline(always)]
124pub fn unary<'a, L, S, F>(state: &mut ParserState<'a, L, S>, op_kind: L::TokenType, op_precedence: u8, result_kind: L::ElementType, mut parse_expr: F) -> &'a GreenNode<'a, L>
125where
126    L: Language,
127    S: Source + ?Sized,
128    F: FnMut(&mut ParserState<'a, L, S>, u8) -> &'a GreenNode<'a, L>,
129    L::ElementType: From<L::TokenType>,
130{
131    let cp = state.checkpoint();
132    state.expect(op_kind).ok();
133    let _right = parse_expr(state, op_precedence);
134    state.finish_at(cp, result_kind)
135}
136
137/// A helper for parsing postfix expressions.
138#[inline(always)]
139pub fn postfix<'a, L, S>(state: &mut ParserState<'a, L, S>, left: &'a GreenNode<'a, L>, op_kind: L::TokenType, result_kind: L::ElementType) -> &'a GreenNode<'a, L>
140where
141    L: Language,
142    S: Source + ?Sized,
143    L::ElementType: From<L::TokenType>,
144{
145    let cp = state.checkpoint_before(left);
146    state.expect(op_kind).ok();
147    state.finish_at(cp, result_kind)
148}
149
150/// A Pratt parser implementation.
151///
152/// # Examples
153///
154/// ```rust
155/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole, ParserState, Pratt, PrattParser, binary, unary, SyntaxArena, LexOutput, SourceText};
156/// # use triomphe::Arc;
157/// #
158/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
159/// enum Token { Number, Plus, Minus, Star, Slash, Eof }
160/// impl TokenType for Token {
161///     const END_OF_STREAM: Self = Token::Eof;
162///     type Role = UniversalTokenRole;
163///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
164/// }
165///
166/// #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
167/// enum Element { Expr, Number, Plus, Minus, Star, Slash }
168/// impl ElementType for Element {
169///     type Role = UniversalElementRole;
170///     fn role(&self) -> Self::Role { UniversalElementRole::None }
171/// }
172/// impl From<Token> for Element {
173///     fn from(t: Token) -> Self {
174///         match t {
175///             Token::Number => Element::Number,
176///             Token::Plus => Element::Plus,
177///             Token::Minus => Element::Minus,
178///             Token::Star => Element::Star,
179///             Token::Slash => Element::Slash,
180///             Token::Eof => unreachable!(),
181///         }
182///     }
183/// }
184///
185/// #[derive(Clone, Copy)]
186/// struct Lang;
187/// impl Language for Lang {
188///     const NAME: &'static str = "test";
189///     type TokenType = Token;
190///     type ElementType = Element;
191///     type TypedRoot = ();
192/// }
193///
194/// struct ExprParser;
195/// impl Pratt<Lang> for ExprParser {
196///     fn primary<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>) -> &'a oak_core::GreenNode<'a, Lang> {
197///         let cp = state.checkpoint();
198///         state.bump(); // bump number
199///         state.finish_at(cp, Element::Number)
200///     }
201///
202///     fn infix<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, left: &'a oak_core::GreenNode<'a, Lang>, min_prec: u8) -> Option<&'a oak_core::GreenNode<'a, Lang>> {
203///         let kind = state.peek_kind()?;
204///         let (prec, assoc) = match kind {
205///             Token::Plus | Token::Minus => (1, oak_core::Associativity::Left),
206///             Token::Star | Token::Slash => (2, oak_core::Associativity::Left),
207///             _ => return None,
208///         }
209///         if prec < min_prec { return None }
210///         Some(binary(state, left, kind, prec, assoc, Element::Expr, |s, p| self.parse_expr(s, p)))
211///     }
212/// }
213///
214/// impl ExprParser {
215///     fn parse_expr<'a, S: oak_core::Source + ?Sized>(&self, state: &mut ParserState<'a, Lang, S>, min_prec: u8) -> &'a oak_core::GreenNode<'a, Lang> {
216///         PrattParser::new(ExprParser).parse_expr(state, min_prec)
217///     }
218/// }
219/// ```
220pub struct PrattParser<L: Language, T: Pratt<L>> {
221    spec: T,
222    _marker: core::marker::PhantomData<L>,
223}
224
225impl<L: Language, T: Pratt<L>> PrattParser<L, T> {
226    /// Creates a new Pratt parser with the given specification.
227    pub const fn new(spec: T) -> Self {
228        Self { spec, _marker: core::marker::PhantomData }
229    }
230
231    /// Parses an expression with the given minimum precedence.
232    pub fn parse_expr<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, L, S>, min_precedence: u8) -> &'a GreenNode<'a, L> {
233        Self::parse(state, min_precedence, &self.spec)
234    }
235
236    /// Static version of parse_expr that takes a specification reference.
237    pub fn parse<'a, S: Source + ?Sized>(state: &mut ParserState<'a, L, S>, min_precedence: u8, spec: &T) -> &'a GreenNode<'a, L> {
238        let mut left = spec.prefix(state);
239        while let Some(node) = spec.infix(state, left, min_precedence) {
240            left = node
241        }
242        left
243    }
244}