sipha_parse/
pratt.rs

1//! Pratt parser trait for operator precedence parsing.
2//!
3//! This module provides the `PrattParser` trait which enables parsers to handle
4//! operator precedence using the Pratt parsing algorithm.
5
6use crate::state::ParserState;
7use sipha_core::traits::{GrammarContext, NodeId, RuleId, TokenKind};
8use sipha_error::ParseError;
9use sipha_pratt::{Associativity, PrattRuleKind, Precedence};
10
11/// Trait implemented by parsers to support Pratt expression parsing.
12///
13/// The Pratt parsing algorithm is an efficient method for parsing expressions
14/// with operator precedence. This trait provides the necessary methods for
15/// implementing Pratt parsing in sipha.
16///
17/// # How It Works
18///
19/// Pratt parsing uses two functions:
20/// - **nud (null denotation)**: Parses prefix operators and atoms
21/// - **led (left denotation)**: Parses infix, postfix, and ternary operators
22///
23/// The algorithm handles precedence and associativity automatically based on
24/// the precedence values in the operator rules.
25///
26/// # Example
27///
28/// ```rust,ignore
29/// impl PrattParser<Token, Rule> for MyParser {
30///     fn get_rule(&self, kind: &Token) -> Option<&PrattRuleKind<Token, Rule>> {
31///         self.pratt_rules.get(kind)
32///     }
33/// }
34/// ```
35pub trait PrattParser<K: TokenKind, R: RuleId> {
36    /// Lookup an operator rule for the provided token kind.
37    ///
38    /// This method is called by the Pratt parsing algorithm to find the operator
39    /// rule associated with a token. The rule determines how the operator should
40    /// be parsed (prefix, infix, postfix, etc.) and its precedence.
41    ///
42    /// # Arguments
43    ///
44    /// * `kind` - The token kind to look up
45    ///
46    /// # Returns
47    ///
48    /// A reference to the `PrattRuleKind` if found, or `None` if the token
49    /// is not an operator.
50    fn get_rule(&self, kind: &K) -> Option<&PrattRuleKind<K, R>>;
51
52    /// Parse a primary expression (nud - null denotation) using the Pratt rules.
53    ///
54    /// This method handles prefix operators and atoms. It's called at the start
55    /// of parsing an expression or when a prefix operator is encountered.
56    ///
57    /// # Arguments
58    ///
59    /// * `state` - The parser state containing the token stream and arena
60    ///
61    /// # Returns
62    ///
63    /// A parsed node representing the primary expression, or a parse error.
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if:
68    /// - End of input is reached unexpectedly
69    /// - No rule is found for the current token
70    /// - The token is a binary operator (which cannot be a primary)
71    fn parse_primary<'tokens, 'arena, N, Ctx>(
72        &mut self,
73        state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
74    ) -> Result<N, ParseError<K, R>>
75    where
76        N: NodeId,
77        Ctx: GrammarContext,
78    {
79        let (token_kind, token_span) = state
80            .peek(0)
81            .map(|token| (token.kind, token.span.clone()))
82            .ok_or_else(|| ParseError::UnexpectedEof {
83                span: state.current_span(),
84                rule_context: state.current_rule(),
85                context_stack: state.build_context_stack(),
86            })?;
87        let rule = self
88            .get_rule(&token_kind)
89            .cloned()
90            .ok_or(ParseError::NoRule {
91                kind: token_kind,
92                span: token_span.clone(),
93                rule_context: state.current_rule(),
94                context_stack: state.build_context_stack(),
95            })?;
96
97        // Parse based on rule kind (nud - null denotation)
98        match rule {
99            PrattRuleKind::Binary(_) => Err(ParseError::UnexpectedToken {
100                kind: token_kind,
101                span: token_span,
102                rule_context: state.current_rule(),
103                context_stack: state.build_context_stack(),
104            }),
105            PrattRuleKind::Prefix(prefix_rule) => {
106                let mut builder = state.start_node(prefix_rule.rule_id);
107                let op_node = state.expect_node(token_kind)?;
108                builder.node(op_node);
109                let operand = self.parse_expr(state, prefix_rule.prec)?;
110                builder.node(operand);
111                Ok(builder.finish(state))
112            }
113            PrattRuleKind::Atom(atom_rule) => {
114                let mut builder = state.start_node(atom_rule.rule_id);
115                let atom_node = state.expect_node(token_kind)?;
116                builder.node(atom_node);
117                Ok(builder.finish(state))
118            }
119            PrattRuleKind::Postfix(_) => Err(ParseError::UnexpectedToken {
120                kind: token_kind,
121                span: token_span,
122                rule_context: state.current_rule(),
123                context_stack: state.build_context_stack(),
124            }),
125            PrattRuleKind::Ternary(_) => Err(ParseError::UnexpectedToken {
126                kind: token_kind,
127                span: token_span,
128                rule_context: state.current_rule(),
129                context_stack: state.build_context_stack(),
130            }),
131        }
132    }
133
134    /// Parse the remainder of an expression starting at `min_prec`.
135    ///
136    /// This method implements the main Pratt parsing loop. It parses a primary
137    /// expression, then continues parsing infix/postfix operators while their
138    /// precedence is greater than or equal to `min_prec`.
139    ///
140    /// # Arguments
141    ///
142    /// * `state` - The parser state containing the token stream and arena
143    /// * `min_prec` - The minimum precedence required to continue parsing
144    ///
145    /// # Returns
146    ///
147    /// A parsed node representing the complete expression, or a parse error.
148    ///
149    /// # Algorithm
150    ///
151    /// 1. Parse a primary expression (using `parse_primary`)
152    /// 2. While the next token is an operator with precedence >= `min_prec`:
153    ///    - Calculate the next precedence based on associativity
154    ///    - Parse the operator and its right-hand side
155    ///    - Build the expression node
156    /// 3. Return the final expression
157    fn parse_expr<'tokens, 'arena, N, Ctx>(
158        &mut self,
159        state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
160        min_prec: Precedence,
161    ) -> Result<N, ParseError<K, R>>
162    where
163        N: NodeId,
164        Ctx: GrammarContext,
165    {
166        let mut left = self.parse_primary(state)?;
167
168        while let Some((token_kind, token_span)) =
169            state.peek(0).map(|token| (token.kind, token.span.clone()))
170        {
171            let rule = match self.get_rule(&token_kind).cloned() {
172                Some(rule) if rule.precedence() >= min_prec => rule,
173                _ => break,
174            };
175
176            let next_prec = match rule.associativity() {
177                Associativity::Left => rule.precedence() + Precedence(1),
178                Associativity::Right => rule.precedence(),
179                Associativity::None => {
180                    return Err(ParseError::NonAssociative {
181                        kind: token_kind,
182                        span: token_span,
183                        rule_context: state.current_rule(),
184                        context_stack: state.build_context_stack(),
185                    });
186                }
187            };
188
189            // Parse based on rule kind (led - left denotation)
190            left = match rule {
191                PrattRuleKind::Binary(binary_rule) => {
192                    let mut builder = state.start_node(binary_rule.rule_id);
193                    builder.node(left);
194                    let op_node = state.expect_node(token_kind)?;
195                    builder.node(op_node);
196                    let right = self.parse_expr(state, next_prec)?;
197                    builder.node(right);
198                    Ok(builder.finish(state))
199                }
200                PrattRuleKind::Postfix(postfix_rule) => {
201                    let mut builder = state.start_node(postfix_rule.rule_id);
202                    builder.node(left);
203                    let op_node = state.expect_node(token_kind)?;
204                    builder.node(op_node);
205                    Ok(builder.finish(state))
206                }
207                PrattRuleKind::Ternary(ternary_rule) => {
208                    let mut builder = state.start_node(ternary_rule.rule_id);
209                    builder.node(left);
210                    let op_node = state.expect_node(token_kind)?;
211                    builder.node(op_node);
212                    let true_expr = self.parse_expr(state, ternary_rule.true_prec)?;
213                    builder.node(true_expr);
214                    let colon_node = state.expect_node(ternary_rule.colon_token)?;
215                    builder.node(colon_node);
216                    let false_expr = self.parse_expr(state, ternary_rule.false_prec)?;
217                    builder.node(false_expr);
218                    Ok(builder.finish(state))
219                }
220                PrattRuleKind::Prefix(_) | PrattRuleKind::Atom(_) => {
221                    Err(ParseError::UnexpectedToken {
222                        kind: token_kind,
223                        span: token_span,
224                        rule_context: state.current_rule(),
225                        context_stack: state.build_context_stack(),
226                    })
227                }
228            }?;
229        }
230
231        Ok(left)
232    }
233}
234