sipha_parse/pratt.rs
1//! Pratt parser trait for operator precedence parsing.
2//!
3//! This module provides the `PrattParser` trait which enables parsers to handle
4//! operator precedence using the Pratt parsing algorithm.
5
6use crate::state::ParserState;
7use sipha_core::traits::{GrammarContext, NodeId, RuleId, TokenKind};
8use sipha_error::ParseError;
9use sipha_pratt::{Associativity, PrattRuleKind, Precedence};
10
11/// Trait implemented by parsers to support Pratt expression parsing.
12///
13/// The Pratt parsing algorithm is an efficient method for parsing expressions
14/// with operator precedence. This trait provides the necessary methods for
15/// implementing Pratt parsing in sipha.
16///
17/// # How It Works
18///
19/// Pratt parsing uses two functions:
20/// - **nud (null denotation)**: Parses prefix operators and atoms
21/// - **led (left denotation)**: Parses infix, postfix, and ternary operators
22///
23/// The algorithm handles precedence and associativity automatically based on
24/// the precedence values in the operator rules.
25///
26/// # Example
27///
28/// ```rust,ignore
29/// impl PrattParser<Token, Rule> for MyParser {
30/// fn get_rule(&self, kind: &Token) -> Option<&PrattRuleKind<Token, Rule>> {
31/// self.pratt_rules.get(kind)
32/// }
33/// }
34/// ```
35pub trait PrattParser<K: TokenKind, R: RuleId> {
36 /// Lookup an operator rule for the provided token kind.
37 ///
38 /// This method is called by the Pratt parsing algorithm to find the operator
39 /// rule associated with a token. The rule determines how the operator should
40 /// be parsed (prefix, infix, postfix, etc.) and its precedence.
41 ///
42 /// # Arguments
43 ///
44 /// * `kind` - The token kind to look up
45 ///
46 /// # Returns
47 ///
48 /// A reference to the `PrattRuleKind` if found, or `None` if the token
49 /// is not an operator.
50 fn get_rule(&self, kind: &K) -> Option<&PrattRuleKind<K, R>>;
51
52 /// Parse a primary expression (nud - null denotation) using the Pratt rules.
53 ///
54 /// This method handles prefix operators and atoms. It's called at the start
55 /// of parsing an expression or when a prefix operator is encountered.
56 ///
57 /// # Arguments
58 ///
59 /// * `state` - The parser state containing the token stream and arena
60 ///
61 /// # Returns
62 ///
63 /// A parsed node representing the primary expression, or a parse error.
64 ///
65 /// # Errors
66 ///
67 /// Returns an error if:
68 /// - End of input is reached unexpectedly
69 /// - No rule is found for the current token
70 /// - The token is a binary operator (which cannot be a primary)
71 fn parse_primary<'tokens, 'arena, N, Ctx>(
72 &mut self,
73 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
74 ) -> Result<N, ParseError<K, R>>
75 where
76 N: NodeId,
77 Ctx: GrammarContext,
78 {
79 let (token_kind, token_span) = state
80 .peek(0)
81 .map(|token| (token.kind, token.span.clone()))
82 .ok_or_else(|| ParseError::UnexpectedEof {
83 span: state.current_span(),
84 rule_context: state.current_rule(),
85 context_stack: state.build_context_stack(),
86 })?;
87 let rule = self
88 .get_rule(&token_kind)
89 .cloned()
90 .ok_or(ParseError::NoRule {
91 kind: token_kind,
92 span: token_span.clone(),
93 rule_context: state.current_rule(),
94 context_stack: state.build_context_stack(),
95 })?;
96
97 // Parse based on rule kind (nud - null denotation)
98 match rule {
99 PrattRuleKind::Binary(_) => Err(ParseError::UnexpectedToken {
100 kind: token_kind,
101 span: token_span,
102 rule_context: state.current_rule(),
103 context_stack: state.build_context_stack(),
104 }),
105 PrattRuleKind::Prefix(prefix_rule) => {
106 let mut builder = state.start_node(prefix_rule.rule_id);
107 let op_node = state.expect_node(token_kind)?;
108 builder.node(op_node);
109 let operand = self.parse_expr(state, prefix_rule.prec)?;
110 builder.node(operand);
111 Ok(builder.finish(state))
112 }
113 PrattRuleKind::Atom(atom_rule) => {
114 let mut builder = state.start_node(atom_rule.rule_id);
115 let atom_node = state.expect_node(token_kind)?;
116 builder.node(atom_node);
117 Ok(builder.finish(state))
118 }
119 PrattRuleKind::Postfix(_) => Err(ParseError::UnexpectedToken {
120 kind: token_kind,
121 span: token_span,
122 rule_context: state.current_rule(),
123 context_stack: state.build_context_stack(),
124 }),
125 PrattRuleKind::Ternary(_) => Err(ParseError::UnexpectedToken {
126 kind: token_kind,
127 span: token_span,
128 rule_context: state.current_rule(),
129 context_stack: state.build_context_stack(),
130 }),
131 }
132 }
133
134 /// Parse the remainder of an expression starting at `min_prec`.
135 ///
136 /// This method implements the main Pratt parsing loop. It parses a primary
137 /// expression, then continues parsing infix/postfix operators while their
138 /// precedence is greater than or equal to `min_prec`.
139 ///
140 /// # Arguments
141 ///
142 /// * `state` - The parser state containing the token stream and arena
143 /// * `min_prec` - The minimum precedence required to continue parsing
144 ///
145 /// # Returns
146 ///
147 /// A parsed node representing the complete expression, or a parse error.
148 ///
149 /// # Algorithm
150 ///
151 /// 1. Parse a primary expression (using `parse_primary`)
152 /// 2. While the next token is an operator with precedence >= `min_prec`:
153 /// - Calculate the next precedence based on associativity
154 /// - Parse the operator and its right-hand side
155 /// - Build the expression node
156 /// 3. Return the final expression
157 fn parse_expr<'tokens, 'arena, N, Ctx>(
158 &mut self,
159 state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
160 min_prec: Precedence,
161 ) -> Result<N, ParseError<K, R>>
162 where
163 N: NodeId,
164 Ctx: GrammarContext,
165 {
166 let mut left = self.parse_primary(state)?;
167
168 while let Some((token_kind, token_span)) =
169 state.peek(0).map(|token| (token.kind, token.span.clone()))
170 {
171 let rule = match self.get_rule(&token_kind).cloned() {
172 Some(rule) if rule.precedence() >= min_prec => rule,
173 _ => break,
174 };
175
176 let next_prec = match rule.associativity() {
177 Associativity::Left => rule.precedence() + Precedence(1),
178 Associativity::Right => rule.precedence(),
179 Associativity::None => {
180 return Err(ParseError::NonAssociative {
181 kind: token_kind,
182 span: token_span,
183 rule_context: state.current_rule(),
184 context_stack: state.build_context_stack(),
185 });
186 }
187 };
188
189 // Parse based on rule kind (led - left denotation)
190 left = match rule {
191 PrattRuleKind::Binary(binary_rule) => {
192 let mut builder = state.start_node(binary_rule.rule_id);
193 builder.node(left);
194 let op_node = state.expect_node(token_kind)?;
195 builder.node(op_node);
196 let right = self.parse_expr(state, next_prec)?;
197 builder.node(right);
198 Ok(builder.finish(state))
199 }
200 PrattRuleKind::Postfix(postfix_rule) => {
201 let mut builder = state.start_node(postfix_rule.rule_id);
202 builder.node(left);
203 let op_node = state.expect_node(token_kind)?;
204 builder.node(op_node);
205 Ok(builder.finish(state))
206 }
207 PrattRuleKind::Ternary(ternary_rule) => {
208 let mut builder = state.start_node(ternary_rule.rule_id);
209 builder.node(left);
210 let op_node = state.expect_node(token_kind)?;
211 builder.node(op_node);
212 let true_expr = self.parse_expr(state, ternary_rule.true_prec)?;
213 builder.node(true_expr);
214 let colon_node = state.expect_node(ternary_rule.colon_token)?;
215 builder.node(colon_node);
216 let false_expr = self.parse_expr(state, ternary_rule.false_prec)?;
217 builder.node(false_expr);
218 Ok(builder.finish(state))
219 }
220 PrattRuleKind::Prefix(_) | PrattRuleKind::Atom(_) => {
221 Err(ParseError::UnexpectedToken {
222 kind: token_kind,
223 span: token_span,
224 rule_context: state.current_rule(),
225 context_stack: state.build_context_stack(),
226 })
227 }
228 }?;
229 }
230
231 Ok(left)
232 }
233}
234