sipha_parse/parser/
mod.rs

1//! High-level parser that evaluates grammar rules.
2//!
3//! The `Parser` is the main entry point for parsing token streams into concrete
4//! syntax trees (CST). It supports grammar-based parsing.
5
6mod grammar;
7mod recovery;
8
9use std::collections::{HashMap, HashSet};
10
11use crate::grammar::{GrammarRule, GrammarRuleParser};
12use crate::pratt::PrattParser;
13use crate::state::ParserState;
14use builder_pattern::Builder;
15use sipha_core::traits::{GrammarContext, NodeId, RuleId, TokenKind};
16use sipha_error::ParseError;
17use sipha_pratt::PrattRuleKind;
18
19use self::grammar::*;
20
21/// Main parser entry point.
22///
23/// The `Parser` is responsible for evaluating grammar rules and building
24/// concrete syntax trees (CST) from token streams. It supports both
25/// grammar-based parsing and Pratt parsing for operator precedence.
26///
27/// # Architecture
28///
29/// The parser uses a recursive descent approach with:
30/// - **Grammar rules**: Traditional parser combinators (seq, choice, many, etc.)
31/// - **Pratt rules**: Operator precedence parsing for expressions
32/// - **Error recovery**: Configurable recovery strategies using sync tokens
33/// - **Memoization**: Optional memoization for packrat parsing
34///
35/// # Performance Characteristics
36///
37/// - **Time Complexity**: O(n) for most grammars, where n is the number of tokens
38/// - **Space Complexity**: O(n) for the CST arena
39/// - **Memoization**: Optional, can be enabled for recursive grammars
40///
41/// # Example
42///
43/// ```rust
44/// use sipha_parse::{Parser, helpers, ParserState};
45/// use sipha_tree::{NodeArena, RawNodeId};
46///
47/// # #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
48/// # enum Token { Ident, Plus }
49/// # impl sipha_core::traits::TokenKind for Token {
50/// #     fn is_trivia(&self) -> bool { false }
51/// # }
52/// # #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
53/// # enum Rule { Expr }
54/// let mut parser = Parser::create();
55/// parser.register_rule(
56///     Rule::Expr,
57///     helpers::seq(vec![
58///         helpers::token(Token::Ident),
59///         helpers::token(Token::Plus),
60///         helpers::token(Token::Ident),
61///     ]),
62/// );
63/// ```
64#[derive(Builder)]
65pub struct Parser<K: TokenKind, R: RuleId> {
66    /// Grammar rules registered with the parser.
67    #[default(HashMap::new())]
68    grammar_rules: HashMap<R, GrammarRule<K, R>>,
69    /// Pratt operator rules registered with the parser.
70    #[default(HashMap::new())]
71    pratt_rules: HashMap<K, PrattRuleKind<K, R>>,
72    /// Synchronization tokens for error recovery.
73    #[default(HashSet::new())]
74    pub(crate) sync_tokens: HashSet<K>,
75    /// Maximum depth for error recovery attempts.
76    #[default(10)]
77    pub(crate) max_recovery_depth: usize,
78}
79
80impl<K: TokenKind, R: RuleId> Parser<K, R> {
81    /// Create a new parser with default settings.
82    ///
83    /// For builder pattern usage, use `Parser::new()` which returns a builder.
84    pub fn create() -> Self {
85        Self::default()
86    }
87
88    /// Set the maximum depth for error recovery attempts.
89    pub fn set_max_recovery_depth(&mut self, depth: usize) {
90        self.max_recovery_depth = depth;
91    }
92
93    /// Register a grammar rule.
94    ///
95    /// Validates the rule before registration (e.g., ensures range min <= max).
96    /// Invalid rules will cause a panic to prevent silent failures.
97    pub fn register_rule(&mut self, rule_id: R, rule: GrammarRule<K, R>) {
98        if let Err(err) = crate::grammar::validate_rule(&rule) {
99            panic!("Invalid grammar rule registered: {:?}", err);
100        }
101        self.grammar_rules.insert(rule_id, rule);
102    }
103
104    /// Configure synchronization tokens for error recovery.
105    pub fn set_sync_tokens(&mut self, tokens: Vec<K>) {
106        self.sync_tokens = tokens.into_iter().collect();
107    }
108
109    /// Register a Pratt operator rule.
110    ///
111    /// This allows the parser to handle operator precedence parsing for the given token kind.
112    pub fn register_pratt_rule(&mut self, token_kind: K, rule: PrattRuleKind<K, R>) {
113        self.pratt_rules.insert(token_kind, rule);
114    }
115
116    /// Parse a top-level rule by identifier.
117    #[must_use = "parse results should be checked for errors"]
118    pub fn parse_rule<'tokens, 'arena, N, Ctx>(
119        &mut self,
120        rule_id: R,
121        state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
122    ) -> Result<N, ParseError<K, R>>
123    where
124        N: NodeId,
125        Ctx: GrammarContext,
126    {
127        let start_pos = state.position();
128        let debug_enabled = std::env::var("SIPHA_DEBUG").is_ok();
129        if let Some(memo_entry) = state.check_memo(rule_id) {
130            if debug_enabled {
131                eprintln!(
132                    "[DEBUG] parse_rule: Found memo entry for {:?} at position {}",
133                    rule_id, start_pos
134                );
135            }
136            match memo_entry {
137                sipha_memo::MemoEntry::Success { node, consumed } => {
138                    let node = *node;
139                    let consumed = *consumed;
140                    state.reset(start_pos + consumed);
141                    return Ok(node);
142                }
143                sipha_memo::MemoEntry::Failure { error, consumed } => {
144                    let error = error.clone();
145                    let consumed = *consumed;
146                    if debug_enabled {
147                        eprintln!(
148                            "[DEBUG] parse_rule: Using cached failure for {:?}: {:?}",
149                            rule_id, error
150                        );
151                    }
152                    state.reset(start_pos + consumed);
153                    return Err(error);
154                }
155            }
156        }
157
158        state.push_rule(rule_id);
159        let result = {
160            let rule = self.grammar_rules.get(&rule_id).cloned().ok_or_else(|| {
161                ParseError::InvalidContext {
162                    rule: "unknown rule",
163                    span: state.current_span(),
164                    rule_context: state.current_rule(),
165                    context_stack: state.build_context_stack(),
166                }
167            })?;
168            self.parse_grammar_rule(rule_id, &rule, state)
169        };
170
171        let consumed = state.position() - start_pos;
172        match &result {
173            Ok(node) => state.store_memo_success(rule_id, start_pos, *node, consumed),
174            Err(error) => state.store_memo_failure(rule_id, start_pos, error.clone(), consumed),
175        }
176
177        state.pop_rule();
178        result
179    }
180}
181
182impl<K: TokenKind, R: RuleId> GrammarRuleParser<K, R> for Parser<K, R> {
183    fn parse_grammar_rule<'tokens, 'arena, N, Ctx>(
184        &mut self,
185        rule_id: R,
186        rule: &GrammarRule<K, R>,
187        state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
188    ) -> Result<N, ParseError<K, R>>
189    where
190        N: NodeId,
191        Ctx: GrammarContext,
192    {
193        match rule {
194            GrammarRule::Sequence(rules) => parse_sequence(self, rule_id, rules, state),
195            GrammarRule::Choice(rules) => parse_choice(self, rule_id, rules, state),
196            GrammarRule::Optional(inner) => parse_optional(self, rule_id, inner, state),
197            GrammarRule::ZeroOrMore(inner) => parse_zero_or_more(self, rule_id, inner, state),
198            GrammarRule::OneOrMore(inner) => parse_one_or_more(self, rule_id, inner, state),
199            GrammarRule::Range {
200                rule: inner,
201                min,
202                max,
203            } => parse_range(self, rule_id, inner, *min, *max, state),
204            GrammarRule::PrattExpr(min_prec) => parse_pratt_expr(self, rule_id, state, *min_prec),
205            GrammarRule::Token(kind) => state.expect_node(*kind),
206            GrammarRule::Rule(id) => self.parse_rule(*id, state),
207            GrammarRule::Conditional {
208                rule: inner,
209                condition,
210            } => {
211                if condition(state.position()) {
212                    self.parse_grammar_rule(rule_id, inner, state)
213                } else {
214                    Err(ParseError::InvalidContext {
215                        rule: "conditional",
216                        span: state.current_span(),
217                        rule_context: state.current_rule(),
218                        context_stack: state.build_context_stack(),
219                    })
220                }
221            }
222            GrammarRule::NegativeLookahead(rule) => {
223                parse_negative_lookahead(self, rule_id, rule, state)
224            }
225            GrammarRule::PositiveLookahead(rule) => {
226                parse_positive_lookahead(self, rule_id, rule, state)
227            }
228        }
229    }
230}
231
232impl<K: TokenKind, R: RuleId> PrattParser<K, R> for Parser<K, R> {
233    fn get_rule(&self, kind: &K) -> Option<&PrattRuleKind<K, R>> {
234        self.pratt_rules.get(kind)
235    }
236}
237
238impl<K: TokenKind, R: RuleId> Default for Parser<K, R> {
239    fn default() -> Self {
240        Parser::new().build()
241    }
242}