sipha_parse/state/
mod.rs

1//! Parser state and node builder utilities.
2
3mod builder;
4mod recovery;
5
6pub use builder::NodeBuilder;
7pub use recovery::RecoveryStrategy;
8
9use sipha_core::{
10    span::Span,
11    token::Token,
12    token::TokenTrivia,
13    traits::{GrammarContext, NodeId, RuleId, TokenKind},
14};
15use sipha_error::{ErrorContext, Expected, ParseError};
16use sipha_memo::{MemoEntry, MemoTable};
17use sipha_tree::{CstKind, CstNode, NodeArena, NodeChildren};
18
19/// Stateful helper passed around by grammar routines.
20pub struct ParserState<'tokens, 'arena, K, R, N, Ctx = ()>
21where
22    K: TokenKind,
23    R: RuleId,
24    N: NodeId,
25    Ctx: GrammarContext,
26{
27    tokens: &'tokens [Token<K>],
28    pos: usize,
29    pub(crate) arena: &'arena mut NodeArena<K, R, N>,
30    include_trivia: bool,
31    pub context: Ctx,
32    /// Stack of rule IDs currently being parsed, for error context.
33    rule_stack: Vec<R>,
34    /// Memoization table for packrat parsing.
35    pub(crate) memo: Option<&'arena mut MemoTable<K, R, N>>,
36    /// Precomputed index of non-trivia token positions (for optimization).
37    /// This is `None` if `include_trivia` is true or if not yet computed.
38    non_trivia_index: Option<Vec<usize>>,
39}
40
41impl<'tokens, 'arena, K, R, N, Ctx> ParserState<'tokens, 'arena, K, R, N, Ctx>
42where
43    K: TokenKind,
44    R: RuleId,
45    N: NodeId,
46    Ctx: GrammarContext,
47{
48    /// Construct a new parser state.
49    pub fn new(
50        tokens: &'tokens [Token<K>],
51        arena: &'arena mut NodeArena<K, R, N>,
52        include_trivia: bool,
53        context: Ctx,
54    ) -> Self {
55        Self::with_memo(tokens, arena, include_trivia, context, None)
56    }
57
58    /// Construct a new parser state with memoization support.
59    pub fn with_memo(
60        tokens: &'tokens [Token<K>],
61        arena: &'arena mut NodeArena<K, R, N>,
62        include_trivia: bool,
63        context: Ctx,
64        memo: Option<&'arena mut MemoTable<K, R, N>>,
65    ) -> Self {
66        let mut state = Self {
67            tokens,
68            pos: 0,
69            arena,
70            include_trivia,
71            context,
72            rule_stack: Vec::new(),
73            memo,
74            non_trivia_index: None,
75        };
76        if !include_trivia {
77            // Precompute non-trivia index for faster skipping
78            state.non_trivia_index = Some(state.build_non_trivia_index());
79            state.skip_trivia();
80        }
81        state
82    }
83
84    /// Skip trivia tokens at the current cursor.
85    ///
86    /// This method uses the precomputed non-trivia index when available
87    /// for better performance on large token streams.
88    pub fn skip_trivia(&mut self) {
89        if let Some(ref index) = self.non_trivia_index {
90            // Use binary search to find next non-trivia position
91            match index.binary_search(&self.pos) {
92                Ok(_) => {
93                    // Exact match - already at non-trivia position
94                    return;
95                }
96                Err(insert_pos) => {
97                    if insert_pos < index.len() {
98                        // Found next non-trivia position
99                        self.pos = index[insert_pos];
100                        return;
101                    }
102                    // Past end of index, set to end
103                    self.pos = self.tokens.len();
104                    return;
105                }
106            }
107        }
108        // Fallback to linear search (when include_trivia is true or index not built)
109        while self.pos < self.tokens.len() && self.tokens[self.pos].kind.is_trivia() {
110            self.pos += 1;
111        }
112    }
113
114    /// Build an index of non-trivia token positions.
115    ///
116    /// This is used to optimize trivia skipping for large token streams.
117    fn build_non_trivia_index(&self) -> Vec<usize> {
118        self.tokens
119            .iter()
120            .enumerate()
121            .filter(|(_, token)| !token.kind.is_trivia())
122            .map(|(idx, _)| idx)
123            .collect()
124    }
125
126    /// Return a reference to a token `offset` positions away.
127    pub fn peek(&self, offset: usize) -> Option<&Token<K>> {
128        self.tokens.get(self.pos + offset)
129    }
130
131    /// Advance the cursor by one token and return the new token.
132    pub fn advance(&mut self) -> Option<&Token<K>> {
133        if self.pos < self.tokens.len() {
134            self.pos += 1;
135        }
136        if !self.include_trivia {
137            self.skip_trivia();
138        }
139        self.peek(0)
140    }
141
142    /// Consume the current token.
143    pub fn consume(&mut self) -> Option<&'tokens Token<K>> {
144        let pos = self.pos;
145        if pos >= self.tokens.len() {
146            return None;
147        }
148        self.advance();
149        self.tokens.get(pos)
150    }
151
152    /// Whether the cursor points at the provided kind.
153    pub fn at(&self, kind: K) -> bool {
154        self.peek(0).map(|t| t.kind == kind).unwrap_or(false)
155    }
156
157    /// Expect a specific token kind.
158    pub fn expect(&mut self, kind: K) -> Result<&'tokens Token<K>, ParseError<K, R>> {
159        if self.at(kind) {
160            Ok(self.consume().expect("token should be present"))
161        } else {
162            Err(ParseError::Expected {
163                expected: Expected::Single(kind),
164                found: self.peek(0).map(|t| t.kind),
165                span: self.current_span(),
166                rule_context: self.current_rule(),
167                context_stack: self.build_context_stack(),
168            })
169        }
170    }
171
172    /// Expect a token and wrap it inside a CST node.
173    pub fn expect_node(&mut self, kind: K) -> Result<N, ParseError<K, R>> {
174        let token = self.expect(kind)?;
175        Ok(self.alloc_token_node(token))
176    }
177
178    /// Current source span or a default span if the stream is empty.
179    pub fn current_span(&self) -> Span {
180        self.peek(0)
181            .map(|token| token.span.clone())
182            .or_else(|| self.tokens.last().map(|token| token.span.clone()))
183            .unwrap_or_default()
184    }
185
186    /// Reset the cursor back to the provided position.
187    pub fn reset(&mut self, pos: usize) {
188        self.pos = pos.min(self.tokens.len());
189    }
190
191    /// Current cursor position.
192    pub fn position(&self) -> usize {
193        self.pos
194    }
195
196    /// Get the current token (if any).
197    ///
198    /// Returns `None` if at end of input.
199    pub fn current_token(&self) -> Option<&Token<K>> {
200        self.peek(0)
201    }
202
203    /// Get the number of remaining tokens.
204    ///
205    /// This counts all tokens (including trivia if `include_trivia` is true).
206    pub fn remaining_tokens(&self) -> usize {
207        self.tokens.len().saturating_sub(self.pos)
208    }
209
210    /// Check if we're at the end of input.
211    pub fn is_eof(&self) -> bool {
212        self.pos >= self.tokens.len()
213    }
214
215    /// Get the rule stack for debugging/inspection.
216    ///
217    /// Returns a slice of rule IDs currently being parsed.
218    pub fn rule_stack(&self) -> &[R] {
219        &self.rule_stack
220    }
221
222    /// Get the number of nodes in the arena.
223    pub fn arena_size(&self) -> usize {
224        self.arena.len()
225    }
226
227    /// Start building a new CST node for the provided rule.
228    pub fn start_node(&self, rule_id: R) -> NodeBuilder<R, N> {
229        NodeBuilder::new(rule_id, self.current_span())
230    }
231
232    /// Allocate a node inside the arena.
233    pub fn alloc_node(&mut self, kind: CstKind<K, R>, span: Span, children: NodeChildren<N>) -> N {
234        self.arena.alloc(CstNode::create(kind, span, children))
235    }
236
237    fn alloc_rule_node(&mut self, rule_id: R, span: Span, children: NodeChildren<N>) -> N {
238        self.alloc_node(CstKind::Rule(rule_id), span, children)
239    }
240
241    fn alloc_trivia_node(&mut self, trivia: &TokenTrivia<K>) -> N {
242        self.alloc_node(
243            CstKind::Trivia(trivia.kind),
244            trivia.span.clone(),
245            NodeChildren::new(),
246        )
247    }
248
249    fn alloc_token_node(&mut self, token: &Token<K>) -> N {
250        let mut children = NodeChildren::new();
251        for trivia in &token.leading_trivia {
252            children.push(self.alloc_trivia_node(trivia));
253        }
254
255        for trivia in &token.trailing_trivia {
256            children.push(self.alloc_trivia_node(trivia));
257        }
258
259        self.alloc_node(CstKind::Token(token.kind), token.span.clone(), children)
260    }
261
262    /// Push a rule ID onto the rule stack for error context tracking.
263    pub(crate) fn push_rule(&mut self, rule_id: R) {
264        self.rule_stack.push(rule_id);
265    }
266
267    /// Pop a rule ID from the rule stack.
268    pub(crate) fn pop_rule(&mut self) {
269        self.rule_stack.pop();
270    }
271
272    /// Get the current rule context (top of the stack).
273    pub fn current_rule(&self) -> Option<R> {
274        self.rule_stack.last().copied()
275    }
276
277    /// Build an error context stack from the current rule stack.
278    pub fn build_context_stack(&self) -> Vec<ErrorContext<R>> {
279        self.rule_stack
280            .iter()
281            .map(|&rule_id| ErrorContext {
282                rule_id,
283                span: self.current_span(),
284                message: format!("While parsing {:?}", rule_id),
285            })
286            .collect()
287    }
288
289    /// Check the memo table for a cached result at the current position.
290    pub(crate) fn check_memo(&self, rule_id: R) -> Option<&MemoEntry<K, R, N>> {
291        self.memo
292            .as_ref()
293            .and_then(|memo| memo.get(rule_id, self.pos))
294    }
295
296    /// Store a successful parse result in the memo table.
297    pub(crate) fn store_memo_success(
298        &mut self,
299        rule_id: R,
300        position: usize,
301        node: N,
302        consumed: usize,
303    ) {
304        if let Some(memo) = self.memo.as_mut() {
305            memo.store_success(rule_id, position, node, consumed);
306        }
307    }
308
309    /// Store a failed parse result in the memo table.
310    pub(crate) fn store_memo_failure(
311        &mut self,
312        rule_id: R,
313        position: usize,
314        error: ParseError<K, R>,
315        consumed: usize,
316    ) {
317        if let Some(memo) = self.memo.as_mut() {
318            memo.store_failure(rule_id, position, error, consumed);
319        }
320    }
321}