arena_terms_parser/parser.rs
//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser consumes tokens produced by [`TermLexer`] and uses a mutable
//! [`Arena`] as shared context to construct arena-allocated [`Term`] values
//! (from the [`arena_terms`] crate) representing atoms, numbers, compound terms,
//! lists, tuples, and other structures.
//!
//! Generated parsing tables and rules are produced by **parlex-gen**’s [`aslr`] tool.
//!
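//! # Example
//!
//! A minimal usage sketch; it mirrors the [`TermParser`] example later in this
//! file, and the input shown here is illustrative:
//!
//! ```rust
//! use arena_terms::Arena;
//! use arena_terms_parser::TermParser;
//! use try_next::{IterInput, TryNextWithContext};
//!
//! let mut arena = Arena::try_with_default_opers().unwrap();
//! let input = IterInput::from("foo(bar, 42).".bytes());
//! let mut parser = TermParser::try_new(input).unwrap();
//! let terms = parser.try_collect_with_context(&mut arena).unwrap();
//! assert_eq!(terms.len(), 1);
//! ```
//!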
//! [`TermParser`]: TermParser
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen

use crate::{TermLexer, TermToken, TokenID, Value};
use arena_terms::{Arena, Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, Term, View};
use parlex::{
    LexerStats, ParlexError, Parser, ParserAction, ParserData, ParserDriver, ParserStats, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID};
use std::marker::PhantomData;
use try_next::TryNextWithContext;

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`TermParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

/// A driver that defines semantic actions for the term parser.
///
/// The [`TermParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and term-specific
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`], and is responsible for building a term.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`TermToken`]s. Must implement
///   [`TryNextWithContext<Arena, Item = TermToken>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = TermToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self, Arena>`:
///   The parser engine parameterized by this driver and context.
/// - `Context = Arena`:
///   Externally supplied context (the term arena).
///
/// # Responsibilities
///
/// The parser driver performs term-specific actions (its construction is
/// sketched after this list):
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   performs semantic actions such as building arena-allocated terms and
///   composing sequences of terms.
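///
/// The driver is constructed internally by [`TermTokenParser::try_new`]; a
/// sketch of the wiring (mirroring the code in `try_new` below):
///
/// ```ignore
/// let driver = TermParserDriver { _marker: PhantomData, terms: Vec::new() };
/// let parser = Parser::new(lexer, driver);
/// ```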
pub struct TermParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
    terms: Vec<Term>,
}

impl<I> ParserDriver for TermParserDriver<I>
where
    I: TryNextWithContext<Arena, LexerStats, Item = TermToken, Error: std::fmt::Display + 'static>,
{
    /// Parser metadata generated from the term grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = TermToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self, Self::Context>;

    /// Context (the term arena).
    type Context = Arena;

    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, used to peek at previously shifted tokens.
    /// - `arena`: Arena used to look up operator definitions.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected [`ParserAction`] to disambiguate the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    ///
    /// # Notes
    /// This grammar contains only **Shift/Reduce** conflicts — cases where
    /// the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other types of conflicts (such as **Reduce/Reduce**) are much more
    /// difficult to handle programmatically and usually require modifying
    /// the grammar itself to eliminate the ambiguity.
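    ///
    /// # Illustration
    ///
    /// A sketch of the decision, assuming an operator table modeled on this
    /// crate's tests (`'+'` infix at precedence 380, `'*'` infix at 400, where
    /// a higher number binds tighter): after parsing `1 + 2` with lookahead
    /// `*`, the completed `'+'` production has precedence 380 < 400, so the
    /// parser shifts, and the expression groups as `'+'(1, '*'(2, 3))`.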
    fn resolve_ambiguity(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, ParlexError> {
        let ambigs = ParData::lookup_ambig(ambig);
        let shift_action = ambigs[0];
        let ParserAction::Shift(_) = shift_action else {
            panic!("expected shift");
        };
        let reduce_action = ambigs[1];
        let ParserAction::Reduce(prod_id) = reduce_action else {
            panic!("expected reduce");
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod_id,
            tok2
        );

        let (fixity1, tok1) = match prod_id {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, parser.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, parser.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, parser.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, parser.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, parser.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, parser.tokens_peek(2))
            }
            _ => {
                return Err(ParlexError {
                    message: format!(
                        "unexpected conflict: reduction of {:?} with shifting token {:?}",
                        prod_id, tok2
                    ),
                    span: tok2.span(),
                });
            }
        };

        let op_tab1 = arena.get_oper(tok1.op_tab_index);
        let op_tab2 = arena.get_oper(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // The lookahead may be defined as both an infix and a postfix
            // operator; compare against the range of its precedences.
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // The completed operator binds tighter: reduce first.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // The lookahead operator binds tighter: shift it.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Equal precedence: fall back to associativity.
                if assoc1 == Assoc::None {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok1
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok2
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    return Err(ParlexError {
                        message: format!(
                            "associativity conflict: cannot chain operators {:?} and {:?}; use parentheses",
                            tok1, tok2
                        ),
                        span: tok2.span(),
                    });
                } else if assoc1 == Assoc::Left {
                    Ok(reduce_action)
                } else {
                    Ok(shift_action)
                }
            } else {
                Err(ParlexError {
                    message: format!(
                        "precedence conflict: cannot chain operators {:?} and {:?}; use parentheses",
                        tok1, tok2
                    ),
                    span: tok2.span(),
                })
            }
        } else {
            Ok(shift_action)
        }
    }

    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod_id`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser’s value stack.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, providing access to the token stack.
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod_id`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (normally not used).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or an inconsistent stack state.
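    ///
    /// # Illustration
    ///
    /// A sketch of the stack protocol for one production (the input is
    /// illustrative): reducing `Expr -> Expr atomOper Expr` on `1 + 2` pops
    /// the three tokens for `2`, `+`, and `1`, builds the compound term
    /// `'+'(1, 2)` in the arena, and pushes a single `Expr` token carrying
    /// `Value::Term` with the merged source span.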
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), ParlexError> {
        match prod_id {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                parser.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                let dot = parser.tokens_pop();
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                expr_tok.merge_span(&dot);
                parser.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, token.span()));
            }

            ProdID::Term4 => {
                // Term -> .
                let dot = parser.tokens_pop();
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, dot.span()));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                let right_paren = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut func_tok = parser.tokens_pop();
                func_tok.merge_span(&right_paren);
                let span = func_tok.span();
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                let right_brack_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                let right_brack_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.span(),
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                let right_brack_tok = parser.tokens_pop();
                let tail = Term::try_from(parser.tokens_pop().value)?;
                // Discard the `|` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);

                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                let right_paren_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                let index = usize::try_from(seq_tok.value)?;

                // The arena terms parser does not currently support unary tuples,
                // so `(expr)` is just `expr`.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 { vs[0] } else { arena.tuple(vs) };
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                let right_paren_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                parser.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = parser.tokens_pop();
                let span = atom_tok.span();
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                let term = arena
                    .normalize_term(atom, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = parser.tokens_pop();
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);
                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1, expr2])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                let expr2_tok = parser.tokens_pop();
                // Discard the `)` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                let expr1_tok = parser.tokens_pop();
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper
                    .view(arena)
                    .map_err(|e| ParlexError::from_err(e, span))?
                {
                    // The arena terms parser currently gives special treatment to
                    // unary minus on integer and real literals (it directly
                    // negates them).
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(
                                expr1
                                    .view(arena)
                                    .map_err(|e| ParlexError::from_err(e, span))?,
                                View::Int(_) | View::Real(_)
                            ) =>
                    {
                        match expr1
                            .view(arena)
                            .map_err(|e| ParlexError::from_err(e, span))?
                        {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena
                            .funcv([oper, expr1])
                            .map_err(|e| ParlexError::from_err(e, span))?;
                        arena
                            .normalize_term(term, Fixity::Prefix, op_tab_index)
                            .map_err(|e| ParlexError::from_err(e, span))?
                    }
                };

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                let expr1_tok = parser.tokens_pop();
                // Discard the `)` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Prefix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&oper_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                let right_paren_tok = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&right_paren_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = parser.tokens_pop();
                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                // Discard the `,` token.
                parser.tokens_pop();
                let mut bare_seq_tok = parser.tokens_pop();

                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                let expr_tok = parser.tokens_pop();
                let span = expr_tok.span();
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                parser.tokens_push(TermToken::new(TokenID::BareSeq, Value::Index(index), span));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                let expr_tok = parser.tokens_pop();
                let expr = Term::try_from(expr_tok.value)?;
                // Discard the `,` token.
                parser.tokens_pop();

                self.terms.push(expr);
            }
        }
        Ok(())
    }
}

/// Prolog-like term token parser with operator precedence and associativity handling.
///
/// The [`TermTokenParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>`].
/// - **Output**: completed parsing units as [`TermToken`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which is returned
/// as a token with [`Value::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a full stop (`.`) but followed by no further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed term.
/// 2. It then emits an additional token representing the *empty* term
///    (`Value::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input.
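///
/// For instance (a minimal sketch; the input is illustrative):
///
/// ```rust
/// # use arena_terms_parser::TermTokenParser;
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("1 .".bytes());
/// let mut parser = TermTokenParser::try_new(input).unwrap();
/// // Two tokens: the term `1`, then the empty term after the trailing `.`.
/// assert_eq!(parser.try_collect_with_context(&mut arena).unwrap().len(), 2);
/// ```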
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`], covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-definition issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{TermToken, TermTokenParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermTokenParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    parser: Parser<TermLexer<I>, TermParserDriver<TermLexer<I>>, Arena>,
}
impl<I> TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermTokenParser`] for the given input stream.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    ///
    /// # Returns
    /// A fully initialized [`TermTokenParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I) -> Result<Self, ParlexError> {
        let lexer = TermLexer::try_new(input)?;
        let driver = TermParserDriver {
            _marker: PhantomData,
            terms: Vec::new(),
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

/// Defines or extends operator definitions directly from a Prolog-like
/// list of `op(...)` terms read from an input source.
///
/// This allows dynamic addition of new operator fixities and precedence
/// rules during parsing.
///
/// # Parameters
/// - `arena`: Arena allocator used for constructing term structures.
/// - `defs_input`: Input byte iterator yielding the operator definition terms.
///
/// # Errors
/// Returns an error if parsing the operator term list fails or produces
/// an invalid operator specification.
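///
/// # Example
///
/// A minimal sketch, with an operator list modeled on the one in this module's
/// tests (the import path assumes the crate-root re-exports used by the other
/// examples in this file):
///
/// ```rust
/// # use arena_terms_parser::define_opers;
/// # use arena_terms::Arena;
/// # use try_next::IterInput;
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let defs = "[op('+'(x,y),infix,380,left), op('*'(x,y),infix,400,left)]";
/// define_opers(&mut arena, IterInput::from(defs.bytes())).unwrap();
/// ```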
pub fn define_opers<I>(arena: &mut Arena, defs_input: I) -> Result<(), ParlexError>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    let mut defs_parser = TermParser::try_new(defs_input)?;
    while let Some(term) = defs_parser.try_next_with_context(arena)? {
        arena
            .define_opers(term)
            .map_err(|e| ParlexError::from_err(e, None))?;
    }
    Ok(())
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Tokens produced by this parser.
    type Item = TermToken;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
    fn try_next_with_context(
        &mut self,
        context: &mut Arena,
    ) -> Result<Option<TermToken>, ParlexError> {
        self.parser.try_next_with_context(context)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermTokenParser`] for
/// tokenization and produces [`Term`] values stored in an [`Arena`] for
/// efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8>`].
/// - **Output**: completed parsing units as [`Term`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying token parser emits an explicit token at the end of each
/// *parsing unit* (end of “sentence” or expression). This parser uses it to
/// finalize and emit one result. If the input contains multiple independent
/// sentences, you will receive multiple results — one per parsing unit — and
/// `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which the underlying
/// [`TermTokenParser`] reports as a token with [`Value::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a full stop (`.`) but followed by no further expression. Unlike
/// [`TermTokenParser`], this parser *skips* such empty terms and yields only
/// the actual [`Term`] values, returning `None` once the input is exhausted.
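///
/// For instance (a minimal sketch; the input is illustrative):
///
/// ```rust
/// # use arena_terms_parser::TermParser;
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let mut parser = TermParser::try_new(IterInput::from("1 .".bytes())).unwrap();
/// // The empty statement after the trailing `.` is skipped: one term results.
/// assert_eq!(parser.try_collect_with_context(&mut arena).unwrap().len(), 1);
/// ```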
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`], covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-definition issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{TermToken, TermParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    pub(crate) parser: TermTokenParser<I>,
}

impl<I> TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermParser`] for the given input stream.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    ///
    /// # Returns
    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I) -> Result<Self, ParlexError> {
        let parser: TermTokenParser<I> = TermTokenParser::try_new(input)?;
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Terms produced by this parser.
    type Item = Term;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next term, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the parser returns `None`.
    fn try_next_with_context(&mut self, context: &mut Arena) -> Result<Option<Term>, ParlexError> {
        // Skip empty statements (`Value::None`) and yield only actual terms.
        while let Some(TermToken { value, .. }) = self.parser.try_next_with_context(context)? {
            match value {
                Value::Term(term) => return Ok(Some(term)),
                Value::None => continue,
                Value::Index(_) => {
                    return Err(ParlexError {
                        message: "index token not expected".to_string(),
                        span: None,
                    });
                }
            }
        }
        Ok(None)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;
    use try_next::IterInput;

    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Vec<Term> {
        let input = IterInput::from(s.bytes());
        let mut parser = TermParser::try_new(input).expect("cannot create parser");
        if let Some(defs) = defs {
            let defs_input = IterInput::from(defs.bytes());
            define_opers(arena, defs_input).expect("cannot define ops");
        }
        let ts = parser
            .try_collect_with_context(arena)
            .expect("parser error");
        dbg!(parser.stats());
        ts
    }

    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .");
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let _ts = parse(arena, None, "2 * 2 <= 5");
    }

    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
        );
    }
}
1095}