// arena_terms_parser/parser.rs

//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser constructs arena-allocated [`Term`] values (from the [`arena_terms`] crate)
//! representing atoms, numbers, compound terms, lists, tuples, and other structures.
//!
//! # Components
//! - [`TermLexer`]: Produces [`TermToken`]s for the parser.
//! - [`OperDefs`]: Defines fixity, precedence, and associativity.
//! - [`TermParser`]: Performs syntax analysis using generated SLR tables.
//!
//! Generated parsing tables and rules are produced by **parlex-gen**’s [`aslr`] tool.
//!
//! [`TermParser`]: TermParser
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen
// Crate-local lexer/operator machinery, arena-backed terms, and the parlex
// SLR runtime; `smartstring::String` shadows `std::string::String` here.
use crate::lexer::{TermLexer, TermToken, Value};
use crate::oper::{Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, OperDef, OperDefTab, OperDefs};
use anyhow::{Context, Result, anyhow, bail};
use arena_terms::{Arena, IntoTerm, Term, View, atom, func, list};
use parlex::{Lexer, LexerCtx, LexerData, Token};
use smartstring::alias::String;
use std::iter::FusedIterator;
use std::str::FromStr;
use std::{fmt, mem};
33
/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types (e.g. `ParData`, `TokenID`, `ProdID`, `AmbigID`)
/// used by the [`TermParser`].
include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
40
41/// Constructs the default operator definitions used by the [`TermParser`].
42///
43/// This function populates an [`OperDefs`] table in the given [`Arena`],
44/// defining built-in operators such as `-` (prefix), `++` (infix), and `=` (infix),
45/// along with their precedence and associativity rules.
46///
47/// ```prolog
48/// [ op(-(x), prefix, 800, right, none, false),
49///   op(++(x, y), infix, 500, left, none, false),
50///   op(=(x, y), infix, 100, right, none, false),
51///   op(op(f,
52///         =(type, fun),
53///         =(prec, 0),
54///         =(assoc, none),
55///         =(rename_to, none),
56///         =(embed_type, false)),
57///      fun, 0, none, none, false)
58/// ]
59/// ```
60///
61/// The resulting definitions form the standard operator environment available
62/// to the parser when no user-defined operator table is provided.
63///
64/// # Parameters
65/// - `arena`: The [`Arena`] used for allocating operator term structures.
66///
67/// # Returns
68/// An initialized [`OperDefs`] instance containing the default operator set.
69///
70/// [`TermParser`]: crate::parser::TermParser
71/// [`OperDefs`]: crate::oper::OperDefs
72/// [`Arena`]: arena_terms::Arena
73/// [`aslr`]: https://crates.io/crates/parlex-gen
74pub fn parser_oper_defs(arena: &mut Arena) -> OperDefs {
75    let term = list![
76        func!(
77            "op";
78            func!("-"; atom!("x")),
79            atom!("prefix"),
80            800,
81            atom!("right"),
82            atom!("none"),
83            atom!("false"),
84        ),
85        func!(
86            "op";
87            func!("++"; atom!("x"), atom!("y")),
88            atom!("infix"),
89            500,
90            atom!("left"),
91            atom!("none"),
92            atom!("false"),
93        ),
94        func!(
95            "op";
96            func!("="; atom!("x"), atom!("y")),
97            atom!("infix"),
98            100,
99            atom!("right"),
100            atom!("none"),
101            atom!("false"),
102        ),
103        func!(
104            "op";
105            func!(
106                "op";
107                atom!("f"),
108                func!("="; atom!("type"), atom!("fun")),
109                func!("="; atom!("prec"), 0),
110                func!("="; atom!("assoc"), atom!("none")),
111                func!("="; atom!("rename_to"), atom!("none")),
112                func!("="; atom!("embed_type"), atom!("false")),
113            ),
114            atom!("fun"),
115            0,
116            atom!("none"),
117            atom!("none"),
118            atom!("false"),
119        ),
120        => arena
121    ];
122    OperDefs::try_from_ops(arena, term).unwrap()
123}
124
/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) runtime library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Core Components
/// - [`ParserCtx`] — manages parse states, actions, and reductions generated by
///   the `aslr` tool.
/// - [`TermLexer`] — provides token streams of [`TermToken`]s for the parser.
/// - [`Arena`] — stores terms compactly for minimal heap allocation.
///
/// # Typical Workflow
/// 1. Create a [`TermParser`] from a byte iterator (`try_new`).
/// 2. (Optionally) extend or redefine operator sets using [`define_opers`].
/// 3. Call [`try_next_term`] or [`try_collect_terms`] to parse one or more
///    terms into arena-backed [`Term`]s.
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::oper::OperDefs
/// [`ParserCtx`]: parlex::ParserCtx
/// [`TermLexer`]: crate::lexer::TermLexer
/// [`TermToken`]: crate::lexer::TermToken
pub struct TermParser<I>
where
    I: FusedIterator<Item = u8>,
{
    /// The parser context that manages parse state, tables, and token input.
    ///
    /// This field wraps the generated `ParserData` and integrates the lexer
    /// with the runtime parser loop from [`parlex`]. It drives token reading,
    /// state transitions, and reductions.
    ctx: ParserCtx<TermLexer<I>, <Self as Parser<Arena>>::ParserData, Arena>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
    terms: Vec<Term>,
}
172
173/// Implementation of [`TermParser`] methods.
174///
175/// This `impl` defines the core construction and execution logic for the
176/// Prolog-like term parser. It provides utilities to initialize a new
177/// parser instance, collect or stream parsed [`Term`] values, dynamically
178/// extend operator definitions, and normalize parsed terms.
179///
180/// The parser integrates with the [`parlex`] runtime library and operates over
181/// tokens produced by the [`TermLexer`], yielding arena-allocated [`Term`]
182/// values suitable for further semantic processing.
183///
184/// # Type Parameters
185/// - `I`: The input source, which must implement [`FusedIterator`] yielding bytes.
186impl<I> TermParser<I>
187where
188    I: FusedIterator<Item = u8>,
189{
190    /// Creates a new [`TermParser`] for the given input stream.
191    ///
192    /// Initializes an internal [`TermLexer`] (with optional operator definitions)
193    /// and wraps it in a [`ParserCtx`].
194    ///
195    /// # Parameters
196    /// - `input`: A fused iterator over bytes to be parsed.
197    /// - `opers`: Optional [`OperDefs`] defining operator precedence and fixity.
198    ///
199    /// # Returns
200    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
201    ///
202    /// # Errors
203    /// Returns an error if the lexer context cannot be initialized
204    /// or if the generated parser tables fail to load.
205    pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
206        let lexer = TermLexer::try_new(input, opers)?;
207        let ctx = ParserCtx::new(lexer);
208        Ok(Self {
209            ctx,
210            terms: Vec::new(),
211        })
212    }
213
214    /// Parses all terms from the input until end-of-stream.
215    ///
216    /// Repeatedly calls [`try_next_term`] until no more terms are available,
217    /// collecting them into a single [`Vec`] of [`Term`]s.
218    ///
219    /// # Returns
220    /// A vector of all successfully parsed [`Term`]s.
221    ///
222    /// # Errors
223    /// Returns an error if any term fails syntactic or semantic validation.
224    pub fn try_collect_terms(&mut self, arena: &mut Arena) -> Result<Vec<Term>> {
225        let mut ts = Vec::new();
226        while let Some(t) = self.try_next_term(arena)? {
227            ts.push(t);
228        }
229        Ok(ts)
230    }
231
232    /// Parses the next term from the input stream.
233    ///
234    /// Returns the next complete [`Term`], or `None` if end-of-input is reached.
235    /// This method performs incremental parsing suitable for stream-based term input.
236    ///
237    /// # Parameters
238    /// - `arena`: Arena for allocating internal term structures.
239    ///
240    /// # Errors
241    /// Returns an error on unexpected tokens, incomplete constructs,
242    /// or invalid operator combinations.
243    #[inline]
244    pub fn try_next_term(&mut self, arena: &mut Arena) -> Result<Option<Term>> {
245        while let Some(tok) = self.try_next(arena)? {
246            match tok.token_id {
247                TokenID::Term => match tok.value {
248                    Value::None => {}
249                    Value::Term(term) => return Ok(Some(term)),
250                    value => bail!("Unexpected token value {:?}", value),
251                },
252                token_id => bail!("Unexpected token id {:?}", token_id),
253            }
254        }
255        Ok(None)
256    }
257
258    /// Defines or extends operator definitions directly from a Prolog-like
259    /// `op/6` term list read from a separate input source.
260    ///
261    /// This allows dynamic addition of new operator fixities and precedence
262    /// rules during runtime parsing.
263    ///
264    /// # Parameters
265    /// - `arena`: Arena allocator used for constructing term structures.
266    /// - `defs_input`: Input byte iterator yielding the operator definition terms.
267    /// - `opers`: Optional initial operator table to extend.
268    ///   If `None`, the default operator definitions are used.
269    ///
270    /// # Errors
271    /// Returns an error if parsing the operator term list fails or produces
272    /// an invalid operator specification.
273    pub fn define_opers<J: FusedIterator<Item = u8>>(
274        &mut self,
275        arena: &mut Arena,
276        defs_input: J,
277        opers: Option<OperDefs>,
278    ) -> Result<()> {
279        let opers = match opers {
280            Some(opers) => opers,
281            None => parser_oper_defs(arena),
282        };
283
284        let defs_lexer = TermLexer::try_new(defs_input, Some(opers))?;
285        let defs_ctx = ParserCtx::new(defs_lexer);
286        let mut defs_parser = TermParser {
287            ctx: defs_ctx,
288            terms: Vec::new(),
289        };
290        while let Some(term) = defs_parser.try_next_term(arena)? {
291            log::trace!(
292                "Stats: {:?}, {:?}",
293                defs_parser.ctx().lexer.stats(),
294                defs_parser.stats()
295            );
296            defs_parser
297                .ctx_mut()
298                .lexer
299                .opers
300                .define_opers(arena, term)?;
301        }
302        let defs_opers = std::mem::take(&mut defs_parser.ctx_mut().lexer.opers);
303        self.ctx_mut().lexer.opers = defs_opers;
304
305        Ok(())
306    }
307
    /// Normalizes a parsed term using its operator definition.
    ///
    /// This process transforms terms according to their declared fixity,
    /// applying named default arguments and other attributes specified
    /// in the corresponding operator definition.
    ///
    /// # Parameters
    /// - `arena`: Arena used to store normalized term structures.
    /// - `term`: The parsed term to normalize.
    /// - `fixity`: Operator fixity (`fun`, `prefix`, `infix`, or `postfix`).
    /// - `op_tab_index`: Optional index into the operator definition table, if the
    ///   term corresponds to a defined operator.
    ///
    /// # Returns
    /// A normalized [`Term`] allocated in the given arena, ready for evaluation or
    /// further semantic analysis.
    ///
    /// # Errors
    /// Returns an error if normalization fails due to invalid fixity, mismatched
    /// arity, or inconsistent operator metadata.
    fn normalize_term(
        &self,
        arena: &mut Arena,
        term: Term,
        fixity: Fixity,
        op_tab_index: Option<usize>,
    ) -> Result<Term> {
        // Look up the operator definition for this token's table entry and fixity.
        match self.ctx().lexer.opers.get(op_tab_index)[fixity] {
            Some(ref op_def) => {
                // Decompose the term into its functor and argument slice.
                // Atoms act as zero-argument operators; anything that is
                // neither an atom nor a compound passes through unchanged.
                let (functor, vs) = match term.view(arena)? {
                    View::Atom(_) => (term, &[] as &[Term]),
                    View::Func(_, functor, args) => {
                        if args.is_empty() {
                            bail!("invalid Func");
                        }
                        (*functor, args)
                    }
                    _ => {
                        return Ok(term);
                    }
                };
                let name = functor.atom_name(arena)?;

                // Each fixity implies a minimum number of positional arguments
                // (e.g. both operands of an infix operator).
                let n_required_args = OperDef::required_arity(fixity);
                if vs.len() < n_required_args {
                    bail!(
                        "missing {} required arguments in term {:?}",
                        n_required_args - vs.len(),
                        name
                    );
                }

                // One slot per declared argument; filled first positionally,
                // then by `name = value` pairs, then by declared defaults.
                let args = &op_def.args;
                let mut xs: Vec<Option<Term>> = vec![None; args.len()];

                for (i, value) in vs.iter().enumerate() {
                    if i < n_required_args {
                        // Required arguments are always positional.
                        xs[i] = Some(*value);
                    } else {
                        match value.view(arena)? {
                            // A binary `=` compound is treated as a named
                            // argument: `name = value`.
                            View::Func(ar, functor, vs)
                                if vs.len() == 2 && functor.atom_name(ar)? == "=" =>
                            {
                                let arg_name = vs[0].atom_name(arena)?;

                                if let Some(pos) = args.iter().position(|x| x.name == arg_name) {
                                    if xs[pos].is_none() {
                                        xs[pos] = Some(vs[1]);
                                    } else {
                                        bail!(
                                            "cannot redefine argument {:?} at position {} in {:?}",
                                            arg_name,
                                            pos,
                                            name
                                        );
                                    }
                                } else {
                                    bail!("invalid argument name {:?} in {:?}", arg_name, name);
                                }
                            }
                            // Any other term is a plain positional argument.
                            // NOTE(review): `xs[i]` assumes vs.len() <= args.len();
                            // a call with more positional args than declared slots
                            // would panic here — confirm upstream guarantees this.
                            _ => {
                                if xs[i].is_none() {
                                    xs[i] = Some(*value);
                                } else {
                                    bail!(
                                        "cannot redefine argument {:?} at position {} in {:?}",
                                        args[i].name,
                                        i,
                                        name
                                    );
                                }
                            }
                        }
                    }
                }

                // Fill unassigned slots from declared defaults; collecting into
                // Option<Vec<_>> yields None if any slot remains empty.
                let vs: Option<Vec<_>> = xs
                    .into_iter()
                    .enumerate()
                    .map(|(i, x)| x.or(args[i].default))
                    .collect();
                let mut vs = match vs {
                    Some(vs) => vs,
                    None => bail!("missing arguments in {:?}", name),
                };

                // The definition may rename the functor of the normalized term.
                let rename_to = match op_def.rename_to {
                    Some(rename_to) => rename_to,
                    None => functor,
                };

                // Optionally prepend the fixity (as an atom) as the first argument.
                if op_def.embed_fixity {
                    vs.insert(0, arena.atom(String::from(fixity)));
                }

                // Zero-argument results collapse to the (possibly renamed) atom.
                if vs.is_empty() {
                    Ok(rename_to)
                } else {
                    Ok(arena.funcv(std::iter::once(&rename_to).chain(vs.iter()))?)
                }
            }
            // No definition: plain functions pass through; operator fixities
            // without a definition are an error.
            None => match fixity {
                Fixity::Fun => Ok(term),
                _ => bail!("missing opdef for fixity {:?}", fixity),
            },
        }
    }
435}
436
437/// Implements the [`Parser`] trait for [`TermParser`], integrating with the **parlex** runtime library.
438///
439/// This binding connects the generated SLR parser tables (`ParData`) with the concrete
440/// term parser. It exposes the parser context, statistics, and the key callbacks
441/// required during shift/reduce parsing: ambiguity resolution and reductions.
442///
443/// # Associated Types
444/// - `Lexer`      — The input lexer producing [`TermToken`]s (`TermLexer<I>`).
445/// - `ParserData` — The generated parser tables and rule enums (`ParData`).
446impl<I> Parser<Arena> for TermParser<I>
447where
448    I: FusedIterator<Item = u8>,
449{
450    type Lexer = TermLexer<I>;
451    type ParserData = ParData;
452
    /// Returns a shared reference to the internal [`ParserCtx`]
    /// (parse state, tables, and token input).
    fn ctx(&self) -> &ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &self.ctx
    }
457
    /// Returns a mutable reference to the internal [`ParserCtx`],
    /// allowing the runtime loop (and callers) to mutate lexer and parse state.
    fn ctx_mut(&mut self) -> &mut ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &mut self.ctx
    }
462
    /// Returns cumulative parsing statistics (tokens, shifts, reductions, ambiguities).
    ///
    /// The value is cloned out of the context, so the returned snapshot does
    /// not borrow the parser.
    fn stats(&self) -> ParserStats {
        self.ctx().stats.clone()
    }
467
    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `_arena`: Arena used to allocate or inspect terms.
    /// - `ambig`:  The generated ambiguity ID (`AmbigID`).
    /// - `tok2`:   The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected parser [`Action`] to disambiguate the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    fn resolve_ambiguity(
        &mut self,
        _arena: &mut Arena,
        ambig: AmbigID,
        tok2: &TermToken,
    ) -> Result<Action> {
        let ambigs = ParData::lookup_ambig(ambig);

        // The generated ambiguity entry is expected to hold the shift action
        // first and the reduce action second; the asserts pin that layout.
        let shift_action = ambigs[0];
        assert!(matches!(shift_action, Action::Shift(_)));

        let reduce_action = ambigs[1];
        assert!(matches!(reduce_action, Action::Reduce(_)));

        let Action::Reduce(prod) = reduce_action else {
            bail!("can't match reduce action")
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod,
            tok2
        );

        // For each operator production, peek back into the token stack to find
        // the operator token of the production being reduced (`tok1`); the
        // peek offset follows the production shapes shown in the comments.
        let (fixity1, tok1) = match prod {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, self.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, self.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, self.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, self.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, self.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, self.tokens_peek(2))
            }
            _ => bail!(
                "unexpected conflict: reduction of {:?} with shifting token {:?}",
                prod,
                tok2
            ),
        };

        let op_tab1 = self.ctx().lexer.opers.get(tok1.op_tab_index);
        let op_tab2 = self.ctx().lexer.opers.get(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            // If the reduced production's token has no definition for this
            // fixity, fall back to shifting.
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // The lookahead may act as infix or postfix; compute the range of
            // precedences it could take. Missing fixities default to the
            // extreme value that leaves the min/max unaffected.
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // Reduced operator outranks every reading of the lookahead.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // Lookahead outranks the reduced operator in some reading.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Equal precedence: associativity decides, and both sides must
                // agree on a non-`None` associativity to be chainable.
                if assoc1 == Assoc::None {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok1
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok2
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    bail!(
                        "associativity conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                        tok1,
                        tok2
                    );
                } else {
                    // Left-associative chains reduce first; right-associative shift.
                    if assoc1 == Assoc::Left {
                        Ok(reduce_action)
                    } else {
                        Ok(shift_action)
                    }
                }
            } else {
                // prec1 falls strictly inside (min_prec2, max_prec2): the two
                // readings of the lookahead disagree — unresolvable here.
                bail!(
                    "precedence conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                    tok1,
                    tok2
                );
            }
        } else {
            // The lookahead is not an operator: no conflict, keep shifting.
            Ok(shift_action)
        }
    }
628
629    /// Performs a grammar reduction for the given production rule.
630    ///
631    /// Applies the semantic action for `prod`, typically constructing or
632    /// normalizing an arena-backed [`Term`], and pushes the resulting token
633    /// onto the parser’s value stack.
634    ///
635    /// # Parameters
636    /// - `arena`: Arena used to allocate or inspect terms.
637    /// - `prod`:  The production being reduced (`ProdID`).
638    /// - `token`: The lookahead token (normally not used).
639    ///
640    /// # Errors
641    /// Returns an error if the reduction fails due to arity mismatches,
642    /// invalid operator metadata, or inconsistent stack state.
643    fn reduce(&mut self, arena: &mut Arena, prod: ProdID, token: &TermToken) -> Result<()> {
644        match prod {
645            ProdID::Start => {
646                // Accept - does not get reduced
647                unreachable!()
648            }
649
650            ProdID::Term1 => {
651                // Term -> Expr
652                let mut expr_tok = self.tokens_pop()?;
653                expr_tok.token_id = TokenID::Term;
654                self.tokens_push(expr_tok);
655            }
656
657            ProdID::Term2 => {
658                // Term -> Expr .
659                self.tokens_pop()?;
660                let mut expr_tok = self.tokens_pop()?;
661                expr_tok.token_id = TokenID::Term;
662                self.tokens_push(expr_tok);
663            }
664
665            ProdID::Term3 => {
666                // Term ->
667                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
668            }
669
670            ProdID::Term4 => {
671                // Term -> .
672                self.tokens_pop()?;
673                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
674            }
675
676            ProdID::Func => {
677                // Expr -> func Seq )
678                self.tokens_pop()?;
679                let index = usize::try_from(self.tokens_pop()?.value)?;
680                let func_tok = self.tokens_pop()?;
681                let line_no = func_tok.line_no;
682                let op_tab_index = func_tok.op_tab_index;
683                let functor = Term::try_from(func_tok.value)?;
684
685                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
686                let term = arena.funcv(vs)?;
687                self.terms.truncate(index);
688
689                let term = self.normalize_term(arena, term, Fixity::Fun, op_tab_index)?;
690
691                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
692            }
693
694            ProdID::List => {
695                // Expr -> [ Seq ]
696                self.tokens_pop()?;
697                let seq_tok = self.tokens_pop()?;
698                let left_brack_tok = self.tokens_pop()?;
699                let index = usize::try_from(seq_tok.value)?;
700
701                let term = arena.list(&self.terms[index..]);
702                self.terms.truncate(index);
703
704                self.tokens_push(TermToken::new(
705                    TokenID::Expr,
706                    Value::Term(term),
707                    left_brack_tok.line_no,
708                ));
709            }
710
711            ProdID::Nil => {
712                // Expr -> [ ]
713                self.tokens_pop()?;
714                let left_brack_tok = self.tokens_pop()?;
715                self.tokens_push(TermToken::new(
716                    TokenID::Expr,
717                    Value::Term(Term::NIL),
718                    left_brack_tok.line_no,
719                ));
720            }
721
722            ProdID::List2 => {
723                // Expr -> [ Seq | Expr ]
724                self.tokens_pop()?;
725                let tail = Term::try_from(self.tokens_pop()?.value)?;
726                self.tokens_pop()?;
727                let index = usize::try_from(self.tokens_pop()?.value)?;
728                let left_brack_tok = self.tokens_pop()?;
729
730                let term = arena.listc(&self.terms[index..], tail);
731                self.terms.truncate(index);
732
733                self.tokens_push(TermToken::new(
734                    TokenID::Expr,
735                    Value::Term(term),
736                    left_brack_tok.line_no,
737                ));
738            }
739
740            ProdID::Tuple => {
741                // Expr -> ( Seq )
742                self.tokens_pop()?;
743                let seq_tok = self.tokens_pop()?;
744                let left_paren_tok = self.tokens_pop()?;
745
746                let index = usize::try_from(seq_tok.value)?;
747
748                // Arena terms parser does not currently support unary tuples.
749                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
750                let vs = &self.terms[index..];
751                let term = if vs.len() == 1 {
752                    vs[0]
753                } else {
754                    arena.tuple(vs)
755                };
756                self.terms.truncate(index);
757
758                self.tokens_push(TermToken::new(
759                    TokenID::Expr,
760                    Value::Term(term),
761                    left_paren_tok.line_no,
762                ));
763            }
764
765            ProdID::Unit => {
766                // Expr -> ( )
767                self.tokens_pop()?;
768                let left_paren_tok = self.tokens_pop()?;
769                self.tokens_push(TermToken::new(
770                    TokenID::Expr,
771                    Value::Term(Term::UNIT),
772                    left_paren_tok.line_no,
773                ));
774            }
775
776            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
777                // Expr -> xxx
778                let mut tok = self.tokens_pop()?;
779                tok.token_id = TokenID::Expr;
780                self.tokens_push(tok);
781            }
782
783            ProdID::Atom => {
784                // Expr -> atom
785                let atom_tok = self.tokens_pop()?;
786                let line_no = atom_tok.line_no;
787                let op_tab_index = atom_tok.op_tab_index;
788
789                let atom = Term::try_from(atom_tok.value)?;
790
791                let term = self.normalize_term(arena, atom, Fixity::Fun, op_tab_index)?;
792
793                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
794            }
795
796            ProdID::Infix1 => {
797                // Expr -> Expr atomOper Expr
798                let expr2_tok = self.tokens_pop()?;
799                let oper_tok = self.tokens_pop()?;
800                let expr1_tok = self.tokens_pop()?;
801                let line_no = expr1_tok.line_no;
802                let op_tab_index = oper_tok.op_tab_index;
803
804                let expr2 = Term::try_from(expr2_tok.value)?;
805                let oper = Term::try_from(oper_tok.value)?;
806                let expr1 = Term::try_from(expr1_tok.value)?;
807
808                let term = arena.funcv([oper, expr1, expr2])?;
809                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;
810
811                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
812            }
813
814            ProdID::Infix2 => {
815                // Expr -> Expr func Seq ) Expr
816                let expr2_tok = self.tokens_pop()?;
817                self.tokens_pop()?;
818                let index = usize::try_from(self.tokens_pop()?.value)?;
819                let oper_tok = self.tokens_pop()?;
820                let expr1_tok = self.tokens_pop()?;
821                let line_no = expr1_tok.line_no;
822                let op_tab_index = oper_tok.op_tab_index;
823
824                let expr2 = Term::try_from(expr2_tok.value)?;
825                let oper = Term::try_from(oper_tok.value)?;
826                let expr1 = Term::try_from(expr1_tok.value)?;
827
828                let xs = [oper, expr1, expr2];
829                let vs = xs.iter().chain(self.terms[index..].iter());
830                let term = arena.funcv(vs)?;
831                self.terms.truncate(index);
832
833                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;
834
835                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
836            }
837
838            ProdID::Prefix1 => {
839                // Expr -> atom Expr
840                let expr1_tok = self.tokens_pop()?;
841                let oper_tok = self.tokens_pop()?;
842                let line_no = oper_tok.line_no;
843                let op_tab_index = oper_tok.op_tab_index;
844
845                let expr1 = Term::try_from(expr1_tok.value)?;
846                let oper = Term::try_from(oper_tok.value)?;
847
848                let term = match oper.view(arena)? {
849                    // Arena terms parser currently gives special treatment to unary minus
850                    // on integer and real literals (it directly negates them).
851                    // TODO: Consider handling minus at the lexical level.
852                    View::Atom(s)
853                        if s == "-"
854                            && matches!(expr1.view(arena)?, View::Int(_) | View::Real(_)) =>
855                    {
856                        match expr1.view(arena)? {
857                            View::Int(i) => arena.int(-i),
858                            View::Real(r) => arena.real(-r),
859                            _ => unreachable!(),
860                        }
861                    }
862                    _ => {
863                        let term = arena.funcv([oper, expr1])?;
864                        self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?
865                    }
866                };
867
868                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
869            }
870
871            ProdID::Prefix2 => {
872                // Expr -> func Seq ) Expr
873                let expr1_tok = self.tokens_pop()?;
874                self.tokens_pop()?;
875                let index = usize::try_from(self.tokens_pop()?.value)?;
876                let oper_tok = self.tokens_pop()?;
877                let line_no = oper_tok.line_no;
878                let op_tab_index = oper_tok.op_tab_index;
879
880                let oper = Term::try_from(oper_tok.value)?;
881                let expr1 = Term::try_from(expr1_tok.value)?;
882
883                let xs = [oper, expr1];
884                let vs = xs.iter().chain(self.terms[index..].iter());
885                let term = arena.funcv(vs)?;
886                self.terms.truncate(index);
887
888                let term = self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?;
889
890                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
891            }
892
893            ProdID::Postfix1 => {
894                // Expr -> Expr atomOper
895                let oper_tok = self.tokens_pop()?;
896                let expr1_tok = self.tokens_pop()?;
897                let line_no = expr1_tok.line_no;
898                let op_tab_index = oper_tok.op_tab_index;
899
900                let oper = Term::try_from(oper_tok.value)?;
901                let expr1 = Term::try_from(expr1_tok.value)?;
902
903                let term = arena.funcv([oper, expr1])?;
904                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;
905
906                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
907            }
908
909            ProdID::Postfix2 => {
910                // Expr -> Expr func Seq )
911                self.tokens_pop()?;
912                let index = usize::try_from(self.tokens_pop()?.value)?;
913                let oper_tok = self.tokens_pop()?;
914                let expr1_tok = self.tokens_pop()?;
915                let line_no = expr1_tok.line_no;
916                let op_tab_index = oper_tok.op_tab_index;
917
918                let oper = Term::try_from(oper_tok.value)?;
919                let expr1 = Term::try_from(expr1_tok.value)?;
920
921                let xs = [oper, expr1];
922                let vs = xs.iter().chain(self.terms[index..].iter());
923                let term = arena.funcv(vs)?;
924                self.terms.truncate(index);
925
926                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;
927
928                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
929            }
930
931            ProdID::Seq1 => {
932                // Seq -> BareSeq
933                let mut bare_seq_tok = self.tokens_pop()?;
934                bare_seq_tok.token_id = TokenID::Seq;
935                self.tokens_push(bare_seq_tok);
936            }
937
938            ProdID::Seq2 => {
939                // Seq -> BareSeq ,
940                self.tokens_pop()?;
941                let mut bare_seq_tok = self.tokens_pop()?;
942                bare_seq_tok.token_id = TokenID::Seq;
943                self.tokens_push(bare_seq_tok);
944            }
945
946            ProdID::BareSeq1 => {
947                // BareSeq -> Expr
948                let expr_tok = self.tokens_pop()?;
949                let line_no = expr_tok.line_no;
950                let expr = Term::try_from(expr_tok.value)?;
951
952                let index = self.terms.len();
953                self.terms.push(expr);
954
955                self.tokens_push(TermToken::new(
956                    TokenID::BareSeq,
957                    Value::Index(index),
958                    line_no,
959                ));
960            }
961
962            ProdID::BareSeq2 => {
963                // BareSeq -> BareSeq , Expr
964                let expr_tok = self.tokens_pop()?;
965                let expr = Term::try_from(expr_tok.value)?;
966                self.tokens_pop()?;
967
968                self.terms.push(expr);
969            }
970        }
971        Ok(())
972    }
973}
974
975/// Unit tests for the [`TermParser`] implementation.
976#[cfg(test)]
977mod tests {
978    use super::*;
979
    /// Operator-definition source shared by the tests below.
    ///
    /// Each `op(...)` entry declares an operator template together with its
    /// fixity (`infix`/`prefix`/`postfix`), precedence, and associativity
    /// (`left`/`right`/`none`), e.g. `op('+'(x,y),infix,380,left)`.
    /// The postfix `'-'` entry carries `rename_to=some('postfix_minus')`,
    /// giving it a functor name distinct from the infix `'-'`
    /// (presumably to keep the two readings apart — see [`OperDefs`]).
    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;
997
998    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Result<Vec<Term>> {
999        let mut parser = TermParser::try_new(s.bytes().fuse(), Some(parser_oper_defs(arena)))?;
1000        if let Some(defs) = defs {
1001            parser.define_opers(arena, defs.bytes().fuse(), None)?;
1002        }
1003        parser.try_collect_terms(arena)
1004    }
1005
1006    #[test]
1007    fn one_term() {
1008        let _ = env_logger::builder().is_test(true).try_init();
1009        let arena = &mut Arena::new();
1010        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .").unwrap();
1011        dbg!(&ts);
1012        let s = format!("{}", ts[0].display(arena));
1013        dbg!(&s);
1014        assert_eq!(ts.len(), 1);
1015        assert_eq!(s, "'<='('*'(2, 2), 5)");
1016    }
1017
1018    #[test]
1019    #[should_panic]
1020    fn missing_ops() {
1021        let arena = &mut Arena::new();
1022        let _ts = parse(arena, None, "2 * 2 <= 5").unwrap();
1023    }
1024
1025    #[test]
1026    fn more_complicated_term() {
1027        let _ = env_logger::builder().is_test(true).try_init();
1028        let arena = &mut Arena::new();
1029        let x = "(
1030[(1, 2) | unit] ++ foo(baz(1e-9)),
1031date{2025-09-30T18:24:22.154Z},
1032\"aaa{
10331 + 2
1034}bbb{
10353 * 4
1036}ccc\",
1037{player = {pos = {x = 0, y = 0}, health = 100}},
1038)";
1039        let ts = parse(arena, Some(SAMPLE_DEFS), x).unwrap();
1040        let s = format!("{}", ts[0].display(arena));
1041        assert_eq!(ts.len(), 1);
1042        assert_eq!(
1043            s,
1044            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
1045        );
1046    }
1047}