arena_terms_parser/
parser.rs

1//! Parser for Prolog-like terms with operator definitions.
2//!
3//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
4//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
5//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
6//! precedence and associativity rules.
7//!
8//! The parser constructs arena-allocated [`Term`] values (from the [`arena_terms`] crate)
9//! representing atoms, numbers, compound terms, lists, tuples, and other structures.
10//!
11//! # Components
12//! - [`TermLexer`]: Produces [`TermToken`]s for the parser.
13//! - [`OperDefs`]: Defines fixity, precedence, and associativity.
14//! - [`TermParser`]: Performs syntax analysis using generated SLR tables.
15//!
16//! Generated parsing tables and rules are produced by **parlex-gen**’s [`aslr`] tool.
17//!
//! [`TermParser`]: crate::parser::TermParser
19//! [`TermLexer`]: crate::lexer::TermLexer
20//! [`TermToken`]: crate::lexer::TermToken
21//! [`OperDefs`]: crate::oper::OperDefs
22//! [`arena_terms`]: https://crates.io/crates/arena-terms
23//! [`aslr`]: https://crates.io/crates/parlex-gen
24use crate::lexer::{TermLexer, TermToken, Value};
25use crate::oper::{Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, OperDef, OperDefTab, OperDefs};
26use anyhow::{Context, Result, anyhow, bail};
27use arena_terms::{Arena, IntoTerm, Term, View, atom, func, list};
28use once_cell::sync::Lazy;
29use parlex::{Lexer, LexerCtx, LexerData, Token};
30use smartstring::alias::String;
31use std::iter::FusedIterator;
32use std::str::FromStr;
33use std::{fmt, mem};
34
35/// Includes the generated SLR parser tables and definitions.
36///
37/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
38/// during the build process. It defines the parsing automaton, rule metadata,
39/// and associated enum types used by the [`TermParser`].
40include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
41
42/// Constructs the default operator definitions used by the [`TermParser`].
43///
44/// This function populates an [`OperDefs`] table in the given [`Arena`],
45/// defining built-in operators such as `-` (prefix), `++` (infix), and `=` (infix),
46/// along with their precedence and associativity rules.
47///
48/// ```prolog
49/// [ op(-(x), prefix, 800, right, none, false),
50///   op(++(x, y), infix, 500, left, none, false),
51///   op(=(x, y), infix, 100, right, none, false),
52///   op(op(f,
53///         =(type, fun),
54///         =(prec, 0),
55///         =(assoc, none),
56///         =(rename_to, none),
57///         =(embed_type, false)),
58///      fun, 0, none, none, false)
59/// ]
60/// ```
61///
62/// The resulting definitions form the standard operator environment available
63/// to the parser when no user-defined operator table is provided.
64///
65/// # Parameters
66/// - `arena`: The [`Arena`] used for allocating operator term structures.
67///
68/// # Returns
69/// An initialized [`OperDefs`] instance containing the default operator set.
70///
71/// [`TermParser`]: crate::parser::TermParser
72/// [`OperDefs`]: crate::oper::OperDefs
73/// [`Arena`]: arena_terms::Arena
74/// [`aslr`]: https://crates.io/crates/parlex-gen
75pub fn parser_oper_defs(arena: &mut Arena) -> OperDefs {
76    let term = list![
77        func!(
78            "op";
79            func!("-"; atom!("x")),
80            atom!("prefix"),
81            800,
82            atom!("right"),
83            atom!("none"),
84            atom!("false"),
85        ),
86        func!(
87            "op";
88            func!("++"; atom!("x"), atom!("y")),
89            atom!("infix"),
90            500,
91            atom!("left"),
92            atom!("none"),
93            atom!("false"),
94        ),
95        func!(
96            "op";
97            func!("="; atom!("x"), atom!("y")),
98            atom!("infix"),
99            100,
100            atom!("right"),
101            atom!("none"),
102            atom!("false"),
103        ),
104        func!(
105            "op";
106            func!(
107                "op";
108                atom!("f"),
109                func!("="; atom!("type"), atom!("fun")),
110                func!("="; atom!("prec"), 0),
111                func!("="; atom!("assoc"), atom!("none")),
112                func!("="; atom!("rename_to"), atom!("none")),
113                func!("="; atom!("embed_type"), atom!("false")),
114            ),
115            atom!("fun"),
116            0,
117            atom!("none"),
118            atom!("none"),
119            atom!("false"),
120        ),
121        => arena
122    ];
123    OperDefs::try_from_ops(arena, term).unwrap()
124}
125
126/// Prolog-like term parser with operator precedence and associativity handling.
127///
128/// The [`TermParser`] drives the parsing of Prolog-style terms using the
129/// [`parlex`] SLR(1) runtime library. It builds upon the [`TermLexer`] for tokenization
130/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
131///
132/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
133/// allowing user-defined or default operators to control how expressions are
134/// grouped and nested according to their **fixity**, **precedence**, and
135/// **associativity**.
136///
137/// # Core Components
138/// - [`ParserCtx`] — manages parse states, actions, and reductions generated by
139///   the `aslr` tool.
140/// - [`TermLexer`] — provides token streams of [`TermToken`]s for the parser.
141/// - [`Arena`] — stores terms compactly for minimal heap allocation.
142///
143/// # Typical Workflow
144/// 1. Create a [`TermParser`] from a byte iterator (`try_new`).
145/// 2. (Optionally) extend or redefine operator sets using [`define_opers`].
146/// 3. Call [`try_next_term`] or [`try_collect_terms`] to parse one or more
147///    terms into arena-backed [`Term`]s.
148///
149/// [`Arena`]: arena_terms::Arena
150/// [`Term`]: arena_terms::Term
151/// [`OperDefs`]: crate::oper::OperDefs
152/// [`ParserCtx`]: parlex::ParserCtx
153/// [`TermLexer`]: crate::lexer::TermLexer
154/// [`TermToken`]: crate::lexer::TermToken
pub struct TermParser<I>
where
    I: FusedIterator<Item = u8>,
{
    /// The parser context that manages parse state, tables, and token input.
    ///
    /// This field wraps the generated `ParserData` and integrates the lexer
    /// with the runtime parser loop from [`parlex`]. It drives token reading,
    /// state transitions, and reductions. It also owns the [`TermLexer`], whose
    /// operator table (`lexer.opers`) is consulted during normalization and
    /// ambiguity resolution.
    ctx: ParserCtx<TermLexer<I>, <Self as Parser<Arena>>::ParserData, Arena>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing. Reductions drain a suffix of this stack via
    /// `truncate(index)` once the sequence has been folded into a compound term.
    terms: Vec<Term>,
}
173
174/// Implementation of [`TermParser`] methods.
175///
176/// This `impl` defines the core construction and execution logic for the
177/// Prolog-like term parser. It provides utilities to initialize a new
178/// parser instance, collect or stream parsed [`Term`] values, dynamically
179/// extend operator definitions, and normalize parsed terms.
180///
181/// The parser integrates with the [`parlex`] runtime library and operates over
182/// tokens produced by the [`TermLexer`], yielding arena-allocated [`Term`]
183/// values suitable for further semantic processing.
184///
185/// # Type Parameters
186/// - `I`: The input source, which must implement [`FusedIterator`] yielding bytes.
187impl<I> TermParser<I>
188where
189    I: FusedIterator<Item = u8>,
190{
191    /// Creates a new [`TermParser`] for the given input stream.
192    ///
193    /// Initializes an internal [`TermLexer`] (with optional operator definitions)
194    /// and wraps it in a [`ParserCtx`].
195    ///
196    /// # Parameters
197    /// - `input`: A fused iterator over bytes to be parsed.
198    /// - `opers`: Optional [`OperDefs`] defining operator precedence and fixity.
199    ///
200    /// # Returns
201    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
202    ///
203    /// # Errors
204    /// Returns an error if the lexer context cannot be initialized
205    /// or if the generated parser tables fail to load.
206    pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
207        let lexer = TermLexer::try_new(input, opers)?;
208        let ctx = ParserCtx::new(lexer);
209        Ok(Self {
210            ctx,
211            terms: Vec::new(),
212        })
213    }
214
215    /// Parses all terms from the input until end-of-stream.
216    ///
217    /// Repeatedly calls [`try_next_term`] until no more terms are available,
218    /// collecting them into a single [`Vec`] of [`Term`]s.
219    ///
220    /// # Returns
221    /// A vector of all successfully parsed [`Term`]s.
222    ///
223    /// # Errors
224    /// Returns an error if any term fails syntactic or semantic validation.
225    pub fn try_collect_terms(&mut self, arena: &mut Arena) -> Result<Vec<Term>> {
226        let mut ts = Vec::new();
227        while let Some(t) = self.try_next_term(arena)? {
228            ts.push(t);
229        }
230        Ok(ts)
231    }
232
233    /// Parses the next term from the input stream.
234    ///
235    /// Returns the next complete [`Term`], or `None` if end-of-input is reached.
236    /// This method performs incremental parsing suitable for stream-based term input.
237    ///
238    /// # Parameters
239    /// - `arena`: Arena for allocating internal term structures.
240    ///
241    /// # Errors
242    /// Returns an error on unexpected tokens, incomplete constructs,
243    /// or invalid operator combinations.
244    #[inline]
245    pub fn try_next_term(&mut self, arena: &mut Arena) -> Result<Option<Term>> {
246        while let Some(tok) = self.try_next(arena)? {
247            match tok.token_id {
248                TokenID::Term => match tok.value {
249                    Value::None => {}
250                    Value::Term(term) => return Ok(Some(term)),
251                    value => bail!("Unexpected token value {:?}", value),
252                },
253                token_id => bail!("Unexpected token id {:?}", token_id),
254            }
255        }
256        Ok(None)
257    }
258
259    /// Defines or extends operator definitions directly from a Prolog-like
260    /// `op/6` term list read from a separate input source.
261    ///
262    /// This allows dynamic addition of new operator fixities and precedence
263    /// rules during runtime parsing.
264    ///
265    /// # Parameters
266    /// - `arena`: Arena allocator used for constructing term structures.
267    /// - `defs_input`: Input byte iterator yielding the operator definition terms.
268    /// - `opers`: Optional initial operator table to extend.
269    ///   If `None`, the default operator definitions are used.
270    ///
271    /// # Errors
272    /// Returns an error if parsing the operator term list fails or produces
273    /// an invalid operator specification.
274    pub fn define_opers<J: FusedIterator<Item = u8>>(
275        &mut self,
276        arena: &mut Arena,
277        defs_input: J,
278        opers: Option<OperDefs>,
279    ) -> Result<()> {
280        let opers = match opers {
281            Some(opers) => opers,
282            None => parser_oper_defs(arena),
283        };
284
285        let defs_lexer = TermLexer::try_new(defs_input, Some(opers))?;
286        let defs_ctx = ParserCtx::new(defs_lexer);
287        let mut defs_parser = TermParser {
288            ctx: defs_ctx,
289            terms: Vec::new(),
290        };
291        while let Some(term) = defs_parser.try_next_term(arena)? {
292            log::trace!(
293                "Stats: {:?}, {:?}",
294                defs_parser.ctx().lexer.stats(),
295                defs_parser.stats()
296            );
297            defs_parser
298                .ctx_mut()
299                .lexer
300                .opers
301                .define_opers(arena, term)?;
302        }
303        let defs_opers = std::mem::take(&mut defs_parser.ctx_mut().lexer.opers);
304        self.ctx_mut().lexer.opers = defs_opers;
305
306        Ok(())
307    }
308
    /// Normalizes a parsed term using its operator definition.
    ///
    /// This process transforms terms according to their declared fixity,
    /// applying named default arguments and other attributes specified
    /// in the corresponding operator definition.
    ///
    /// # Parameters
    /// - `arena`: Arena used to store normalized term structures.
    /// - `term`: The parsed term to normalize.
    /// - `fixity`: Operator fixity (`fun`, `prefix`, `infix`, or `postfix`).
    /// - `op_tab_index`: Optional index into the operator definition table, if the
    ///   term corresponds to a defined operator.
    ///
    /// # Returns
    /// A normalized [`Term`] allocated in the given arena, ready for evaluation or
    /// further semantic analysis.
    ///
    /// # Errors
    /// Returns an error if normalization fails due to invalid fixity, mismatched
    /// arity, or inconsistent operator metadata.
    fn normalize_term(
        &self,
        arena: &mut Arena,
        term: Term,
        fixity: Fixity,
        op_tab_index: Option<usize>,
    ) -> Result<Term> {
        // Look up the operator definition for this token under the given fixity.
        match self.ctx().lexer.opers.get(op_tab_index)[fixity] {
            Some(ref op_def) => {
                // Split the term into its functor and argument slice.
                let (functor, vs) = match term.view(arena)? {
                    // A bare atom is an operator applied to zero arguments.
                    View::Atom(_) => (term, &[] as &[Term]),
                    View::Func(_, functor, args) => {
                        if args.is_empty() {
                            bail!("invalid Func");
                        }
                        (*functor, args)
                    }
                    // Non-atom, non-compound terms pass through unchanged.
                    _ => {
                        return Ok(term);
                    }
                };
                let name = functor.atom_name(arena)?;

                // The fixity dictates how many leading arguments are positional
                // and mandatory (e.g. infix requires both operands).
                let n_required_args = OperDef::required_arity(fixity);
                if vs.len() < n_required_args {
                    bail!(
                        "missing {} required arguments in term {:?}",
                        n_required_args - vs.len(),
                        name
                    );
                }

                // One slot per declared argument; filled first positionally,
                // then by `name = value` pairs, then by declared defaults.
                let args = &op_def.args;
                let mut xs: Vec<Option<Term>> = vec![None; args.len()];

                for (i, value) in vs.iter().enumerate() {
                    if i < n_required_args {
                        // Required arguments bind strictly by position.
                        xs[i] = Some(*value);
                    } else {
                        match value.view(arena)? {
                            // `name = value`: bind to the declared argument slot
                            // with that name, rejecting duplicates.
                            View::Func(ar, functor, vs)
                                if vs.len() == 2 && functor.atom_name(ar)? == "=" =>
                            {
                                let arg_name = vs[0].atom_name(arena)?;

                                if let Some(pos) = args.iter().position(|x| x.name == arg_name) {
                                    if xs[pos].is_none() {
                                        xs[pos] = Some(vs[1]);
                                    } else {
                                        bail!(
                                            "cannot redefine argument {:?} at position {} in {:?}",
                                            arg_name,
                                            pos,
                                            name
                                        );
                                    }
                                } else {
                                    bail!("invalid argument name {:?} in {:?}", arg_name, name);
                                }
                            }
                            // Plain extra argument: bind positionally to slot `i`.
                            // NOTE(review): `xs[i]` assumes `i < args.len()`; a call
                            // supplying more positional args than declared slots
                            // would panic here rather than error — confirm the
                            // grammar/opdef invariants guarantee this.
                            _ => {
                                if xs[i].is_none() {
                                    xs[i] = Some(*value);
                                } else {
                                    bail!(
                                        "cannot redefine argument {:?} at position {} in {:?}",
                                        args[i].name,
                                        i,
                                        name
                                    );
                                }
                            }
                        }
                    }
                }

                // Fill unbound slots from declared defaults; any slot still
                // unbound (no value, no default) is an error.
                let vs: Option<Vec<_>> = xs
                    .into_iter()
                    .enumerate()
                    .map(|(i, x)| x.or(args[i].default))
                    .collect();
                let mut vs = match vs {
                    Some(vs) => vs,
                    None => bail!("missing arguments in {:?}", name),
                };

                // Apply the optional functor rename from the opdef.
                let rename_to = match op_def.rename_to {
                    Some(rename_to) => rename_to,
                    None => functor,
                };

                // Optionally prepend the fixity as an extra leading argument.
                if op_def.embed_fixity {
                    vs.insert(0, arena.atom(String::from(fixity)));
                }

                // Zero arguments collapse back to a bare functor atom.
                if vs.is_empty() {
                    Ok(rename_to)
                } else {
                    Ok(arena.funcv(std::iter::once(&rename_to).chain(vs.iter()))?)
                }
            }
            // No opdef: plain `fun` terms are fine as-is; for operator
            // fixities a definition is mandatory.
            None => match fixity {
                Fixity::Fun => Ok(term),
                _ => bail!("missing opdef for fixity {:?}", fixity),
            },
        }
    }
436}
437
438/// Implements the [`Parser`] trait for [`TermParser`], integrating with the **parlex** runtime library.
439///
440/// This binding connects the generated SLR parser tables (`ParData`) with the concrete
441/// term parser. It exposes the parser context, statistics, and the key callbacks
442/// required during shift/reduce parsing: ambiguity resolution and reductions.
443///
444/// # Associated Types
445/// - `Lexer`      — The input lexer producing [`TermToken`]s (`TermLexer<I>`).
446/// - `ParserData` — The generated parser tables and rule enums (`ParData`).
447impl<I> Parser<Arena> for TermParser<I>
448where
449    I: FusedIterator<Item = u8>,
450{
451    type Lexer = TermLexer<I>;
452    type ParserData = ParData;
453
    /// Returns a shared reference to the internal [`ParserCtx`].
    ///
    /// Required by the [`Parser`] trait; gives the runtime read access to the
    /// parse state and the wrapped lexer.
    fn ctx(&self) -> &ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &self.ctx
    }
458
    /// Returns a mutable reference to the internal [`ParserCtx`].
    ///
    /// Required by the [`Parser`] trait; lets the runtime (and methods such as
    /// `define_opers`) mutate the parse state and the wrapped lexer.
    fn ctx_mut(&mut self) -> &mut ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &mut self.ctx
    }
463
    /// Returns cumulative parsing statistics (tokens, shifts, reductions, ambiguities).
    ///
    /// The counters are cloned, so the caller receives an independent snapshot
    /// that does not borrow the parser.
    fn stats(&self) -> ParserStats {
        self.ctx().stats.clone()
    }
468
469    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
470    ///
471    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
472    /// chooses the appropriate parser action (shift or reduce) according to the
473    /// operator precedence and associativity rules.
474    ///
475    /// # Parameters
476    /// - `_arena`: Arena used to allocate or inspect terms.
477    /// - `ambig`:  The generated ambiguity ID (`AmbigID`).
478    /// - `tok2`:   The lookahead token at the ambiguity point.
479    ///
480    /// # Returns
481    /// The selected parser [`Action`] to disambiguate the current state.
482    ///
483    /// # Errors
484    /// Returns an error if the ambiguity cannot be resolved consistently.
    fn resolve_ambiguity(
        &mut self,
        _arena: &mut Arena,
        ambig: AmbigID,
        tok2: &TermToken,
    ) -> Result<Action> {
        // The generated table lists the candidate actions for this ambiguity;
        // by construction entry 0 is the shift and entry 1 the reduce.
        let ambigs = ParData::lookup_ambig(ambig);

        let shift_action = ambigs[0];
        assert!(matches!(shift_action, Action::Shift(_)));

        let reduce_action = ambigs[1];
        assert!(matches!(reduce_action, Action::Reduce(_)));

        let Action::Reduce(prod) = reduce_action else {
            bail!("can't match reduce action")
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod,
            tok2
        );

        // Locate the operator token (`tok1`) inside the production about to be
        // reduced, together with its fixity. The peek depth counts back from
        // the top of the token stack to the operator's position in each rule.
        let (fixity1, tok1) = match prod {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, self.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, self.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, self.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, self.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, self.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, self.tokens_peek(2))
            }
            _ => bail!(
                "unexpected conflict: reduction of {:?} with shifting token {:?}",
                prod,
                tok2
            ),
        };

        // Operator tables for the operator being reduced (tok1) and the
        // lookahead token (tok2).
        let op_tab1 = self.ctx().lexer.opers.get(tok1.op_tab_index);
        let op_tab2 = self.ctx().lexer.opers.get(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            // No definition for tok1 under this fixity: default to shifting.
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // tok2 may be defined as infix and/or postfix; compare against the
            // full range [min_prec2, max_prec2] of its candidate precedences.
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // tok1's precedence beats every reading of tok2: complete the
                // tok1 expression first.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // Some reading of tok2 beats tok1: take tok2 first.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Exactly equal precedence: associativity must decide, and
                // both operators must agree on a chainable associativity.
                if assoc1 == Assoc::None {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok1
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok2
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    bail!(
                        "associativity conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                        tok1,
                        tok2
                    );
                } else {
                    // Left-associative chains group leftward (reduce now);
                    // right-associative chains defer (shift).
                    if assoc1 == Assoc::Left {
                        Ok(reduce_action)
                    } else {
                        Ok(shift_action)
                    }
                }
            } else {
                // tok1 falls strictly inside tok2's precedence range, so the
                // infix/postfix readings of tok2 disagree: unresolvable here.
                bail!(
                    "precedence conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                    tok1,
                    tok2
                );
            }
        } else {
            // The lookahead is not an operator token: shifting is always safe.
            Ok(shift_action)
        }
    }
629
630    /// Performs a grammar reduction for the given production rule.
631    ///
632    /// Applies the semantic action for `prod`, typically constructing or
633    /// normalizing an arena-backed [`Term`], and pushes the resulting token
634    /// onto the parser’s value stack.
635    ///
636    /// # Parameters
637    /// - `arena`: Arena used to allocate or inspect terms.
638    /// - `prod`:  The production being reduced (`ProdID`).
639    /// - `token`: The lookahead token (normally not used).
640    ///
641    /// # Errors
642    /// Returns an error if the reduction fails due to arity mismatches,
643    /// invalid operator metadata, or inconsistent stack state.
644    fn reduce(&mut self, arena: &mut Arena, prod: ProdID, token: &TermToken) -> Result<()> {
645        match prod {
646            ProdID::Start => {
647                // Accept - does not get reduced
648                unreachable!()
649            }
650
651            ProdID::Term1 => {
652                // Term -> Expr
653                let mut expr_tok = self.tokens_pop()?;
654                expr_tok.token_id = TokenID::Term;
655                self.tokens_push(expr_tok);
656            }
657
658            ProdID::Term2 => {
659                // Term -> Expr .
660                self.tokens_pop()?;
661                let mut expr_tok = self.tokens_pop()?;
662                expr_tok.token_id = TokenID::Term;
663                self.tokens_push(expr_tok);
664            }
665
666            ProdID::Term3 => {
667                // Term ->
668                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
669            }
670
671            ProdID::Term4 => {
672                // Term -> .
673                self.tokens_pop()?;
674                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
675            }
676
677            ProdID::Func => {
678                // Expr -> func Seq )
679                self.tokens_pop()?;
680                let index = usize::try_from(self.tokens_pop()?.value)?;
681                let func_tok = self.tokens_pop()?;
682                let line_no = func_tok.line_no;
683                let op_tab_index = func_tok.op_tab_index;
684                let functor = Term::try_from(func_tok.value)?;
685
686                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
687                let term = arena.funcv(vs)?;
688                self.terms.truncate(index);
689
690                let term = self.normalize_term(arena, term, Fixity::Fun, op_tab_index)?;
691
692                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
693            }
694
695            ProdID::List => {
696                // Expr -> [ Seq ]
697                self.tokens_pop()?;
698                let seq_tok = self.tokens_pop()?;
699                let left_brack_tok = self.tokens_pop()?;
700                let index = usize::try_from(seq_tok.value)?;
701
702                let term = arena.list(&self.terms[index..]);
703                self.terms.truncate(index);
704
705                self.tokens_push(TermToken::new(
706                    TokenID::Expr,
707                    Value::Term(term),
708                    left_brack_tok.line_no,
709                ));
710            }
711
712            ProdID::Nil => {
713                // Expr -> [ ]
714                self.tokens_pop()?;
715                let left_brack_tok = self.tokens_pop()?;
716                self.tokens_push(TermToken::new(
717                    TokenID::Expr,
718                    Value::Term(Term::NIL),
719                    left_brack_tok.line_no,
720                ));
721            }
722
723            ProdID::List2 => {
724                // Expr -> [ Seq | Expr ]
725                self.tokens_pop()?;
726                let tail = Term::try_from(self.tokens_pop()?.value)?;
727                self.tokens_pop()?;
728                let index = usize::try_from(self.tokens_pop()?.value)?;
729                let left_brack_tok = self.tokens_pop()?;
730
731                let term = arena.listc(&self.terms[index..], tail);
732                self.terms.truncate(index);
733
734                self.tokens_push(TermToken::new(
735                    TokenID::Expr,
736                    Value::Term(term),
737                    left_brack_tok.line_no,
738                ));
739            }
740
741            ProdID::Tuple => {
742                // Expr -> ( Seq )
743                self.tokens_pop()?;
744                let seq_tok = self.tokens_pop()?;
745                let left_paren_tok = self.tokens_pop()?;
746
747                let index = usize::try_from(seq_tok.value)?;
748
749                // Arena terms parser does not currently support unary tuples.
750                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
751                let vs = &self.terms[index..];
752                let term = if vs.len() == 1 {
753                    vs[0]
754                } else {
755                    arena.tuple(vs)
756                };
757                self.terms.truncate(index);
758
759                self.tokens_push(TermToken::new(
760                    TokenID::Expr,
761                    Value::Term(term),
762                    left_paren_tok.line_no,
763                ));
764            }
765
766            ProdID::Unit => {
767                // Expr -> ( )
768                self.tokens_pop()?;
769                let left_paren_tok = self.tokens_pop()?;
770                self.tokens_push(TermToken::new(
771                    TokenID::Expr,
772                    Value::Term(Term::UNIT),
773                    left_paren_tok.line_no,
774                ));
775            }
776
777            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
778                // Expr -> xxx
779                let mut tok = self.tokens_pop()?;
780                tok.token_id = TokenID::Expr;
781                self.tokens_push(tok);
782            }
783
784            ProdID::Atom => {
785                // Expr -> atom
786                let atom_tok = self.tokens_pop()?;
787                let line_no = atom_tok.line_no;
788                let op_tab_index = atom_tok.op_tab_index;
789
790                let atom = Term::try_from(atom_tok.value)?;
791
792                let term = self.normalize_term(arena, atom, Fixity::Fun, op_tab_index)?;
793
794                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
795            }
796
797            ProdID::Infix1 => {
798                // Expr -> Expr atomOper Expr
799                let expr2_tok = self.tokens_pop()?;
800                let oper_tok = self.tokens_pop()?;
801                let expr1_tok = self.tokens_pop()?;
802                let line_no = expr1_tok.line_no;
803                let op_tab_index = oper_tok.op_tab_index;
804
805                let expr2 = Term::try_from(expr2_tok.value)?;
806                let oper = Term::try_from(oper_tok.value)?;
807                let expr1 = Term::try_from(expr1_tok.value)?;
808
809                let term = arena.funcv([oper, expr1, expr2])?;
810                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;
811
812                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
813            }
814
815            ProdID::Infix2 => {
816                // Expr -> Expr func Seq ) Expr
817                let expr2_tok = self.tokens_pop()?;
818                self.tokens_pop()?;
819                let index = usize::try_from(self.tokens_pop()?.value)?;
820                let oper_tok = self.tokens_pop()?;
821                let expr1_tok = self.tokens_pop()?;
822                let line_no = expr1_tok.line_no;
823                let op_tab_index = oper_tok.op_tab_index;
824
825                let expr2 = Term::try_from(expr2_tok.value)?;
826                let oper = Term::try_from(oper_tok.value)?;
827                let expr1 = Term::try_from(expr1_tok.value)?;
828
829                let xs = [oper, expr1, expr2];
830                let vs = xs.iter().chain(self.terms[index..].iter());
831                let term = arena.funcv(vs)?;
832                self.terms.truncate(index);
833
834                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;
835
836                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
837            }
838
839            ProdID::Prefix1 => {
840                // Expr -> atom Expr
841                let expr1_tok = self.tokens_pop()?;
842                let oper_tok = self.tokens_pop()?;
843                let line_no = oper_tok.line_no;
844                let op_tab_index = oper_tok.op_tab_index;
845
846                let expr1 = Term::try_from(expr1_tok.value)?;
847                let oper = Term::try_from(oper_tok.value)?;
848
849                let term = match oper.view(arena)? {
850                    // Arena terms parser currently gives special treatment to unary minus
851                    // on integer and real literals (it directly negates them).
852                    // TODO: Consider handling minus at the lexical level.
853                    View::Atom(s)
854                        if s == "-"
855                            && matches!(expr1.view(arena)?, View::Int(_) | View::Real(_)) =>
856                    {
857                        match expr1.view(arena)? {
858                            View::Int(i) => arena.int(-i),
859                            View::Real(r) => arena.real(-r),
860                            _ => unreachable!(),
861                        }
862                    }
863                    _ => {
864                        let term = arena.funcv([oper, expr1])?;
865                        self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?
866                    }
867                };
868
869                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
870            }
871
872            ProdID::Prefix2 => {
873                // Expr -> func Seq ) Expr
874                let expr1_tok = self.tokens_pop()?;
875                self.tokens_pop()?;
876                let index = usize::try_from(self.tokens_pop()?.value)?;
877                let oper_tok = self.tokens_pop()?;
878                let line_no = oper_tok.line_no;
879                let op_tab_index = oper_tok.op_tab_index;
880
881                let oper = Term::try_from(oper_tok.value)?;
882                let expr1 = Term::try_from(expr1_tok.value)?;
883
884                let xs = [oper, expr1];
885                let vs = xs.iter().chain(self.terms[index..].iter());
886                let term = arena.funcv(vs)?;
887                self.terms.truncate(index);
888
889                let term = self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?;
890
891                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
892            }
893
894            ProdID::Postfix1 => {
895                // Expr -> Expr atomOper
896                let oper_tok = self.tokens_pop()?;
897                let expr1_tok = self.tokens_pop()?;
898                let line_no = expr1_tok.line_no;
899                let op_tab_index = oper_tok.op_tab_index;
900
901                let oper = Term::try_from(oper_tok.value)?;
902                let expr1 = Term::try_from(expr1_tok.value)?;
903
904                let term = arena.funcv([oper, expr1])?;
905                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;
906
907                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
908            }
909
910            ProdID::Postfix2 => {
911                // Expr -> Expr func Seq )
912                self.tokens_pop()?;
913                let index = usize::try_from(self.tokens_pop()?.value)?;
914                let oper_tok = self.tokens_pop()?;
915                let expr1_tok = self.tokens_pop()?;
916                let line_no = expr1_tok.line_no;
917                let op_tab_index = oper_tok.op_tab_index;
918
919                let oper = Term::try_from(oper_tok.value)?;
920                let expr1 = Term::try_from(expr1_tok.value)?;
921
922                let xs = [oper, expr1];
923                let vs = xs.iter().chain(self.terms[index..].iter());
924                let term = arena.funcv(vs)?;
925                self.terms.truncate(index);
926
927                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;
928
929                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
930            }
931
932            ProdID::Seq1 => {
933                // Seq -> BareSeq
934                let mut bare_seq_tok = self.tokens_pop()?;
935                bare_seq_tok.token_id = TokenID::Seq;
936                self.tokens_push(bare_seq_tok);
937            }
938
939            ProdID::Seq2 => {
940                // Seq -> BareSeq ,
941                self.tokens_pop()?;
942                let mut bare_seq_tok = self.tokens_pop()?;
943                bare_seq_tok.token_id = TokenID::Seq;
944                self.tokens_push(bare_seq_tok);
945            }
946
947            ProdID::BareSeq1 => {
948                // BareSeq -> Expr
949                let expr_tok = self.tokens_pop()?;
950                let line_no = expr_tok.line_no;
951                let expr = Term::try_from(expr_tok.value)?;
952
953                let index = self.terms.len();
954                self.terms.push(expr);
955
956                self.tokens_push(TermToken::new(
957                    TokenID::BareSeq,
958                    Value::Index(index),
959                    line_no,
960                ));
961            }
962
963            ProdID::BareSeq2 => {
964                // BareSeq -> BareSeq , Expr
965                let expr_tok = self.tokens_pop()?;
966                let expr = Term::try_from(expr_tok.value)?;
967                self.tokens_pop()?;
968
969                self.terms.push(expr);
970            }
971        }
972        Ok(())
973    }
974}
975
/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;

    /// Operator definitions shared by the tests: comparison, arithmetic,
    /// and boolean operators with explicit fixity, precedence, and
    /// associativity (including a renamed postfix minus).
    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    /// Parses `s` into a vector of terms, first installing the operator
    /// definitions in `defs` (if any) on top of the parser's built-in ones.
    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Result<Vec<Term>> {
        let mut parser = TermParser::try_new(s.bytes().fuse(), Some(parser_oper_defs(arena)))?;
        if let Some(defs) = defs {
            parser.define_opers(arena, defs.bytes().fuse(), None)?;
        }
        parser.try_collect_terms(arena)
    }

    /// Redundant `.` terminators around a single term must yield exactly one
    /// parsed term, with `*` binding tighter than `<=`.
    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .").unwrap();
        // Check the count first so an empty result fails with a clear
        // assertion message rather than an index panic.
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    /// Without the sample definitions, `*` and `<=` are unknown operators
    /// and parsing must fail (surfaced here as a panic via `unwrap`).
    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::new();
        let _ts = parse(arena, None, "2 * 2 <= 5").unwrap();
    }

    /// Exercises a mix of features in one tuple: list with tail, nested
    /// functor, date literal, string interpolation, and a braced raw segment.
    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x).unwrap();
        // Check the count before indexing, for a clearer failure mode.
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
        );
    }
}