// arena_terms_parser/parser.rs

//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser constructs arena-allocated [`Term`] values (from the [`arena_terms`] crate)
//! representing atoms, numbers, compound terms, lists, tuples, and other structures.
//!
//! # Components
//! - [`TermLexer`]: Produces [`TermToken`]s for the parser.
//! - [`OperDefs`]: Defines fixity, precedence, and associativity.
//! - [`TermParser`]: Performs syntax analysis using generated SLR tables.
//!
//! Generated parsing tables and rules are produced by **parlex-gen**'s [`aslr`] tool.
//!
//! [`TermParser`]: struct.TermParser.html
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen
24use crate::lexer::{TermLexer, TermToken, Value};
25use crate::oper::{Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, OperDef, OperDefTab, OperDefs};
26use anyhow::{Context, Result, anyhow, bail};
27use arena_terms::{Arena, IntoTerm, Term, View, atom, func, list};
28use once_cell::sync::Lazy;
29use parlex::{Lexer, LexerCtx, LexerData, Token};
30use smartstring::alias::String;
31use std::iter::FusedIterator;
32use std::str::FromStr;
33use std::{fmt, mem};
34
/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`TermParser`] (e.g. `ParData`,
/// `ProdID`, `TokenID`, `AmbigID` referenced throughout this module).
///
/// [`TermParser`]: struct.TermParser.html
/// [`aslr`]: https://crates.io/crates/parlex-gen
include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
41
42/// Constructs the default operator definitions used by the [`TermParser`].
43///
44/// This function populates an [`OperDefs`] table in the given [`Arena`],
45/// defining built-in operators such as `-` (prefix), `++` (infix), and `=` (infix),
46/// along with their precedence and associativity rules.
47///
48/// ```prolog
49/// [ op(-(x), prefix, 800, right, none, false),
50/// op(++(x, y), infix, 500, left, none, false),
51/// op(=(x, y), infix, 100, right, none, false),
52/// op(op(f,
53/// =(type, fun),
54/// =(prec, 0),
55/// =(assoc, none),
56/// =(rename_to, none),
57/// =(embed_type, false)),
58/// fun, 0, none, none, false)
59/// ]
60/// ```
61///
62/// The resulting definitions form the standard operator environment available
63/// to the parser when no user-defined operator table is provided.
64///
65/// # Parameters
66/// - `arena`: The [`Arena`] used for allocating operator term structures.
67///
68/// # Returns
69/// An initialized [`OperDefs`] instance containing the default operator set.
70///
71/// [`TermParser`]: crate::parser::TermParser
72/// [`OperDefs`]: crate::oper::OperDefs
73/// [`Arena`]: arena_terms::Arena
74/// [`aslr`]: https://crates.io/crates/parlex-gen
75pub fn parser_oper_defs(arena: &mut Arena) -> OperDefs {
76 let term = list![
77 func!(
78 "op";
79 func!("-"; atom!("x")),
80 atom!("prefix"),
81 800,
82 atom!("right"),
83 atom!("none"),
84 atom!("false"),
85 ),
86 func!(
87 "op";
88 func!("++"; atom!("x"), atom!("y")),
89 atom!("infix"),
90 500,
91 atom!("left"),
92 atom!("none"),
93 atom!("false"),
94 ),
95 func!(
96 "op";
97 func!("="; atom!("x"), atom!("y")),
98 atom!("infix"),
99 100,
100 atom!("right"),
101 atom!("none"),
102 atom!("false"),
103 ),
104 func!(
105 "op";
106 func!(
107 "op";
108 atom!("f"),
109 func!("="; atom!("type"), atom!("fun")),
110 func!("="; atom!("prec"), 0),
111 func!("="; atom!("assoc"), atom!("none")),
112 func!("="; atom!("rename_to"), atom!("none")),
113 func!("="; atom!("embed_type"), atom!("false")),
114 ),
115 atom!("fun"),
116 0,
117 atom!("none"),
118 atom!("none"),
119 atom!("false"),
120 ),
121 => arena
122 ];
123 OperDefs::try_from_ops(arena, term).unwrap()
124}
125
/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) runtime library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Core Components
/// - [`ParserCtx`] — manages parse states, actions, and reductions generated by
///   the `aslr` tool.
/// - [`TermLexer`] — provides token streams of [`TermToken`]s for the parser.
/// - [`Arena`] — stores terms compactly for minimal heap allocation.
///
/// # Typical Workflow
/// 1. Create a [`TermParser`] from a byte iterator (`try_new`).
/// 2. (Optionally) extend or redefine operator sets using [`define_opers`].
/// 3. Call [`try_next_term`] or [`try_collect_terms`] to parse one or more
///    terms into arena-backed [`Term`]s.
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::oper::OperDefs
/// [`ParserCtx`]: parlex::ParserCtx
/// [`TermLexer`]: crate::lexer::TermLexer
/// [`TermToken`]: crate::lexer::TermToken
pub struct TermParser<I>
where
    I: FusedIterator<Item = u8>,
{
    /// The parser context that manages parse state, tables, and token input.
    ///
    /// This field wraps the generated `ParserData` and integrates the lexer
    /// with the runtime parser loop from [`parlex`]. It drives token reading,
    /// state transitions, and reductions.
    ctx: ParserCtx<TermLexer<I>, <Self as Parser<Arena>>::ParserData, Arena>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing. Reductions push elements here and bracketing
    /// productions drain them back via `truncate` (see `reduce`).
    terms: Vec<Term>,
}
173
174/// Implementation of [`TermParser`] methods.
175///
176/// This `impl` defines the core construction and execution logic for the
177/// Prolog-like term parser. It provides utilities to initialize a new
178/// parser instance, collect or stream parsed [`Term`] values, dynamically
179/// extend operator definitions, and normalize parsed terms.
180///
181/// The parser integrates with the [`parlex`] runtime library and operates over
182/// tokens produced by the [`TermLexer`], yielding arena-allocated [`Term`]
183/// values suitable for further semantic processing.
184///
185/// # Type Parameters
186/// - `I`: The input source, which must implement [`FusedIterator`] yielding bytes.
187impl<I> TermParser<I>
188where
189 I: FusedIterator<Item = u8>,
190{
191 /// Creates a new [`TermParser`] for the given input stream.
192 ///
193 /// Initializes an internal [`TermLexer`] (with optional operator definitions)
194 /// and wraps it in a [`ParserCtx`].
195 ///
196 /// # Parameters
197 /// - `input`: A fused iterator over bytes to be parsed.
198 /// - `opers`: Optional [`OperDefs`] defining operator precedence and fixity.
199 ///
200 /// # Returns
201 /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
202 ///
203 /// # Errors
204 /// Returns an error if the lexer context cannot be initialized
205 /// or if the generated parser tables fail to load.
206 pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
207 let lexer = TermLexer::try_new(input, opers)?;
208 let ctx = ParserCtx::new(lexer);
209 Ok(Self {
210 ctx,
211 terms: Vec::new(),
212 })
213 }
214
215 /// Parses all terms from the input until end-of-stream.
216 ///
217 /// Repeatedly calls [`try_next_term`] until no more terms are available,
218 /// collecting them into a single [`Vec`] of [`Term`]s.
219 ///
220 /// # Returns
221 /// A vector of all successfully parsed [`Term`]s.
222 ///
223 /// # Errors
224 /// Returns an error if any term fails syntactic or semantic validation.
225 pub fn try_collect_terms(&mut self, arena: &mut Arena) -> Result<Vec<Term>> {
226 let mut ts = Vec::new();
227 while let Some(t) = self.try_next_term(arena)? {
228 ts.push(t);
229 }
230 Ok(ts)
231 }
232
233 /// Parses the next term from the input stream.
234 ///
235 /// Returns the next complete [`Term`], or `None` if end-of-input is reached.
236 /// This method performs incremental parsing suitable for stream-based term input.
237 ///
238 /// # Parameters
239 /// - `arena`: Arena for allocating internal term structures.
240 ///
241 /// # Errors
242 /// Returns an error on unexpected tokens, incomplete constructs,
243 /// or invalid operator combinations.
244 #[inline]
245 pub fn try_next_term(&mut self, arena: &mut Arena) -> Result<Option<Term>> {
246 while let Some(tok) = self.try_next(arena)? {
247 match tok.token_id {
248 TokenID::Term => match tok.value {
249 Value::None => {}
250 Value::Term(term) => return Ok(Some(term)),
251 value => bail!("Unexpected token value {:?}", value),
252 },
253 token_id => bail!("Unexpected token id {:?}", token_id),
254 }
255 }
256 Ok(None)
257 }
258
259 /// Defines or extends operator definitions directly from a Prolog-like
260 /// `op/6` term list read from a separate input source.
261 ///
262 /// This allows dynamic addition of new operator fixities and precedence
263 /// rules during runtime parsing.
264 ///
265 /// # Parameters
266 /// - `arena`: Arena allocator used for constructing term structures.
267 /// - `defs_input`: Input byte iterator yielding the operator definition terms.
268 /// - `opers`: Optional initial operator table to extend.
269 /// If `None`, the default operator definitions are used.
270 ///
271 /// # Errors
272 /// Returns an error if parsing the operator term list fails or produces
273 /// an invalid operator specification.
274 pub fn define_opers<J: FusedIterator<Item = u8>>(
275 &mut self,
276 arena: &mut Arena,
277 defs_input: J,
278 opers: Option<OperDefs>,
279 ) -> Result<()> {
280 let opers = match opers {
281 Some(opers) => opers,
282 None => parser_oper_defs(arena),
283 };
284
285 let defs_lexer = TermLexer::try_new(defs_input, Some(opers))?;
286 let defs_ctx = ParserCtx::new(defs_lexer);
287 let mut defs_parser = TermParser {
288 ctx: defs_ctx,
289 terms: Vec::new(),
290 };
291 while let Some(term) = defs_parser.try_next_term(arena)? {
292 log::trace!(
293 "Stats: {:?}, {:?}",
294 defs_parser.ctx().lexer.stats(),
295 defs_parser.stats()
296 );
297 defs_parser
298 .ctx_mut()
299 .lexer
300 .opers
301 .define_opers(arena, term)?;
302 }
303 let defs_opers = std::mem::take(&mut defs_parser.ctx_mut().lexer.opers);
304 self.ctx_mut().lexer.opers = defs_opers;
305
306 Ok(())
307 }
308
    /// Normalizes a parsed term using its operator definition.
    ///
    /// This process transforms terms according to their declared fixity,
    /// applying named default arguments and other attributes specified
    /// in the corresponding operator definition (argument reordering by name,
    /// `rename_to` substitution, and optional fixity embedding).
    ///
    /// # Parameters
    /// - `arena`: Arena used to store normalized term structures.
    /// - `term`: The parsed term to normalize.
    /// - `fixity`: Operator fixity (`fun`, `prefix`, `infix`, or `postfix`).
    /// - `op_tab_index`: Optional index into the operator definition table, if the
    ///   term corresponds to a defined operator.
    ///
    /// # Returns
    /// A normalized [`Term`] allocated in the given arena, ready for evaluation or
    /// further semantic analysis.
    ///
    /// # Errors
    /// Returns an error if normalization fails due to invalid fixity, mismatched
    /// arity, duplicated or unknown named arguments, or inconsistent operator
    /// metadata.
    fn normalize_term(
        &self,
        arena: &mut Arena,
        term: Term,
        fixity: Fixity,
        op_tab_index: Option<usize>,
    ) -> Result<Term> {
        match self.ctx().lexer.opers.get(op_tab_index)[fixity] {
            Some(ref op_def) => {
                // Split the term into its functor and argument slice; a bare
                // atom is treated as a zero-argument functor. Non-functor
                // terms pass through untouched.
                let (functor, vs) = match term.view(arena)? {
                    View::Atom(_) => (term, &[] as &[Term]),
                    View::Func(_, functor, args) => {
                        if args.is_empty() {
                            bail!("invalid Func");
                        }
                        (*functor, args)
                    }
                    _ => {
                        return Ok(term);
                    }
                };
                let name = functor.atom_name(arena)?;

                // Positional arguments demanded by the fixity (e.g. 2 for infix)
                // must all be present before named arguments are considered.
                let n_required_args = OperDef::required_arity(fixity);
                if vs.len() < n_required_args {
                    bail!(
                        "missing {} required arguments in term {:?}",
                        n_required_args - vs.len(),
                        name
                    );
                }

                // `xs` is the output argument vector, one slot per declared
                // argument of the operator; `None` means "not yet supplied".
                let args = &op_def.args;
                let mut xs: Vec<Option<Term>> = vec![None; args.len()];

                for (i, value) in vs.iter().enumerate() {
                    if i < n_required_args {
                        // Required arguments are purely positional.
                        xs[i] = Some(*value);
                    } else {
                        match value.view(arena)? {
                            // A binary `=` functor is a named argument:
                            // `name = value` fills the declared slot `name`.
                            View::Func(ar, functor, vs)
                                if vs.len() == 2 && functor.atom_name(ar)? == "=" =>
                            {
                                let arg_name = vs[0].atom_name(arena)?;

                                if let Some(pos) = args.iter().position(|x| x.name == arg_name) {
                                    if xs[pos].is_none() {
                                        xs[pos] = Some(vs[1]);
                                    } else {
                                        bail!(
                                            "cannot redefine argument {:?} at position {} in {:?}",
                                            arg_name,
                                            pos,
                                            name
                                        );
                                    }
                                } else {
                                    bail!("invalid argument name {:?} in {:?}", arg_name, name);
                                }
                            }
                            // Anything else is a positional argument filling
                            // slot `i`, unless that slot was already taken.
                            _ => {
                                if xs[i].is_none() {
                                    xs[i] = Some(*value);
                                } else {
                                    bail!(
                                        "cannot redefine argument {:?} at position {} in {:?}",
                                        args[i].name,
                                        i,
                                        name
                                    );
                                }
                            }
                        }
                    }
                }

                // Fill remaining holes from declared defaults; any slot still
                // `None` afterwards is a missing mandatory argument.
                let vs: Option<Vec<_>> = xs
                    .into_iter()
                    .enumerate()
                    .map(|(i, x)| x.or(args[i].default))
                    .collect();
                let mut vs = match vs {
                    Some(vs) => vs,
                    None => bail!("missing arguments in {:?}", name),
                };

                // The operator may rename the functor in the resulting term.
                let rename_to = match op_def.rename_to {
                    Some(rename_to) => rename_to,
                    None => functor,
                };

                // Optionally prepend the fixity name as the first argument.
                if op_def.embed_fixity {
                    vs.insert(0, arena.atom(String::from(fixity)));
                }

                if vs.is_empty() {
                    Ok(rename_to)
                } else {
                    Ok(arena.funcv(std::iter::once(&rename_to).chain(vs.iter()))?)
                }
            }
            // No operator definition: plain functions pass through unchanged,
            // but an operator fixity without a definition is an internal error.
            None => match fixity {
                Fixity::Fun => Ok(term),
                _ => bail!("missing opdef for fixity {:?}", fixity),
            },
        }
    }
436}
437
438/// Implements the [`Parser`] trait for [`TermParser`], integrating with the **parlex** runtime library.
439///
440/// This binding connects the generated SLR parser tables (`ParData`) with the concrete
441/// term parser. It exposes the parser context, statistics, and the key callbacks
442/// required during shift/reduce parsing: ambiguity resolution and reductions.
443///
444/// # Associated Types
445/// - `Lexer` — The input lexer producing [`TermToken`]s (`TermLexer<I>`).
446/// - `ParserData` — The generated parser tables and rule enums (`ParData`).
447impl<I> Parser<Arena> for TermParser<I>
448where
449 I: FusedIterator<Item = u8>,
450{
    /// The input lexer producing [`TermToken`]s for this parser.
    type Lexer = TermLexer<I>;
    /// The generated SLR tables and rule enums (from `parser_data.rs`).
    type ParserData = ParData;

    /// Returns a shared reference to the internal [`ParserCtx`].
    fn ctx(&self) -> &ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &self.ctx
    }

    /// Returns a mutable reference to the internal [`ParserCtx`].
    fn ctx_mut(&mut self) -> &mut ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &mut self.ctx
    }

    /// Returns cumulative parsing statistics (tokens, shifts, reductions, ambiguities).
    ///
    /// Note: clones the context's stats snapshot rather than borrowing it.
    fn stats(&self) -> ParserStats {
        self.ctx().stats.clone()
    }
468
    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `_arena`: Arena used to allocate or inspect terms.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected parser [`Action`] to disambiguate the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently
    /// (non-associative chaining, mixed associativity, or overlapping
    /// precedence ranges).
    fn resolve_ambiguity(
        &mut self,
        _arena: &mut Arena,
        ambig: AmbigID,
        tok2: &TermToken,
    ) -> Result<Action> {
        // The generated table lists the candidate actions for this ambiguity:
        // by convention entry 0 is the shift, entry 1 the reduce.
        let ambigs = ParData::lookup_ambig(ambig);

        let shift_action = ambigs[0];
        assert!(matches!(shift_action, Action::Shift(_)));

        let reduce_action = ambigs[1];
        assert!(matches!(reduce_action, Action::Reduce(_)));

        let Action::Reduce(prod) = reduce_action else {
            bail!("can't match reduce action")
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod,
            tok2
        );

        // Locate the operator token (`tok1`) on the parse stack for the
        // production being reduced; the peek offset depends on where the
        // operator sits inside each production's right-hand side.
        let (fixity1, tok1) = match prod {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, self.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, self.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, self.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, self.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, self.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, self.tokens_peek(2))
            }
            _ => bail!(
                "unexpected conflict: reduction of {:?} with shifting token {:?}",
                prod,
                tok2
            ),
        };

        let op_tab1 = self.ctx().lexer.opers.get(tok1.op_tab_index);
        let op_tab2 = self.ctx().lexer.opers.get(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                // No definition for this fixity: prefer shifting.
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // The lookahead may be defined as both infix and postfix; compute
            // the range [min_prec2, max_prec2] over whichever are present
            // (missing entries collapse to the neutral extreme).
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // Left operator binds tighter: reduce.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // Lookahead binds tighter: shift.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Equal precedence: fall back to associativity rules.
                if assoc1 == Assoc::None {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok1
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok2
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    bail!(
                        "associativity conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                        tok1,
                        tok2
                    );
                } else {
                    // Matching associativity: left groups to the left (reduce),
                    // right groups to the right (shift).
                    if assoc1 == Assoc::Left {
                        Ok(reduce_action)
                    } else {
                        Ok(shift_action)
                    }
                }
            } else {
                // prec1 lies strictly inside the lookahead's precedence range:
                // no consistent grouping exists.
                bail!(
                    "precedence conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                    tok1,
                    tok2
                );
            }
        } else {
            // Lookahead is not an operator: shifting is always safe.
            Ok(shift_action)
        }
    }
629
    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser's value stack.
    ///
    /// Sequence productions cooperate with the side stack `self.terms`:
    /// a `BareSeq` token carries a [`Value::Index`] pointing at its first
    /// element in `self.terms`, and the bracketing productions (`Func`,
    /// `List`, `Tuple`, ...) drain the elements back via `truncate`.
    ///
    /// # Parameters
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (used only for the line number of the
    ///   empty `Term` productions).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or inconsistent stack state.
    fn reduce(&mut self, arena: &mut Arena, prod: ProdID, token: &TermToken) -> Result<()> {
        match prod {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                // Retag the expression token as a finished Term.
                let mut expr_tok = self.tokens_pop()?;
                expr_tok.token_id = TokenID::Term;
                self.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                // Drop the trailing `.` and retag the expression as a Term.
                self.tokens_pop()?;
                let mut expr_tok = self.tokens_pop()?;
                expr_tok.token_id = TokenID::Term;
                self.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                // Empty term: emit a valueless Term token at the lookahead's line.
                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
            }

            ProdID::Term4 => {
                // Term -> .
                self.tokens_pop()?;
                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                self.tokens_pop()?; // `)`
                let index = usize::try_from(self.tokens_pop()?.value)?; // Seq start index
                let func_tok = self.tokens_pop()?;
                let line_no = func_tok.line_no;
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                // Build functor(args...) from the pending sequence elements.
                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Fun, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                self.tokens_pop()?; // `]`
                let seq_tok = self.tokens_pop()?;
                let left_brack_tok = self.tokens_pop()?;
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                self.tokens_pop()?;
                let left_brack_tok = self.tokens_pop()?;
                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                self.tokens_pop()?; // `]`
                let tail = Term::try_from(self.tokens_pop()?.value)?;
                self.tokens_pop()?; // `|`
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let left_brack_tok = self.tokens_pop()?;

                // Cons-list with an explicit tail expression.
                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                self.tokens_pop()?; // `)`
                let seq_tok = self.tokens_pop()?;
                let left_paren_tok = self.tokens_pop()?;

                let index = usize::try_from(seq_tok.value)?;

                // Arena terms parser does not currently support unary tuples.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 {
                    // `(expr)` is just grouping, not a 1-tuple.
                    vs[0]
                } else {
                    arena.tuple(vs)
                };
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.line_no,
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                self.tokens_pop()?;
                let left_paren_tok = self.tokens_pop()?;
                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.line_no,
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                // Literal tokens carry their term value already; just retag.
                let mut tok = self.tokens_pop()?;
                tok.token_id = TokenID::Expr;
                self.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = self.tokens_pop()?;
                let line_no = atom_tok.line_no;
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                // Atoms may be zero-argument operators with defaults to apply.
                let term = self.normalize_term(arena, atom, Fixity::Fun, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = self.tokens_pop()?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena.funcv([oper, expr1, expr2])?;
                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Infix2 => {
                // Expr -> Expr func Seq ) Expr
                // Infix operator written in functional form with extra
                // (typically named) arguments between the parentheses.
                let expr2_tok = self.tokens_pop()?;
                self.tokens_pop()?; // `)`
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Prefix1 => {
                // Expr -> atom Expr
                let expr1_tok = self.tokens_pop()?;
                let oper_tok = self.tokens_pop()?;
                let line_no = oper_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper.view(arena)? {
                    // Arena terms parser currently gives special treatment to unary minus
                    // on integer and real literals (it directly negates them).
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(expr1.view(arena)?, View::Int(_) | View::Real(_)) =>
                    {
                        match expr1.view(arena)? {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena.funcv([oper, expr1])?;
                        self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?
                    }
                };

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Prefix2 => {
                // Expr -> func Seq ) Expr
                let expr1_tok = self.tokens_pop()?;
                self.tokens_pop()?; // `)`
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let line_no = oper_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena.funcv([oper, expr1])?;
                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr func Seq )
                self.tokens_pop()?; // `)`
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = self.tokens_pop()?;
                bare_seq_tok.token_id = TokenID::Seq;
                self.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                // Trailing comma is allowed and discarded.
                self.tokens_pop()?;
                let mut bare_seq_tok = self.tokens_pop()?;
                bare_seq_tok.token_id = TokenID::Seq;
                self.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                // Start a new sequence: remember where it begins in `self.terms`.
                let expr_tok = self.tokens_pop()?;
                let line_no = expr_tok.line_no;
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                self.tokens_push(TermToken::new(
                    TokenID::BareSeq,
                    Value::Index(index),
                    line_no,
                ));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                // Append to the existing sequence; the BareSeq token (with its
                // start index) stays on the token stack untouched.
                let expr_tok = self.tokens_pop()?;
                let expr = Term::try_from(expr_tok.value)?;
                self.tokens_pop()?; // `,`

                self.terms.push(expr);
            }
        }
        Ok(())
    }
974}
975
/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;

    // Operator definitions (op/4 with optional named args) used to extend the
    // default table in the tests below.
    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    // Helper: parse `s` with the default operator table, optionally extended
    // by the `defs` op-list, returning all parsed terms.
    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Result<Vec<Term>> {
        let mut parser = TermParser::try_new(s.bytes().fuse(), Some(parser_oper_defs(arena)))?;
        if let Some(defs) = defs {
            parser.define_opers(arena, defs.bytes().fuse(), None)?;
        }
        parser.try_collect_terms(arena)
    }

    // Precedence: `*` (400) binds tighter than `<=` (350); stray `.`
    // separators produce no terms.
    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .").unwrap();
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    // Without SAMPLE_DEFS, `*` and `<=` are undefined operators: parse fails.
    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::new();
        let _ts = parse(arena, None, "2 * 2 <= 5").unwrap();
    }

    // Exercises lists with tails, dates, interpolated strings, and braces
    // in a single tuple term.
    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x).unwrap();
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
        );
    }
}