arena_terms_parser/parser.rs
1//! Parser for Prolog-like terms with operator definitions.
2//!
3//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
4//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
5//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
6//! precedence and associativity rules.
7//!
8//! The parser constructs arena-allocated [`Term`] values (from the [`arena_terms`] crate)
9//! representing atoms, numbers, compound terms, lists, tuples, and other structures.
10//!
11//! # Components
12//! - [`TermLexer`]: Produces [`TermToken`]s for the parser.
13//! - [`OperDefs`]: Defines fixity, precedence, and associativity.
14//! - [`TermParser`]: Performs syntax analysis using generated SLR tables.
15//!
16//! Generated parsing tables and rules are produced by **parlex-gen**’s [`aslr`] tool.
17//!
18//! [`TermParser`]: struct.TermParser
19//! [`TermLexer`]: crate::lexer::TermLexer
20//! [`TermToken`]: crate::lexer::TermToken
21//! [`OperDefs`]: crate::oper::OperDefs
22//! [`arena_terms`]: https://crates.io/crates/arena-terms
23//! [`aslr`]: https://crates.io/crates/parlex-gen
24use crate::lexer::{TermLexer, TermToken, Value};
25use crate::oper::{Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, OperDef, OperDefTab, OperDefs};
26use anyhow::{Context, Result, anyhow, bail};
27use arena_terms::{Arena, IntoTerm, Term, View, atom, func, list};
28use parlex::{Lexer, LexerCtx, LexerData, Token};
29use smartstring::alias::String;
30use std::iter::FusedIterator;
31use std::str::FromStr;
32use std::{fmt, mem};
33
/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process (it is emitted into `OUT_DIR` at build time).
/// It defines the parsing automaton, rule metadata (`ProdID`), token and
/// ambiguity enums (`TokenID`, `AmbigID`), and associated types used by the
/// [`TermParser`].
include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
40
/// Constructs the default operator definitions used by the [`TermParser`].
///
/// This function populates an [`OperDefs`] table in the given [`Arena`],
/// defining built-in operators such as `-` (prefix), `++` (infix), and `=` (infix),
/// along with their precedence and associativity rules.
///
/// Each entry below is an `op/6` term of the shape
/// `op(Template, Fixity, Prec, Assoc, RenameTo, EmbedType)`:
///
/// ```prolog
/// [ op(-(x), prefix, 800, right, none, false),
/// op(++(x, y), infix, 500, left, none, false),
/// op(=(x, y), infix, 100, right, none, false),
/// op(op(f,
/// =(type, fun),
/// =(prec, 0),
/// =(assoc, none),
/// =(rename_to, none),
/// =(embed_type, false)),
/// fun, 0, none, none, false)
/// ]
/// ```
///
/// The last entry defines `op` itself as a `fun` operator whose named
/// arguments (`type`, `prec`, `assoc`, `rename_to`, `embed_type`) carry
/// defaults, so user-supplied `op/…` terms may omit trailing arguments.
///
/// The resulting definitions form the standard operator environment available
/// to the parser when no user-defined operator table is provided.
///
/// # Parameters
/// - `arena`: The [`Arena`] used for allocating operator term structures.
///
/// # Returns
/// An initialized [`OperDefs`] instance containing the default operator set.
///
/// [`TermParser`]: crate::parser::TermParser
/// [`OperDefs`]: crate::oper::OperDefs
/// [`Arena`]: arena_terms::Arena
/// [`aslr`]: https://crates.io/crates/parlex-gen
pub fn parser_oper_defs(arena: &mut Arena) -> OperDefs {
    // Build the literal list of `op/6` definition terms in the arena.
    let term = list![
        // Prefix minus: op(-(x), prefix, 800, right, none, false)
        func!(
            "op";
            func!("-"; atom!("x")),
            atom!("prefix"),
            800,
            atom!("right"),
            atom!("none"),
            atom!("false"),
        ),
        // Infix concatenation: op(++(x, y), infix, 500, left, none, false)
        func!(
            "op";
            func!("++"; atom!("x"), atom!("y")),
            atom!("infix"),
            500,
            atom!("left"),
            atom!("none"),
            atom!("false"),
        ),
        // Infix `=` (used for named arguments): op(=(x, y), infix, 100, right, none, false)
        func!(
            "op";
            func!("="; atom!("x"), atom!("y")),
            atom!("infix"),
            100,
            atom!("right"),
            atom!("none"),
            atom!("false"),
        ),
        // `op` itself, with named-argument defaults for its optional fields.
        func!(
            "op";
            func!(
                "op";
                atom!("f"),
                func!("="; atom!("type"), atom!("fun")),
                func!("="; atom!("prec"), 0),
                func!("="; atom!("assoc"), atom!("none")),
                func!("="; atom!("rename_to"), atom!("none")),
                func!("="; atom!("embed_type"), atom!("false")),
            ),
            atom!("fun"),
            0,
            atom!("none"),
            atom!("none"),
            atom!("false"),
        ),
        => arena
    ];
    // The built-in definitions above are well-formed by construction,
    // so the conversion cannot fail here.
    OperDefs::try_from_ops(arena, term).unwrap()
}
124
/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) runtime library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Core Components
/// - [`ParserCtx`] — manages parse states, actions, and reductions generated by
///   the `aslr` tool.
/// - [`TermLexer`] — provides token streams of [`TermToken`]s for the parser.
/// - [`Arena`] — stores terms compactly for minimal heap allocation.
///
/// # Typical Workflow
/// 1. Create a [`TermParser`] from a byte iterator (`try_new`).
/// 2. (Optionally) extend or redefine operator sets using [`define_opers`].
/// 3. Call [`try_next_term`] or [`try_collect_terms`] to parse one or more
///    terms into arena-backed [`Term`]s.
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::oper::OperDefs
/// [`ParserCtx`]: parlex::ParserCtx
/// [`TermLexer`]: crate::lexer::TermLexer
/// [`TermToken`]: crate::lexer::TermToken
pub struct TermParser<I>
where
    I: FusedIterator<Item = u8>,
{
    /// The parser context that manages parse state, tables, and token input.
    ///
    /// This field wraps the generated `ParserData` and integrates the lexer
    /// with the runtime parser loop from [`parlex`]. It drives token reading,
    /// state transitions, and reductions.
    ctx: ParserCtx<TermLexer<I>, <Self as Parser<Arena>>::ParserData, Arena>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
    terms: Vec<Term>,
}
172
173/// Implementation of [`TermParser`] methods.
174///
175/// This `impl` defines the core construction and execution logic for the
176/// Prolog-like term parser. It provides utilities to initialize a new
177/// parser instance, collect or stream parsed [`Term`] values, dynamically
178/// extend operator definitions, and normalize parsed terms.
179///
180/// The parser integrates with the [`parlex`] runtime library and operates over
181/// tokens produced by the [`TermLexer`], yielding arena-allocated [`Term`]
182/// values suitable for further semantic processing.
183///
184/// # Type Parameters
185/// - `I`: The input source, which must implement [`FusedIterator`] yielding bytes.
186impl<I> TermParser<I>
187where
188 I: FusedIterator<Item = u8>,
189{
190 /// Creates a new [`TermParser`] for the given input stream.
191 ///
192 /// Initializes an internal [`TermLexer`] (with optional operator definitions)
193 /// and wraps it in a [`ParserCtx`].
194 ///
195 /// # Parameters
196 /// - `input`: A fused iterator over bytes to be parsed.
197 /// - `opers`: Optional [`OperDefs`] defining operator precedence and fixity.
198 ///
199 /// # Returns
200 /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
201 ///
202 /// # Errors
203 /// Returns an error if the lexer context cannot be initialized
204 /// or if the generated parser tables fail to load.
205 pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
206 let lexer = TermLexer::try_new(input, opers)?;
207 let ctx = ParserCtx::new(lexer);
208 Ok(Self {
209 ctx,
210 terms: Vec::new(),
211 })
212 }
213
214 /// Parses all terms from the input until end-of-stream.
215 ///
216 /// Repeatedly calls [`try_next_term`] until no more terms are available,
217 /// collecting them into a single [`Vec`] of [`Term`]s.
218 ///
219 /// # Returns
220 /// A vector of all successfully parsed [`Term`]s.
221 ///
222 /// # Errors
223 /// Returns an error if any term fails syntactic or semantic validation.
224 pub fn try_collect_terms(&mut self, arena: &mut Arena) -> Result<Vec<Term>> {
225 let mut ts = Vec::new();
226 while let Some(t) = self.try_next_term(arena)? {
227 ts.push(t);
228 }
229 Ok(ts)
230 }
231
232 /// Parses the next term from the input stream.
233 ///
234 /// Returns the next complete [`Term`], or `None` if end-of-input is reached.
235 /// This method performs incremental parsing suitable for stream-based term input.
236 ///
237 /// # Parameters
238 /// - `arena`: Arena for allocating internal term structures.
239 ///
240 /// # Errors
241 /// Returns an error on unexpected tokens, incomplete constructs,
242 /// or invalid operator combinations.
243 #[inline]
244 pub fn try_next_term(&mut self, arena: &mut Arena) -> Result<Option<Term>> {
245 while let Some(tok) = self.try_next(arena)? {
246 match tok.token_id {
247 TokenID::Term => match tok.value {
248 Value::None => {}
249 Value::Term(term) => return Ok(Some(term)),
250 value => bail!("Unexpected token value {:?}", value),
251 },
252 token_id => bail!("Unexpected token id {:?}", token_id),
253 }
254 }
255 Ok(None)
256 }
257
258 /// Defines or extends operator definitions directly from a Prolog-like
259 /// `op/6` term list read from a separate input source.
260 ///
261 /// This allows dynamic addition of new operator fixities and precedence
262 /// rules during runtime parsing.
263 ///
264 /// # Parameters
265 /// - `arena`: Arena allocator used for constructing term structures.
266 /// - `defs_input`: Input byte iterator yielding the operator definition terms.
267 /// - `opers`: Optional initial operator table to extend.
268 /// If `None`, the default operator definitions are used.
269 ///
270 /// # Errors
271 /// Returns an error if parsing the operator term list fails or produces
272 /// an invalid operator specification.
273 pub fn define_opers<J: FusedIterator<Item = u8>>(
274 &mut self,
275 arena: &mut Arena,
276 defs_input: J,
277 opers: Option<OperDefs>,
278 ) -> Result<()> {
279 let opers = match opers {
280 Some(opers) => opers,
281 None => parser_oper_defs(arena),
282 };
283
284 let defs_lexer = TermLexer::try_new(defs_input, Some(opers))?;
285 let defs_ctx = ParserCtx::new(defs_lexer);
286 let mut defs_parser = TermParser {
287 ctx: defs_ctx,
288 terms: Vec::new(),
289 };
290 while let Some(term) = defs_parser.try_next_term(arena)? {
291 log::trace!(
292 "Stats: {:?}, {:?}",
293 defs_parser.ctx().lexer.stats(),
294 defs_parser.stats()
295 );
296 defs_parser
297 .ctx_mut()
298 .lexer
299 .opers
300 .define_opers(arena, term)?;
301 }
302 let defs_opers = std::mem::take(&mut defs_parser.ctx_mut().lexer.opers);
303 self.ctx_mut().lexer.opers = defs_opers;
304
305 Ok(())
306 }
307
308 /// Normalizes a parsed term using its operator definition.
309 ///
310 /// This process transforms terms according to their declared fixity,
311 /// applying named default arguments and other attributes specified
312 /// in the corresponding operator definition.
313 ///
314 /// # Parameters
315 /// - `arena`: Arena used to store normalized term structures.
316 /// - `term`: The parsed term to normalize.
317 /// - `fixity`: Operator fixity (`fun`, `prefix`, `infix`, or `postfix`).
318 /// - `op_tab_index`: Optional index into the operator definition table, if the
319 /// term corresponds to a defined operator.
320 ///
321 /// # Returns
322 /// A normalized [`Term`] allocated in the given arena, ready for evaluation or
323 /// further semantic analysis.
324 ///
325 /// # Errors
326 /// Returns an error if normalization fails due to invalid fixity, mismatched
327 /// arity, or inconsistent operator metadata.
328 fn normalize_term(
329 &self,
330 arena: &mut Arena,
331 term: Term,
332 fixity: Fixity,
333 op_tab_index: Option<usize>,
334 ) -> Result<Term> {
335 match self.ctx().lexer.opers.get(op_tab_index)[fixity] {
336 Some(ref op_def) => {
337 let (functor, vs) = match term.view(arena)? {
338 View::Atom(_) => (term, &[] as &[Term]),
339 View::Func(_, functor, args) => {
340 if args.is_empty() {
341 bail!("invalid Func");
342 }
343 (*functor, args)
344 }
345 _ => {
346 return Ok(term);
347 }
348 };
349 let name = functor.atom_name(arena)?;
350
351 let n_required_args = OperDef::required_arity(fixity);
352 if vs.len() < n_required_args {
353 bail!(
354 "missing {} required arguments in term {:?}",
355 n_required_args - vs.len(),
356 name
357 );
358 }
359
360 let args = &op_def.args;
361 let mut xs: Vec<Option<Term>> = vec![None; args.len()];
362
363 for (i, value) in vs.iter().enumerate() {
364 if i < n_required_args {
365 xs[i] = Some(*value);
366 } else {
367 match value.view(arena)? {
368 View::Func(ar, functor, vs)
369 if vs.len() == 2 && functor.atom_name(ar)? == "=" =>
370 {
371 let arg_name = vs[0].atom_name(arena)?;
372
373 if let Some(pos) = args.iter().position(|x| x.name == arg_name) {
374 if xs[pos].is_none() {
375 xs[pos] = Some(vs[1]);
376 } else {
377 bail!(
378 "cannot redefine argument {:?} at position {} in {:?}",
379 arg_name,
380 pos,
381 name
382 );
383 }
384 } else {
385 bail!("invalid argument name {:?} in {:?}", arg_name, name);
386 }
387 }
388 _ => {
389 if xs[i].is_none() {
390 xs[i] = Some(*value);
391 } else {
392 bail!(
393 "cannot redefine argument {:?} at position {} in {:?}",
394 args[i].name,
395 i,
396 name
397 );
398 }
399 }
400 }
401 }
402 }
403
404 let vs: Option<Vec<_>> = xs
405 .into_iter()
406 .enumerate()
407 .map(|(i, x)| x.or(args[i].default))
408 .collect();
409 let mut vs = match vs {
410 Some(vs) => vs,
411 None => bail!("missing arguments in {:?}", name),
412 };
413
414 let rename_to = match op_def.rename_to {
415 Some(rename_to) => rename_to,
416 None => functor,
417 };
418
419 if op_def.embed_fixity {
420 vs.insert(0, arena.atom(String::from(fixity)));
421 }
422
423 if vs.is_empty() {
424 Ok(rename_to)
425 } else {
426 Ok(arena.funcv(std::iter::once(&rename_to).chain(vs.iter()))?)
427 }
428 }
429 None => match fixity {
430 Fixity::Fun => Ok(term),
431 _ => bail!("missing opdef for fixity {:?}", fixity),
432 },
433 }
434 }
435}
436
/// Implements the [`Parser`] trait for [`TermParser`], integrating with the **parlex** runtime library.
///
/// This binding connects the generated SLR parser tables (`ParData`) with the concrete
/// term parser. It exposes the parser context, statistics, and the key callbacks
/// required during shift/reduce parsing: ambiguity resolution and reductions.
///
/// # Associated Types
/// - `Lexer` — The input lexer producing [`TermToken`]s (`TermLexer<I>`).
/// - `ParserData` — The generated parser tables and rule enums (`ParData`).
impl<I> Parser<Arena> for TermParser<I>
where
    I: FusedIterator<Item = u8>,
{
    type Lexer = TermLexer<I>;
    type ParserData = ParData;

    /// Returns a shared reference to the internal [`ParserCtx`].
    fn ctx(&self) -> &ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &self.ctx
    }

    /// Returns a mutable reference to the internal [`ParserCtx`].
    fn ctx_mut(&mut self) -> &mut ParserCtx<Self::Lexer, Self::ParserData, Arena> {
        &mut self.ctx
    }

    /// Returns cumulative parsing statistics (tokens, shifts, reductions, ambiguities).
    fn stats(&self) -> ParserStats {
        self.ctx().stats.clone()
    }

    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `_arena`: Arena used to allocate or inspect terms.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected parser [`Action`] to disambiguate the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    fn resolve_ambiguity(
        &mut self,
        _arena: &mut Arena,
        ambig: AmbigID,
        tok2: &TermToken,
    ) -> Result<Action> {
        let ambigs = ParData::lookup_ambig(ambig);

        // The generated table lists the shift alternative first and the
        // reduce alternative second; both asserts pin that layout.
        let shift_action = ambigs[0];
        assert!(matches!(shift_action, Action::Shift(_)));

        let reduce_action = ambigs[1];
        assert!(matches!(reduce_action, Action::Reduce(_)));

        let Action::Reduce(prod) = reduce_action else {
            bail!("can't match reduce action")
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod,
            tok2
        );

        // Locate the operator token (`tok1`) inside the candidate reduction;
        // the peek offset from the top of the token stack depends on the
        // shape of the production being reduced.
        let (fixity1, tok1) = match prod {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, self.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, self.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, self.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, self.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, self.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, self.tokens_peek(2))
            }
            _ => bail!(
                "unexpected conflict: reduction of {:?} with shifting token {:?}",
                prod,
                tok2
            ),
        };

        let op_tab1 = self.ctx().lexer.opers.get(tok1.op_tab_index);
        let op_tab2 = self.ctx().lexer.opers.get(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                // `tok1` has no definition for this fixity; defer to shifting.
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // `tok2` may be defined as infix and/or postfix; compute the range
            // of precedences it could take in operator position. Missing
            // fixities fall back to MAX/MIN so they never win the comparison.
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // `tok1` binds tighter than every reading of `tok2`: reduce.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // Some reading of `tok2` binds tighter: shift.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Exactly equal precedence: fall back to associativity.
                if assoc1 == Assoc::None {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok1
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    bail!(
                        "precedence conflict: cannot chain non-associative operator {:?}; use parenthesis",
                        tok2
                    );
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    bail!(
                        "associativity conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                        tok1,
                        tok2
                    );
                } else {
                    // Same associativity on both sides: left-assoc groups to
                    // the left (reduce), right-assoc to the right (shift).
                    if assoc1 == Assoc::Left {
                        Ok(reduce_action)
                    } else {
                        Ok(shift_action)
                    }
                }
            } else {
                // Overlapping but unequal precedence ranges cannot be ordered.
                bail!(
                    "precedence conflict: cannot chain operators {:?} and {:?}; use parenthesis",
                    tok1,
                    tok2
                );
            }
        } else {
            // `tok2` is not an operator, so there is no precedence contest.
            Ok(shift_action)
        }
    }

    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser’s value stack.
    ///
    /// # Parameters
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (normally not used).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or inconsistent stack state.
    fn reduce(&mut self, arena: &mut Arena, prod: ProdID, token: &TermToken) -> Result<()> {
        // Note: tokens are popped in reverse order of the production's
        // right-hand side (rightmost symbol first).
        match prod {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                let mut expr_tok = self.tokens_pop()?;
                expr_tok.token_id = TokenID::Term;
                self.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                self.tokens_pop()?;
                let mut expr_tok = self.tokens_pop()?;
                expr_tok.token_id = TokenID::Term;
                self.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
            }

            ProdID::Term4 => {
                // Term -> .
                self.tokens_pop()?;
                self.tokens_push(TermToken::new(TokenID::Term, Value::None, token.line_no));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                self.tokens_pop()?;
                // The Seq token's value is an index into `self.terms` marking
                // where this sequence's elements begin.
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let func_tok = self.tokens_pop()?;
                let line_no = func_tok.line_no;
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Fun, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                self.tokens_pop()?;
                let seq_tok = self.tokens_pop()?;
                let left_brack_tok = self.tokens_pop()?;
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                self.tokens_pop()?;
                let left_brack_tok = self.tokens_pop()?;
                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                self.tokens_pop()?;
                let tail = Term::try_from(self.tokens_pop()?.value)?;
                self.tokens_pop()?;
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let left_brack_tok = self.tokens_pop()?;

                // `listc` builds a list with an explicit (possibly improper) tail.
                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.line_no,
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                self.tokens_pop()?;
                let seq_tok = self.tokens_pop()?;
                let left_paren_tok = self.tokens_pop()?;

                let index = usize::try_from(seq_tok.value)?;

                // Arena terms parser does not currently support unary tuples.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 {
                    // A single parenthesized expression is just that expression.
                    vs[0]
                } else {
                    arena.tuple(vs)
                };
                self.terms.truncate(index);

                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.line_no,
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                self.tokens_pop()?;
                let left_paren_tok = self.tokens_pop()?;
                self.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.line_no,
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                // Literal tokens carry their term value already; just retag.
                let mut tok = self.tokens_pop()?;
                tok.token_id = TokenID::Expr;
                self.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = self.tokens_pop()?;
                let line_no = atom_tok.line_no;
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                // Normalization may expand a bare atom into a compound term
                // (e.g. applying defaults from its operator definition).
                let term = self.normalize_term(arena, atom, Fixity::Fun, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = self.tokens_pop()?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena.funcv([oper, expr1, expr2])?;
                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Infix2 => {
                // Expr -> Expr func Seq ) Expr
                let expr2_tok = self.tokens_pop()?;
                self.tokens_pop()?;
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                // Operands come first, then the extra arguments from Seq.
                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Infix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Prefix1 => {
                // Expr -> atom Expr
                let expr1_tok = self.tokens_pop()?;
                let oper_tok = self.tokens_pop()?;
                let line_no = oper_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper.view(arena)? {
                    // Arena terms parser currently gives special treatment to unary minus
                    // on integer and real literals (it directly negates them).
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(expr1.view(arena)?, View::Int(_) | View::Real(_)) =>
                    {
                        match expr1.view(arena)? {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena.funcv([oper, expr1])?;
                        self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?
                    }
                };

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Prefix2 => {
                // Expr -> func Seq ) Expr
                let expr1_tok = self.tokens_pop()?;
                self.tokens_pop()?;
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let line_no = oper_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Prefix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena.funcv([oper, expr1])?;
                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr func Seq )
                self.tokens_pop()?;
                let index = usize::try_from(self.tokens_pop()?.value)?;
                let oper_tok = self.tokens_pop()?;
                let expr1_tok = self.tokens_pop()?;
                let line_no = expr1_tok.line_no;
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena.funcv(vs)?;
                self.terms.truncate(index);

                let term = self.normalize_term(arena, term, Fixity::Postfix, op_tab_index)?;

                self.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), line_no));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = self.tokens_pop()?;
                bare_seq_tok.token_id = TokenID::Seq;
                self.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                // Trailing comma is allowed and discarded.
                self.tokens_pop()?;
                let mut bare_seq_tok = self.tokens_pop()?;
                bare_seq_tok.token_id = TokenID::Seq;
                self.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                // Start a new sequence: remember where its elements begin on
                // the `terms` stack and push the first element.
                let expr_tok = self.tokens_pop()?;
                let line_no = expr_tok.line_no;
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                self.tokens_push(TermToken::new(
                    TokenID::BareSeq,
                    Value::Index(index),
                    line_no,
                ));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                // Extend the current sequence; the BareSeq token (with its
                // start index) stays on the token stack untouched.
                let expr_tok = self.tokens_pop()?;
                let expr = Term::try_from(expr_tok.value)?;
                self.tokens_pop()?;

                self.terms.push(expr);
            }
        }
        Ok(())
    }
}
974
/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;

    /// A user-supplied operator table, parsed by `define_opers` on top of the
    /// built-in defaults. Trailing `op/…` arguments (rename_to, embed_type)
    /// are filled from the defaults declared in `parser_oper_defs`.
    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    /// Helper: parse `s` with the default operator table, optionally extended
    /// by the `op/…` definitions in `defs`, and collect all parsed terms.
    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Result<Vec<Term>> {
        let mut parser = TermParser::try_new(s.bytes().fuse(), Some(parser_oper_defs(arena)))?;
        if let Some(defs) = defs {
            parser.define_opers(arena, defs.bytes().fuse(), None)?;
        }
        parser.try_collect_terms(arena)
    }

    /// Leading/trailing bare `.` separators produce no terms; only the single
    /// expression between them is returned, grouped by operator precedence
    /// (`*` at 400 binds tighter than `<=` at 350).
    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .").unwrap();
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    /// Without SAMPLE_DEFS, `*` and `<=` are undefined operators, so parsing
    /// must fail (hence the expected panic from `unwrap`).
    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::new();
        let _ts = parse(arena, None, "2 * 2 <= 5").unwrap();
    }

    /// Exercises tuples, lists with tails, dates, interpolated strings, and
    /// braced raw strings in a single top-level term.
    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x).unwrap();
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
        );
    }
}
1047}