arena_terms_parser/parser.rs
//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to correctly resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser consumes tokens produced by [`TermLexer`] and uses a mutable
//! [`Arena`] as shared context to construct arena-allocated [`Term`] values
//! (from the [`arena_terms`] crate) representing atoms, numbers, compound terms,
//! lists, tuples, and other structures.
//!
//! Generated parsing tables and rules are produced by **parlex-gen**’s [`aslr`] tool.
//!
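//! # Example
//!
//! A minimal usage sketch; it mirrors the [`TermParser`] example later in this
//! file, and the input shown here is illustrative:
//!
//! ```rust
//! use arena_terms::Arena;
//! use arena_terms_parser::TermParser;
//! use try_next::{IterInput, TryNextWithContext};
//!
//! let mut arena = Arena::try_with_default_opers().unwrap();
//! let input = IterInput::from("foo(bar, 42).".bytes());
//! let mut parser = TermParser::try_new(input).unwrap();
//! let terms = parser.try_collect_with_context(&mut arena).unwrap();
//! assert_eq!(terms.len(), 1);
//! ```
//!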
//! [`TermParser`]: TermParser
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen

use crate::{TermLexer, TermToken, TokenID, Value};
use arena_terms::{Arena, Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, Term, View};
use parlex::{
    LexerStats, ParlexError, Parser, ParserAction, ParserData, ParserDriver, ParserStats, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID};
use std::marker::PhantomData;
use try_next::TryNextWithContext;

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`TermParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

/// A driver that defines semantic actions for the term parser.
///
/// The [`TermParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and term-specific
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`], and is responsible for building a term.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`TermToken`]s. Must implement
///   [`TryNextWithContext<Arena, Item = TermToken>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = TermToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self, Arena>`:
///   The parser engine parameterized by this driver and context.
/// - `Context = Arena`:
///   Externally supplied context (the term arena).
///
/// # Responsibilities
///
/// The parser driver performs term-specific actions (its construction is
/// sketched after this list):
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   performs semantic actions such as building arena-allocated terms and
///   composing sequences of terms.
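///
/// The driver is constructed internally by [`TermTokenParser::try_new`]; a
/// sketch of the wiring (mirroring the code in `try_new` below):
///
/// ```ignore
/// let driver = TermParserDriver { _marker: PhantomData, terms: Vec::new() };
/// let parser = Parser::new(lexer, driver);
/// ```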
pub struct TermParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
    terms: Vec<Term>,
}

impl<I> ParserDriver for TermParserDriver<I>
where
    I: TryNextWithContext<Arena, LexerStats, Item = TermToken, Error: std::fmt::Display + 'static>,
{
    /// Parser metadata generated from the term grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = TermToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self, Self::Context>;

    /// Context (the term arena).
    type Context = Arena;

    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, used to peek at previously shifted tokens.
    /// - `arena`: Arena used to look up operator definitions.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected [`ParserAction`] to disambiguate the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    ///
    /// # Notes
    /// This grammar contains only **Shift/Reduce** conflicts — cases where
    /// the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other types of conflicts (such as **Reduce/Reduce**) are much more
    /// difficult to handle programmatically and usually require modifying
    /// the grammar itself to eliminate the ambiguity.
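    ///
    /// # Illustration
    ///
    /// A sketch of the decision, assuming an operator table modeled on this
    /// crate's tests (`'+'` infix at precedence 380, `'*'` infix at 400, where
    /// a higher number binds tighter): after parsing `1 + 2` with lookahead
    /// `*`, the completed `'+'` production has precedence 380 < 400, so the
    /// parser shifts, and the expression groups as `'+'(1, '*'(2, 3))`.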
    fn resolve_ambiguity(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, ParlexError> {
        let ambigs = ParData::lookup_ambig(ambig);
        let shift_action = ambigs[0];
        let ParserAction::Shift(_) = shift_action else {
            panic!("expected shift");
        };
        let reduce_action = ambigs[1];
        let ParserAction::Reduce(prod_id) = reduce_action else {
            panic!("expected reduce");
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod_id,
            tok2
        );

        let (fixity1, tok1) = match prod_id {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, parser.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, parser.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, parser.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, parser.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, parser.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, parser.tokens_peek(2))
            }
            _ => {
                return Err(ParlexError {
                    message: format!(
                        "unexpected conflict: reduction of {:?} with shifting token {:?}",
                        prod_id, tok2
                    ),
                    span: tok2.span(),
                });
            }
        };

        let op_tab1 = arena.get_oper(tok1.op_tab_index);
        let op_tab2 = arena.get_oper(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

            // The lookahead may be defined as both an infix and a postfix
            // operator; compare against the range of its precedences.
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                // The completed operator binds tighter: reduce first.
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                // The lookahead operator binds tighter: shift it.
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                // Equal precedence: fall back to associativity.
                if assoc1 == Assoc::None {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok1
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok2
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    return Err(ParlexError {
                        message: format!(
                            "associativity conflict: cannot chain operators {:?} and {:?}; use parentheses",
                            tok1, tok2
                        ),
                        span: tok2.span(),
                    });
                } else if assoc1 == Assoc::Left {
                    Ok(reduce_action)
                } else {
                    Ok(shift_action)
                }
            } else {
                Err(ParlexError {
                    message: format!(
                        "precedence conflict: cannot chain operators {:?} and {:?}; use parentheses",
                        tok1, tok2
                    ),
                    span: tok2.span(),
                })
            }
        } else {
            Ok(shift_action)
        }
    }

    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod_id`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser’s value stack.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, providing access to the token stack.
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod_id`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (normally not used).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or an inconsistent stack state.
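    ///
    /// # Illustration
    ///
    /// A sketch of the stack protocol for one production (the input is
    /// illustrative): reducing `Expr -> Expr atomOper Expr` on `1 + 2` pops
    /// the three tokens for `2`, `+`, and `1`, builds the compound term
    /// `'+'(1, 2)` in the arena, and pushes a single `Expr` token carrying
    /// `Value::Term` with the merged source span.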
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), ParlexError> {
        match prod_id {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                parser.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                let dot = parser.tokens_pop();
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                expr_tok.merge_span(&dot);
                parser.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, token.span()));
            }

            ProdID::Term4 => {
                // Term -> .
                let dot = parser.tokens_pop();
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, dot.span()));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                let right_paren = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut func_tok = parser.tokens_pop();
                func_tok.merge_span(&right_paren);
                let span = func_tok.span();
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                let right_brack_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                let right_brack_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.span(),
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                let right_brack_tok = parser.tokens_pop();
                let tail = Term::try_from(parser.tokens_pop().value)?;
                // Discard the `|` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);

                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                let right_paren_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                let index = usize::try_from(seq_tok.value)?;

                // The arena terms parser does not currently support unary tuples,
                // so `(expr)` is just `expr`.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 { vs[0] } else { arena.tuple(vs) };
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                let right_paren_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                parser.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = parser.tokens_pop();
                let span = atom_tok.span();
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                let term = arena
                    .normalize_term(atom, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = parser.tokens_pop();
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);
                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1, expr2])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                let expr2_tok = parser.tokens_pop();
                // Discard the `)` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                let expr1_tok = parser.tokens_pop();
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper
                    .view(arena)
                    .map_err(|e| ParlexError::from_err(e, span))?
                {
                    // The arena terms parser currently gives special treatment to
                    // unary minus on integer and real literals (it directly
                    // negates them).
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(
                                expr1
                                    .view(arena)
                                    .map_err(|e| ParlexError::from_err(e, span))?,
                                View::Int(_) | View::Real(_)
                            ) =>
                    {
                        match expr1
                            .view(arena)
                            .map_err(|e| ParlexError::from_err(e, span))?
                        {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena
                            .funcv([oper, expr1])
                            .map_err(|e| ParlexError::from_err(e, span))?;
                        arena
                            .normalize_term(term, Fixity::Prefix, op_tab_index)
                            .map_err(|e| ParlexError::from_err(e, span))?
                    }
                };

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                let expr1_tok = parser.tokens_pop();
                // Discard the `)` token.
                parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Prefix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&oper_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                let right_paren_tok = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&right_paren_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = parser.tokens_pop();
                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                // Discard the `,` token.
                parser.tokens_pop();
                let mut bare_seq_tok = parser.tokens_pop();

                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                let expr_tok = parser.tokens_pop();
                let span = expr_tok.span();
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                parser.tokens_push(TermToken::new(TokenID::BareSeq, Value::Index(index), span));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                let expr_tok = parser.tokens_pop();
                let expr = Term::try_from(expr_tok.value)?;
                // Discard the `,` token.
                parser.tokens_pop();

                self.terms.push(expr);
            }
        }
        Ok(())
    }
}

/// Prolog-like term token parser with operator precedence and associativity handling.
///
/// The [`TermTokenParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>`].
/// - **Output**: completed parsing units as [`TermToken`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which is returned
/// as a token with [`Value::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a full stop (`.`) but followed by no further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed term.
/// 2. It then emits an additional token representing the *empty* term
///    (`Value::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input.
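///
/// For instance (a minimal sketch; the input is illustrative):
///
/// ```rust
/// # use arena_terms_parser::TermTokenParser;
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("1 .".bytes());
/// let mut parser = TermTokenParser::try_new(input).unwrap();
/// // Two tokens: the term `1`, then the empty term after the trailing `.`.
/// assert_eq!(parser.try_collect_with_context(&mut arena).unwrap().len(), 2);
/// ```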
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`], covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-definition issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{TermToken, TermTokenParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermTokenParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    parser: Parser<TermLexer<I>, TermParserDriver<TermLexer<I>>, Arena>,
}
impl<I> TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermTokenParser`] for the given input stream.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    ///
    /// # Returns
    /// A fully initialized [`TermTokenParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I) -> Result<Self, ParlexError> {
        let lexer = TermLexer::try_new(input)?;
        let driver = TermParserDriver {
            _marker: PhantomData,
            terms: Vec::new(),
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

/// Defines or extends operator definitions directly from a Prolog-like
/// list of `op(...)` terms read from an input source.
///
/// This allows dynamic addition of new operator fixities and precedence
/// rules during parsing.
///
/// # Parameters
/// - `arena`: Arena allocator used for constructing term structures.
/// - `defs_input`: Input byte iterator yielding the operator definition terms.
///
/// # Errors
/// Returns an error if parsing the operator term list fails or produces
/// an invalid operator specification.
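///
/// # Example
///
/// A minimal sketch, with an operator list modeled on the one in this module's
/// tests (the import path assumes the crate-root re-exports used by the other
/// examples in this file):
///
/// ```rust
/// # use arena_terms_parser::define_opers;
/// # use arena_terms::Arena;
/// # use try_next::IterInput;
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let defs = "[op('+'(x,y),infix,380,left), op('*'(x,y),infix,400,left)]";
/// define_opers(&mut arena, IterInput::from(defs.bytes())).unwrap();
/// ```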
pub fn define_opers<I>(arena: &mut Arena, defs_input: I) -> Result<(), ParlexError>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    let mut defs_parser = TermParser::try_new(defs_input)?;
    while let Some(term) = defs_parser.try_next_with_context(arena)? {
        arena
            .define_opers(term)
            .map_err(|e| ParlexError::from_err(e, None))?;
    }
    Ok(())
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Tokens produced by this parser.
    type Item = TermToken;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
    fn try_next_with_context(
        &mut self,
        context: &mut Arena,
    ) -> Result<Option<TermToken>, ParlexError> {
        self.parser.try_next_with_context(context)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermTokenParser`] for
/// tokenization and produces [`Term`] values stored in an [`Arena`] for
/// efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8>`].
/// - **Output**: completed parsing units as [`Term`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying token parser emits an explicit token at the end of each
/// *parsing unit* (end of “sentence” or expression). This parser uses it to
/// finalize and emit one result. If the input contains multiple independent
/// sentences, you will receive multiple results — one per parsing unit — and
/// `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which the underlying
/// [`TermTokenParser`] reports as a token with [`Value::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a full stop (`.`) but followed by no further expression. Unlike
/// [`TermTokenParser`], this parser *skips* such empty terms and yields only
/// the actual [`Term`] values, returning `None` once the input is exhausted.
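///
/// For instance (a minimal sketch; the input is illustrative):
///
/// ```rust
/// # use arena_terms_parser::TermParser;
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let mut parser = TermParser::try_new(IterInput::from("1 .".bytes())).unwrap();
/// // The empty statement after the trailing `.` is skipped: one term results.
/// assert_eq!(parser.try_collect_with_context(&mut arena).unwrap().len(), 1);
/// ```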
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`], covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-definition issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{TermToken, TermParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    pub(crate) parser: TermTokenParser<I>,
}

impl<I> TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermParser`] for the given input stream.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    ///
    /// # Returns
    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I) -> Result<Self, ParlexError> {
        let parser: TermTokenParser<I> = TermTokenParser::try_new(input)?;
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Terms produced by this parser.
    type Item = Term;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next term, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the parser returns `None`.
    fn try_next_with_context(&mut self, context: &mut Arena) -> Result<Option<Term>, ParlexError> {
        // Skip empty statements (`Value::None`) and yield only actual terms.
        while let Some(TermToken { value, .. }) = self.parser.try_next_with_context(context)? {
            match value {
                Value::Term(term) => return Ok(Some(term)),
                Value::None => continue,
                Value::Index(_) => {
                    return Err(ParlexError {
                        message: "index token not expected".to_string(),
                        span: None,
                    });
                }
            }
        }
        Ok(None)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;
    use try_next::IterInput;

    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Vec<Term> {
        let input = IterInput::from(s.bytes());
        let mut parser = TermParser::try_new(input).expect("cannot create parser");
        if let Some(defs) = defs {
            let defs_input = IterInput::from(defs.bytes());
            define_opers(arena, defs_input).expect("cannot define ops");
        }
        let ts = parser
            .try_collect_with_context(arena)
            .expect("parser error");
        dbg!(parser.stats());
        ts
    }

    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .");
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let _ts = parse(arena, None, "2 * 2 <= 5");
    }

    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = {pos = {x = 0, y = 0}, health = 100}\")"
        );
    }
}
1095}