arena_terms_parser/parser.rs
//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser consumes tokens produced by [`TermLexer`] and uses a mutable
//! [`Arena`] as shared context to construct arena-allocated [`Term`] values
//! (from the [`arena_terms`] crate) representing atoms, numbers, compound terms,
//! lists, tuples, and other structures.
//!
//! The parsing tables and rules are generated by **parlex-gen**'s [`aslr`] tool.
//!
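//! # Example
//!
//! A minimal end-to-end sketch (using the same crate-root re-exports as the
//! type-level examples below):
//!
//! ```rust
//! # use arena_terms_parser::{Encoding, TermParser};
//! # use arena_terms::Arena;
//! # use try_next::{IterInput, TryNextWithContext};
//! let mut arena = Arena::try_with_default_opers().unwrap();
//! let input = IterInput::from("foo(1, 2.5, [a, b]) .".bytes());
//! let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
//! let terms = parser.try_collect_with_context(&mut arena).unwrap();
//! assert_eq!(terms.len(), 1);
//! ```
//!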
//! [`TermParser`]: TermParser
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen

use crate::encoding::Encoding;
use crate::{TermLexer, TermToken, TokenID, Value};
use arena_terms::{Arena, Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, Term, View};
use parlex::{
    LexerStats, ParlexError, Parser, ParserAction, ParserData, ParserDriver, ParserStats, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID};
use std::marker::PhantomData;
use try_next::TryNextWithContext;

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`TermParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

/// A driver that defines semantic actions for the term parser.
///
/// The [`TermParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and the term-building
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`] and is responsible for building a term.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`TermToken`]s. Must implement
///   [`TryNextWithContext<Arena, Item = TermToken>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = TermToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self, Arena>`:
///   The parser engine parameterized by this driver and context.
/// - `Context = Arena`:
///   Externally supplied context.
///
/// Errors are reported as [`ParlexError`] values.
///
/// # Responsibilities
///
/// The parser driver performs term-specific actions:
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   performs semantic actions such as constructing compound terms, lists,
///   and tuples, and normalizing them against the operator table.
pub struct TermParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
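    ///
    /// A sketch of how this stack evolves while parsing `[1, 2, 3]` at the
    /// top level (stack initially empty; production names as in [`ProdID`]):
    ///
    /// ```text
    /// BareSeq1 (BareSeq -> Expr)           push 1; emit Value::Index(0)
    /// BareSeq2 (BareSeq -> BareSeq , Expr) push 2, then push 3
    /// List     (Expr -> [ Seq ])           list from terms[0..]; truncate to 0
    /// ```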
    terms: Vec<Term>,
}

impl<I> ParserDriver for TermParserDriver<I>
where
    I: TryNextWithContext<Arena, LexerStats, Item = TermToken, Error: std::fmt::Display + 'static>,
{
    /// Parser metadata generated from the term grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = TermToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self, Self::Context>;

    /// Context (the term arena shared with the lexer).
    type Context = Arena;

    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, used to peek at tokens on the value stack.
    /// - `arena`: Arena used to look up operator definitions.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected [`ParserAction`] that disambiguates the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    ///
    /// # Notes
    /// This grammar contains only **shift/reduce** conflicts — cases where
    /// the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other kinds of conflicts (such as **reduce/reduce**) are much harder to
    /// handle programmatically and usually require modifying the grammar
    /// itself to eliminate the ambiguity.
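    ///
    /// For example, resolving `2 * 2 <= 5` with `'*'` at precedence 400 and
    /// `'<='` at 350 (the definitions used in this module's tests), where a
    /// larger precedence number binds tighter:
    ///
    /// ```text
    /// stack: Expr(2) '*' Expr(2)      lookahead: '<='
    /// prec1 = 400 > min_prec2 = 350   => reduce first
    /// result: '<='('*'(2, 2), 5)
    /// ```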
    fn resolve_ambiguity(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, ParlexError> {
        let ambigs = ParData::lookup_ambig(ambig);
        let shift_action = ambigs[0];
        let ParserAction::Shift(_) = shift_action else {
            panic!("expected shift");
        };
        let reduce_action = ambigs[1];
        let ParserAction::Reduce(prod_id) = reduce_action else {
            panic!("expected reduce");
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod_id,
            tok2
        );

        let (fixity1, tok1) = match prod_id {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, parser.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, parser.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, parser.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, parser.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, parser.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, parser.tokens_peek(2))
            }
            _ => {
                return Err(ParlexError {
                    message: format!(
                        "unexpected conflict: reduction of {:?} with shifting token {:?}",
                        prod_id, tok2
                    ),
                    span: tok2.span(),
                });
            }
        };

        let op_tab1 = arena.get_oper(tok1.op_tab_index);
        let op_tab2 = arena.get_oper(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

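            // The lookahead operator may be defined for several fixities
            // (e.g. both infix and postfix), so compare `prec1` against the
            // full range of precedences `tok2` could take in this position.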
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                if assoc1 == Assoc::None {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok1
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok2
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    return Err(ParlexError {
                        message: format!(
                            "associativity conflict: cannot chain operators {:?} and {:?}; use parentheses",
                            tok1, tok2
                        ),
                        span: tok2.span(),
                    });
                } else if assoc1 == Assoc::Left {
                    Ok(reduce_action)
                } else {
                    Ok(shift_action)
                }
            } else {
                Err(ParlexError {
                    message: format!(
                        "precedence conflict: cannot chain operators {:?} and {:?}; use parentheses",
                        tok1, tok2
                    ),
                    span: tok2.span(),
                })
            }
        } else {
            Ok(shift_action)
        }
    }

    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod_id`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser’s value stack.
    ///
    /// # Parameters
    /// - `parser`: The parser engine whose value stack is popped and pushed.
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod_id`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (normally not used).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or an inconsistent stack state.
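    ///
    /// A sketch of the `Infix1` reduction (`Expr -> Expr atomOper Expr`):
    ///
    /// ```text
    /// value stack before:  ... Expr(a) atomOper(+) Expr(b)
    /// pops:                b, then +, then a
    /// builds:              funcv([+, a, b]) = '+'(a, b), then normalize_term
    /// value stack after:   ... Expr('+'(a, b))
    /// ```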
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), ParlexError> {
        match prod_id {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                parser.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                let dot = parser.tokens_pop();
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                expr_tok.merge_span(&dot);
                parser.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, token.span()));
            }

            ProdID::Term4 => {
                // Term -> .
                let dot = parser.tokens_pop();
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, dot.span()));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                let right_paren = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut func_tok = parser.tokens_pop();
                func_tok.merge_span(&right_paren);
                let span = func_tok.span();
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                let right_brack_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                let right_brack_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.span(),
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                let right_brack_tok = parser.tokens_pop();
                let tail = Term::try_from(parser.tokens_pop().value)?;
                parser.tokens_pop(); // discard '|'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);

                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                let right_paren_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                let index = usize::try_from(seq_tok.value)?;

                // The arena terms parser does not currently support unary tuples:
                // a parenthesized single expression unwraps to that expression.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 {
                    vs[0]
                } else {
                    arena.tuple(vs)
                };
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                let right_paren_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                parser.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = parser.tokens_pop();
                let span = atom_tok.span();
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                let term = arena
                    .normalize_term(atom, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = parser.tokens_pop();
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);
                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1, expr2])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                let expr2_tok = parser.tokens_pop();
                parser.tokens_pop(); // discard ')'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                let expr1_tok = parser.tokens_pop();
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper
                    .view(arena)
                    .map_err(|e| ParlexError::from_err(e, span))?
                {
                    // The parser currently gives special treatment to unary minus
                    // on integer and real literals: it directly negates them.
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(
                                expr1
                                    .view(arena)
                                    .map_err(|e| ParlexError::from_err(e, span))?,
                                View::Int(_) | View::Real(_)
                            ) =>
                    {
                        match expr1
                            .view(arena)
                            .map_err(|e| ParlexError::from_err(e, span))?
                        {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena
                            .funcv([oper, expr1])
                            .map_err(|e| ParlexError::from_err(e, span))?;
                        arena
                            .normalize_term(term, Fixity::Prefix, op_tab_index)
                            .map_err(|e| ParlexError::from_err(e, span))?
                    }
                };

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                let expr1_tok = parser.tokens_pop();
                parser.tokens_pop(); // discard ')'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Prefix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&oper_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                let right_paren_tok = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&right_paren_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = parser.tokens_pop();
                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                parser.tokens_pop(); // discard ','
                let mut bare_seq_tok = parser.tokens_pop();

                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                let expr_tok = parser.tokens_pop();
                let span = expr_tok.span();
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                parser.tokens_push(TermToken::new(TokenID::BareSeq, Value::Index(index), span));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                let expr_tok = parser.tokens_pop();
                let expr = Term::try_from(expr_tok.value)?;
                parser.tokens_pop(); // discard ','

                self.terms.push(expr);
            }
        }
        Ok(())
    }
}

/// Prolog-like term token parser with operator precedence and associativity
/// handling.
///
/// The [`TermTokenParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>`].
/// - **Output**: completed parsing units as [`TermToken`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which is returned
/// as a token with [`Value::None`]. This occurs, for example, when the last
/// statement in the input is terminated by a dot (`.`) but followed by no
/// further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed term.
/// 2. It then emits an additional token representing the *empty* term
///    (`Value::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input.
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`] values, covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-table issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{Encoding, TermToken, TermTokenParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermTokenParser::try_new(input, Encoding::Utf8).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    parser: Parser<TermLexer<I>, TermParserDriver<TermLexer<I>>, Arena>,
}

impl<I> TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermTokenParser`] for the given input stream and encoding.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    /// - `encoding`: The character encoding of the input stream.
    ///
    /// # Returns
    /// A fully initialized [`TermTokenParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I, encoding: Encoding) -> Result<Self, ParlexError> {
        let lexer = TermLexer::try_new(input, encoding)?;
        let driver = TermParserDriver {
            _marker: PhantomData,
            terms: Vec::new(),
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

/// Defines or extends operator definitions directly from a Prolog-like
/// `op(...)` term list read from an input source.
///
/// This allows dynamic addition of new operator fixities and precedence
/// rules during parsing.
///
/// # Parameters
/// - `arena`: Arena allocator used for constructing term structures.
/// - `defs_input`: Input byte iterator yielding the operator definition terms.
/// - `encoding`: Input encoding of the definitions stream.
///
/// # Errors
/// Returns an error if parsing the operator term list fails or produces
/// an invalid operator specification.
pub fn define_opers<I>(
    arena: &mut Arena,
    defs_input: I,
    encoding: Encoding,
) -> Result<(), ParlexError>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    let mut defs_parser = TermParser::try_new(defs_input, encoding)?;
    while let Some(term) = defs_parser.try_next_with_context(arena)? {
        arena
            .define_opers(term)
            .map_err(|e| ParlexError::from_err(e, None))?;
    }
    Ok(())
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Tokens produced by this parser.
    type Item = TermToken;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **parlex** parser to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens — one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
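    ///
    /// A sketch of the emitted sequence for the input `"foo. bar."` (note the
    /// trailing *empty* term described under *Empty Statements* on
    /// [`TermTokenParser`]):
    ///
    /// ```text
    /// Term(foo), Term(bar), Term(Value::None), then None
    /// ```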
    fn try_next_with_context(
        &mut self,
        context: &mut Arena,
    ) -> Result<Option<TermToken>, ParlexError> {
        self.parser.try_next_with_context(context)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermTokenParser`] for
/// tokenization and produces [`Term`] values stored in an [`Arena`] for
/// efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8>`].
/// - **Output**: completed parsing units as [`Term`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying token parser emits one result token at the end of each
/// *parsing unit* (end of “sentence” or expression). If the input contains
/// multiple independent sentences, you will receive multiple terms — one per
/// unit — and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which the token layer
/// reports with [`Value::None`]. This occurs, for example, when the last
/// statement in the input is terminated by a dot (`.`) but followed by no
/// further expression. [`TermParser`] skips such empty results and yields
/// only complete [`Term`] values, returning `None` once all input has been
/// consumed.
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`] values, covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-table issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{Encoding, TermToken, TermParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    pub(crate) parser: TermTokenParser<I>,
}

impl<I> TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermParser`] for the given input stream and encoding.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    /// - `encoding`: The character encoding of the input stream.
    ///
    /// # Returns
    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I, encoding: Encoding) -> Result<Self, ParlexError> {
        let parser: TermTokenParser<I> = TermTokenParser::try_new(input, encoding)?;
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Terms produced by this parser.
    type Item = Term;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next term, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// Empty terms (reported by the token layer as [`Value::None`]) are
    /// skipped rather than returned: one [`Term`] is yielded per completed
    /// parsing unit, and once all input has been consumed this method
    /// returns `None`.
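    ///
    /// A minimal usage sketch (same pattern as the type-level example):
    ///
    /// ```rust
    /// # use arena_terms_parser::{Encoding, TermParser};
    /// # use arena_terms::Arena;
    /// # use try_next::{IterInput, TryNextWithContext};
    /// let mut arena = Arena::try_with_default_opers().unwrap();
    /// let input = IterInput::from("foo(1). bar".bytes());
    /// let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
    /// while let Some(term) = parser.try_next_with_context(&mut arena).unwrap() {
    ///     println!("{}", term.display(&arena));
    /// }
    /// ```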
    fn try_next_with_context(&mut self, context: &mut Arena) -> Result<Option<Term>, ParlexError> {
        while let Some(TermToken { value, .. }) = self.parser.try_next_with_context(context)? {
            match value {
                Value::Term(term) => return Ok(Some(term)),
                Value::None => continue,
                Value::Index(_) => {
                    return Err(ParlexError {
                        message: "index token not expected".to_string(),
                        span: None,
                    });
                }
            }
        }
        Ok(None)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;
    use try_next::IterInput;

    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Vec<Term> {
        let input = IterInput::from(s.bytes());
        let mut parser = TermParser::try_new(input, Encoding::Utf8).expect("cannot create parser");
        if let Some(defs) = defs {
            let defs_input = IterInput::from(defs.bytes());
            define_opers(arena, defs_input, Encoding::Utf8).expect("cannot define ops");
        }
        let ts = parser
            .try_collect_with_context(arena)
            .expect("parser error");
        dbg!(parser.stats());
        ts
    }

    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .");
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    /// String interpolation with a surrounding looser operator.
    ///
    /// `++` at precedence 500 (default) binds tighter than `+` at 380.
    /// Both legacy and arena-terms emit outer parens around the interpolated string:
    /// `"a{x}b" + 1` → `("a" ++ (x) ++ "b") + 1`
    ///
    /// Without the outer parens, precedence resolution would still produce the
    /// same parse tree here (because `++` binds tighter, the `++` chain is
    /// reduced before `+`), but the outer parens ensure correctness in edge
    /// cases with mixed-associativity same-precedence operators.
    #[test]
    fn string_interpolation_outer_paren_isolation() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), r#""a{xx}b" + 1 ."#);
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        // '+'('++'('++'("a", xx), "b"), 1)
        assert_eq!(s, r#"'+'('++'('++'("a", xx), "b"), 1)"#);
    }

    /// A bare non-interpolated string `"hello"` is wrapped as `( "hello" )` by
    /// the lexer, but the parser unwraps unary tuples so the resulting term is
    /// just `"hello"` (no surrounding structure).
    #[test]
    fn bare_string_unwraps_to_plain_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#""hello" ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(format!("{}", ts[0].display(arena)), r#""hello""#);
    }

    /// Bare strings used as function arguments: `foo("hello", "world")`.
    /// Despite the lexer emitting outer parens around each string, they unwrap
    /// correctly as distinct arguments.
    #[test]
    fn bare_strings_as_func_args() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#"foo("hello", "world") ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(
            format!("{}", ts[0].display(arena)),
            r#"foo("hello", "world")"#
        );
    }

    /// Prefix operator applied to an interpolated string.
    ///
    /// `-` at prec 800 (prefix, right-assoc) binds tighter than `++` at 500.
    /// Without outer parens, `- "a{xx}b"` would parse as `(-"a") ++ xx ++ "b"`,
    /// applying the minus only to the first string piece. The outer parens
    /// ensure the minus applies to the entire interpolated string:
    /// `- ("a" ++ xx ++ "b")`
    #[test]
    fn prefix_op_on_interpolated_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), r#"- "a{xx}b" ."#);
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        // Minus applies to the whole interpolated string, not just "a"
        assert_eq!(s, r#"'-'('++'('++'("a", xx), "b"))"#);
    }

    /// Prefix operator on a bare (non-interpolated) string also works —
    /// the outer `( STR )` unwraps so the prefix applies directly.
    #[test]
    fn prefix_op_on_bare_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#"- "hello" ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(format!("{}", ts[0].display(arena)), r#"'-'("hello")"#);
    }

    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let _ts = parse(arena, None, "2 * 2 <= 5");
    }

    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = \\{pos = \\{x = 0, y = 0\\}, health = 100\\}\")"
        );
    }

    /// Roundtrip test: parse term string → display → reparse → redisplay.
    /// Verifies that the term printer produces output that the parser can read
    /// back to produce the same term.
    ///
    /// Each vector entry is (term_syntax, expected_display, expected_raw):
    /// - term_syntax: the Prolog-like term syntax to parse
    /// - expected_display: the expected display form; `None` means "same as
    ///   term_syntax"
    /// - expected_raw: the expected raw string value (string terms only);
    ///   `None` skips the raw-value check
    #[test]
    fn string_roundtrip_vectors() {
        let _ = env_logger::builder().is_test(true).try_init();

        let vectors: Vec<(&str, Option<&str>, Option<&str>)> = vec![
            // ── Simple strings ──
            (r#""hello""#, None, Some("hello")),
            (r#""""#, None, Some("")),
            (r#""hello world""#, None, Some("hello world")),
            (r#""abc def ghi""#, None, Some("abc def ghi")),

            // ── Backslash escapes ──
            (r#""a\\b""#, None, Some("a\\b")),
            (r#""a\"b""#, None, Some("a\"b")),
            (r#""line1\nline2""#, None, Some("line1\nline2")),
            (r#""col1\tcol2""#, None, Some("col1\tcol2")),
            (r#""ret\r""#, None, Some("ret\r")),
            // Named control char escapes — roundtrip through named form
            (r#""bell\a""#, None, Some("bell\x07")),
            (r#""bs\b""#, None, Some("bs\x08")),
            (r#""ff\f""#, None, Some("ff\x0C")),
            (r#""vt\v""#, None, Some("vt\x0B")),
            (r#""esc\e""#, None, Some("esc\x1B")),
            (r#""del\d""#, None, Some("del\x7F")),
            (r#""a\\b\\c""#, None, Some("a\\b\\c")),
            (r#""\\\\""#, None, Some("\\\\")),
            (r#""\\""#, None, Some("\\")),

            // ── Brace escapes (string interpolation prevention) ──
            (r#""hello \{world\}""#, None, Some("hello {world}")),
            (r#""\{""#, None, Some("{")),
            (r#""\}""#, None, Some("}")),
            (r#""\{\}""#, None, Some("{}")),
            (r#""a\{b\}c""#, None, Some("a{b}c")),
            (r#""nested \{a \{b\} c\}""#, None, Some("nested {a {b} c}")),
            (r#""\\attrDef\{name\}\{value\}""#, None, Some("\\attrDef{name}{value}")),
            (r#""\\vDefine\{r_\}\{text\}""#, None, Some("\\vDefine{r_}{text}")),

            // ── Hex escapes ──
            (r#""\x41""#, Some(r#""A""#), Some("A")),
            (r#""\x00""#, Some(r#""\x00""#), Some("\x00")),
            (r#""\x7E""#, Some(r#""~""#), Some("~")),
            // (r#""\xFF""#, None, None), // high byte — not valid UTF-8, skip

            // ── Octal escapes ──
            (r#""\101""#, Some(r#""A""#), Some("A")),
            (r#""\0""#, Some(r#""\x00""#), Some("\x00")),
            (r#""\176""#, Some(r#""~""#), Some("~")),

            // ── Control char escapes ──
            (r#""\^A""#, Some(r#""\x01""#), Some("\x01")),
            (r#""\^Z""#, Some(r#""\x1A""#), Some("\x1A")),

            // ── Mixed escapes ──
            (r#""tab\there\nnewline""#, None, Some("tab\there\nnewline")),
            (r#""path\\to\\file\{name\}""#, None, Some("path\\to\\file{name}")),
            (r#""say \"hello\" \{world\}""#, None, Some("say \"hello\" {world}")),

            // ── String interpolation (using {expr}) ──
            // "aaa{1+2}bbb" parses as '++'('++'("aaa", '+'(1, 2)), "bbb").
            // That is an expression, not a simple string, so it is not listed here.

            // ── Atoms (single-quoted) ──
            ("hello", Some("hello"), None),
            ("'hello world'", None, None),
            ("'it\\'s'", None, None),

            // ── Numbers ──
            ("42", Some("42"), None),
            ("-7", Some("-7"), None),
            ("3.14", Some("3.14"), None),
            ("0", Some("0"), None),
            ("0.0", Some("0.0"), None),

            // ── Lists ──
            ("[1, 2, 3]", Some("[1, 2, 3]"), None),
            ("[]", Some("nil"), None),
            (r#"["a", "b", "c"]"#, Some(r#"["a", "b", "c"]"#), None),

            // ── Compound terms ──
            ("foo(1, 2)", Some("foo(1, 2)"), None),
            (r#"f("hello \{world\}")"#, Some(r#"f("hello \{world\}")"#), None),

            // ── Raw strings ──
            // {expr} at expression level is a raw string with balanced braces
            ("{1, 2}", Some(r#""1, 2""#), Some("1, 2")),
            ("{1, 2, 3}", Some(r#""1, 2, 3""#), Some("1, 2, 3")),
            ("{hello {world} end}", Some(r#""hello \{world\} end""#), Some("hello {world} end")),

            // ── Edge cases ──
            (r#"" spaces ""#, None, Some(" spaces ")),
            (r#""\n\n\n""#, None, Some("\n\n\n")),
            (r#""\t\t""#, None, Some("\t\t")),
            (r#""abc\ndef\tghi""#, None, Some("abc\ndef\tghi")),
        ];

        let arena = &mut Arena::try_with_default_opers().unwrap();

        for (i, (term_str, expected_display, expected_raw)) in vectors.iter().enumerate() {
            // Parse the term string
            let terms = parse(arena, None, &format!("{} .", term_str));
            assert!(
                !terms.is_empty(),
                "vector {}: failed to parse: {}",
                i, term_str
            );
            let term = terms[0];

            // Check raw value for string terms (non-string terms skip this check)
            if let Some(raw) = expected_raw {
                if let View::Str(s) = term.view(arena).unwrap() {
                    assert_eq!(
                        s, *raw,
                        "vector {}: raw value mismatch for {}\n got: {:?}\n expected: {:?}",
                        i, term_str, s, raw
                    );
                }
            }

            // Display the term
            let displayed = format!("{}", term.display(arena));
            let expected_disp = expected_display.unwrap_or(term_str);
            assert_eq!(
                displayed, expected_disp,
                "vector {}: display mismatch for {}\n got: {}\n expected: {}",
                i, term_str, displayed, expected_disp
            );

            // Roundtrip: reparse the displayed string
            let terms2 = parse(arena, None, &format!("{} .", displayed));
            assert!(
                !terms2.is_empty(),
                "vector {}: failed to reparse displayed: {}",
                i, displayed
            );
            let term2 = terms2[0];

            // Redisplay and compare
            let redisplayed = format!("{}", term2.display(arena));
            assert_eq!(
                redisplayed, displayed,
                "vector {}: roundtrip display mismatch\n original: {}\n displayed: {}\n redisplayed: {}",
                i, term_str, displayed, redisplayed
            );

            // Check raw value roundtrip for strings
            if let Some(raw) = expected_raw {
                if let View::Str(s) = term2.view(arena).unwrap() {
                    assert_eq!(
                        s, *raw,
                        "vector {}: roundtrip raw value mismatch\n got: {:?}\n expected: {:?}",
                        i, s, raw
                    );
                }
            }
        }
    }
}