arena_terms_parser/parser.rs
//! Parser for Prolog-like terms with operator definitions.
//!
//! This module defines the [`TermParser`], which implements a shift-reduce SLR(1) parser
//! for Prolog-style terms tokenized by the [`TermLexer`]. It integrates with operator
//! definitions ([`OperDefs`]) to resolve shift/reduce conflicts according to declared
//! precedence and associativity rules.
//!
//! The parser consumes tokens produced by [`TermLexer`] and uses a mutable
//! [`Arena`] as shared context to construct arena-allocated [`Term`] values
//! (from the [`arena_terms`] crate) representing atoms, numbers, compound terms,
//! lists, tuples, and other structures.
//!
//! The parsing tables and rules are generated by **parlex-gen**'s [`aslr`] tool.
//!
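//! # Example
//!
//! A minimal end-to-end sketch (using the same crate-root re-exports as the
//! type-level examples below):
//!
//! ```rust
//! # use arena_terms_parser::{Encoding, TermParser};
//! # use arena_terms::Arena;
//! # use try_next::{IterInput, TryNextWithContext};
//! let mut arena = Arena::try_with_default_opers().unwrap();
//! let input = IterInput::from("foo(1, 2.5, [a, b]) .".bytes());
//! let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
//! let terms = parser.try_collect_with_context(&mut arena).unwrap();
//! assert_eq!(terms.len(), 1);
//! ```
//!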
//! [`TermParser`]: TermParser
//! [`TermLexer`]: crate::lexer::TermLexer
//! [`TermToken`]: crate::lexer::TermToken
//! [`OperDefs`]: crate::oper::OperDefs
//! [`arena_terms`]: https://crates.io/crates/arena-terms
//! [`aslr`]: https://crates.io/crates/parlex-gen

use crate::encoding::Encoding;
use crate::{TermLexer, TermToken, TokenID, Value};
use arena_terms::{Arena, Assoc, Fixity, MAX_OPER_PREC, MIN_OPER_PREC, Term, View};
use parlex::{
    LexerStats, ParlexError, Parser, ParserAction, ParserData, ParserDriver, ParserStats, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID};
use std::marker::PhantomData;
use try_next::TryNextWithContext;

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`TermParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

/// A driver that defines semantic actions for the term parser.
///
/// The [`TermParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and the term-building
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`] and is responsible for building a term.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`TermToken`]s. Must implement
///   [`TryNextWithContext<Arena, Item = TermToken>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = TermToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self, Arena>`:
///   The parser engine parameterized by this driver and context.
/// - `Context = Arena`:
///   Externally supplied context.
///
/// Errors are reported as [`ParlexError`] values.
///
/// # Responsibilities
///
/// The parser driver performs term-specific actions:
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   performs semantic actions such as constructing compound terms, lists,
///   and tuples, and normalizing them against the operator table.
pub struct TermParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,

    /// Stack of intermediate [`Term`] values used for reduction of term sequences.
    ///
    /// [`Value::Index`] refers to an entry in this stack, enabling grammar
    /// actions to compose and reduce sequences of terms into higher-level
    /// structures during parsing.
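    ///
    /// A sketch of how this stack evolves while parsing `[1, 2, 3]` at the
    /// top level (stack initially empty; production names as in [`ProdID`]):
    ///
    /// ```text
    /// BareSeq1 (BareSeq -> Expr)           push 1; emit Value::Index(0)
    /// BareSeq2 (BareSeq -> BareSeq , Expr) push 2, then push 3
    /// List     (Expr -> [ Seq ])           list from terms[0..]; truncate to 0
    /// ```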
    terms: Vec<Term>,
}

impl<I> ParserDriver for TermParserDriver<I>
where
    I: TryNextWithContext<Arena, LexerStats, Item = TermToken, Error: std::fmt::Display + 'static>,
{
    /// Parser metadata generated from the term grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = TermToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self, Self::Context>;

    /// Context (the term arena shared with the lexer).
    type Context = Arena;

    /// Resolves an ambiguity reported by the parser (e.g., shift/reduce).
    ///
    /// Given an ambiguity identifier and the lookahead token `tok2`, this method
    /// chooses the appropriate parser action (shift or reduce) according to the
    /// operator precedence and associativity rules.
    ///
    /// # Parameters
    /// - `parser`: The parser engine, used to peek at tokens on the value stack.
    /// - `arena`: Arena used to look up operator definitions.
    /// - `ambig`: The generated ambiguity ID (`AmbigID`).
    /// - `tok2`: The lookahead token at the ambiguity point.
    ///
    /// # Returns
    /// The selected [`ParserAction`] that disambiguates the current state.
    ///
    /// # Errors
    /// Returns an error if the ambiguity cannot be resolved consistently.
    ///
    /// # Notes
    /// This grammar contains only **shift/reduce** conflicts — cases where
    /// the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other kinds of conflicts (such as **reduce/reduce**) are much harder to
    /// handle programmatically and usually require modifying the grammar
    /// itself to eliminate the ambiguity.
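    ///
    /// For example, resolving `2 * 2 <= 5` with `'*'` at precedence 400 and
    /// `'<='` at 350 (the definitions used in this module's tests), where a
    /// larger precedence number binds tighter:
    ///
    /// ```text
    /// stack: Expr(2) '*' Expr(2)      lookahead: '<='
    /// prec1 = 400 > min_prec2 = 350   => reduce first
    /// result: '<='('*'(2, 2), 5)
    /// ```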
    fn resolve_ambiguity(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, ParlexError> {
        let ambigs = ParData::lookup_ambig(ambig);
        let shift_action = ambigs[0];
        let ParserAction::Shift(_) = shift_action else {
            panic!("expected shift");
        };
        let reduce_action = ambigs[1];
        let ParserAction::Reduce(prod_id) = reduce_action else {
            panic!("expected reduce");
        };

        log::trace!(
            "Conflict between reducing {:?} and shifting {:?}",
            prod_id,
            tok2
        );

        let (fixity1, tok1) = match prod_id {
            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                (Fixity::Infix, parser.tokens_peek(1))
            }
            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                (Fixity::Infix, parser.tokens_peek(3))
            }
            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                (Fixity::Prefix, parser.tokens_peek(1))
            }
            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                (Fixity::Prefix, parser.tokens_peek(3))
            }
            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                (Fixity::Postfix, parser.tokens_peek(0))
            }
            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                (Fixity::Postfix, parser.tokens_peek(2))
            }
            _ => {
                return Err(ParlexError {
                    message: format!(
                        "unexpected conflict: reduction of {:?} with shifting token {:?}",
                        prod_id, tok2
                    ),
                    span: tok2.span(),
                });
            }
        };

        let op_tab1 = arena.get_oper(tok1.op_tab_index);
        let op_tab2 = arena.get_oper(tok2.op_tab_index);

        assert!(op_tab1.is_oper());

        if op_tab2.is_oper() {
            let op_def1 = match op_tab1[fixity1] {
                Some(ref op_def1) => op_def1,
                None => return Ok(shift_action),
            };

            let prec1 = op_def1.prec;
            let assoc1 = op_def1.assoc;

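            // The lookahead operator may be defined for several fixities
            // (e.g. both infix and postfix), so compare `prec1` against the
            // full range of precedences `tok2` could take in this position.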
            let min_prec2 = std::cmp::min(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MAX_OPER_PREC),
            );
            let max_prec2 = std::cmp::max(
                op_tab2[Fixity::Infix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
                op_tab2[Fixity::Postfix]
                    .as_ref()
                    .map(|x| x.prec)
                    .unwrap_or(MIN_OPER_PREC),
            );

            if prec1 > min_prec2 {
                Ok(reduce_action)
            } else if prec1 < max_prec2 {
                Ok(shift_action)
            } else if min_prec2 == max_prec2 && prec1 == min_prec2 {
                if assoc1 == Assoc::None {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok1
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc == Assoc::None)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc == Assoc::None)
                {
                    return Err(ParlexError {
                        message: format!(
                            "precedence conflict: cannot chain non-associative operator {:?}; use parentheses",
                            tok2
                        ),
                        span: tok2.span(),
                    });
                }
                if op_tab2[Fixity::Infix]
                    .as_ref()
                    .is_some_and(|x| x.assoc != assoc1)
                    || op_tab2[Fixity::Postfix]
                        .as_ref()
                        .is_some_and(|x| x.assoc != assoc1)
                {
                    return Err(ParlexError {
                        message: format!(
                            "associativity conflict: cannot chain operators {:?} and {:?}; use parentheses",
                            tok1, tok2
                        ),
                        span: tok2.span(),
                    });
                } else if assoc1 == Assoc::Left {
                    Ok(reduce_action)
                } else {
                    Ok(shift_action)
                }
            } else {
                Err(ParlexError {
                    message: format!(
                        "precedence conflict: cannot chain operators {:?} and {:?}; use parentheses",
                        tok1, tok2
                    ),
                    span: tok2.span(),
                })
            }
        } else {
            Ok(shift_action)
        }
    }

    /// Performs a grammar reduction for the given production rule.
    ///
    /// Applies the semantic action for `prod_id`, typically constructing or
    /// normalizing an arena-backed [`Term`], and pushes the resulting token
    /// onto the parser’s value stack.
    ///
    /// # Parameters
    /// - `parser`: The parser engine whose value stack is popped and pushed.
    /// - `arena`: Arena used to allocate or inspect terms.
    /// - `prod_id`: The production being reduced (`ProdID`).
    /// - `token`: The lookahead token (normally not used).
    ///
    /// # Errors
    /// Returns an error if the reduction fails due to arity mismatches,
    /// invalid operator metadata, or an inconsistent stack state.
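    ///
    /// A sketch of the `Infix1` reduction (`Expr -> Expr atomOper Expr`):
    ///
    /// ```text
    /// value stack before:  ... Expr(a) atomOper(+) Expr(b)
    /// pops:                b, then +, then a
    /// builds:              funcv([+, a, b]) = '+'(a, b), then normalize_term
    /// value stack after:   ... Expr('+'(a, b))
    /// ```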
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        arena: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), ParlexError> {
        match prod_id {
            ProdID::Start => {
                // Accept - does not get reduced
                unreachable!()
            }

            ProdID::Term1 => {
                // Term -> Expr
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                parser.tokens_push(expr_tok);
            }

            ProdID::Term2 => {
                // Term -> Expr .
                let dot = parser.tokens_pop();
                let mut expr_tok = parser.tokens_pop();
                expr_tok.token_id = TokenID::Term;
                expr_tok.merge_span(&dot);
                parser.tokens_push(expr_tok);
            }

            ProdID::Term3 => {
                // Term ->
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, token.span()));
            }

            ProdID::Term4 => {
                // Term -> .
                let dot = parser.tokens_pop();
                parser.tokens_push(TermToken::new(TokenID::Term, Value::None, dot.span()));
            }

            ProdID::Func => {
                // Expr -> func Seq )
                let right_paren = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut func_tok = parser.tokens_pop();
                func_tok.merge_span(&right_paren);
                let span = func_tok.span();
                let op_tab_index = func_tok.op_tab_index;
                let functor = Term::try_from(func_tok.value)?;

                let vs = std::iter::once(&functor).chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::List => {
                // Expr -> [ Seq ]
                let right_brack_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                let index = usize::try_from(seq_tok.value)?;

                let term = arena.list(&self.terms[index..]);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Nil => {
                // Expr -> [ ]
                let right_brack_tok = parser.tokens_pop();
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);
                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::NIL),
                    left_brack_tok.span(),
                ));
            }

            ProdID::List2 => {
                // Expr -> [ Seq | Expr ]
                let right_brack_tok = parser.tokens_pop();
                let tail = Term::try_from(parser.tokens_pop().value)?;
                parser.tokens_pop(); // discard '|'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut left_brack_tok = parser.tokens_pop();
                left_brack_tok.merge_span(&right_brack_tok);

                let term = arena.listc(&self.terms[index..], tail);
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_brack_tok.span(),
                ));
            }

            ProdID::Tuple => {
                // Expr -> ( Seq )
                let right_paren_tok = parser.tokens_pop();
                let seq_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                let index = usize::try_from(seq_tok.value)?;

                // The arena terms parser does not currently support unary tuples:
                // a parenthesized single expression unwraps to that expression.
                // TODO: Consider adding explicit unary tuple syntax `(expr,)`.
                let vs = &self.terms[index..];
                let term = if vs.len() == 1 {
                    vs[0]
                } else {
                    arena.tuple(vs)
                };
                self.terms.truncate(index);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(term),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Unit => {
                // Expr -> ( )
                let right_paren_tok = parser.tokens_pop();
                let mut left_paren_tok = parser.tokens_pop();
                left_paren_tok.merge_span(&right_paren_tok);

                parser.tokens_push(TermToken::new(
                    TokenID::Expr,
                    Value::Term(Term::UNIT),
                    left_paren_tok.span(),
                ));
            }

            ProdID::Var | ProdID::Int | ProdID::Real | ProdID::Date | ProdID::Str | ProdID::Bin => {
                // Expr -> xxx
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                parser.tokens_push(tok);
            }

            ProdID::Atom => {
                // Expr -> atom
                let atom_tok = parser.tokens_pop();
                let span = atom_tok.span();
                let op_tab_index = atom_tok.op_tab_index;

                let atom = Term::try_from(atom_tok.value)?;

                let term = arena
                    .normalize_term(atom, Fixity::Fun, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix1 => {
                // Expr -> Expr atomOper Expr
                let expr2_tok = parser.tokens_pop();
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);
                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1, expr2])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Infix2 => {
                // Expr -> Expr funcOper Seq ) Expr
                let expr2_tok = parser.tokens_pop();
                parser.tokens_pop(); // discard ')'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&expr2_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr2 = Term::try_from(expr2_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1, expr2];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Infix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix1 => {
                // Expr -> atomOper Expr
                let expr1_tok = parser.tokens_pop();
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let expr1 = Term::try_from(expr1_tok.value)?;
                let oper = Term::try_from(oper_tok.value)?;

                let term = match oper
                    .view(arena)
                    .map_err(|e| ParlexError::from_err(e, span))?
                {
                    // The parser currently gives special treatment to unary minus
                    // on integer and real literals: it directly negates them.
                    // TODO: Consider handling minus at the lexical level.
                    View::Atom(s)
                        if s == "-"
                            && matches!(
                                expr1
                                    .view(arena)
                                    .map_err(|e| ParlexError::from_err(e, span))?,
                                View::Int(_) | View::Real(_)
                            ) =>
                    {
                        match expr1
                            .view(arena)
                            .map_err(|e| ParlexError::from_err(e, span))?
                        {
                            View::Int(i) => arena.int(-i),
                            View::Real(r) => arena.real(-r),
                            _ => unreachable!(),
                        }
                    }
                    _ => {
                        let term = arena
                            .funcv([oper, expr1])
                            .map_err(|e| ParlexError::from_err(e, span))?;
                        arena
                            .normalize_term(term, Fixity::Prefix, op_tab_index)
                            .map_err(|e| ParlexError::from_err(e, span))?
                    }
                };

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Prefix2 => {
                // Expr -> funcOper Seq ) Expr
                let expr1_tok = parser.tokens_pop();
                parser.tokens_pop(); // discard ')'
                let index = usize::try_from(parser.tokens_pop().value)?;
                let mut oper_tok = parser.tokens_pop();
                oper_tok.merge_span(&expr1_tok);

                let span = oper_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Prefix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix1 => {
                // Expr -> Expr atomOper
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&oper_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let term = arena
                    .funcv([oper, expr1])
                    .map_err(|e| ParlexError::from_err(e, span))?;
                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Postfix2 => {
                // Expr -> Expr funcOper Seq )
                let right_paren_tok = parser.tokens_pop();
                let index = usize::try_from(parser.tokens_pop().value)?;
                let oper_tok = parser.tokens_pop();
                let mut expr1_tok = parser.tokens_pop();
                expr1_tok.merge_span(&right_paren_tok);

                let span = expr1_tok.span();
                let op_tab_index = oper_tok.op_tab_index;

                let oper = Term::try_from(oper_tok.value)?;
                let expr1 = Term::try_from(expr1_tok.value)?;

                let xs = [oper, expr1];
                let vs = xs.iter().chain(self.terms[index..].iter());
                let term = arena
                    .funcv(vs)
                    .map_err(|e| ParlexError::from_err(e, span))?;
                self.terms.truncate(index);

                let term = arena
                    .normalize_term(term, Fixity::Postfix, op_tab_index)
                    .map_err(|e| ParlexError::from_err(e, span))?;

                parser.tokens_push(TermToken::new(TokenID::Expr, Value::Term(term), span));
            }

            ProdID::Seq1 => {
                // Seq -> BareSeq
                let mut bare_seq_tok = parser.tokens_pop();
                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::Seq2 => {
                // Seq -> BareSeq ,
                parser.tokens_pop(); // discard ','
                let mut bare_seq_tok = parser.tokens_pop();

                bare_seq_tok.token_id = TokenID::Seq;
                parser.tokens_push(bare_seq_tok);
            }

            ProdID::BareSeq1 => {
                // BareSeq -> Expr
                let expr_tok = parser.tokens_pop();
                let span = expr_tok.span();
                let expr = Term::try_from(expr_tok.value)?;

                let index = self.terms.len();
                self.terms.push(expr);

                parser.tokens_push(TermToken::new(TokenID::BareSeq, Value::Index(index), span));
            }

            ProdID::BareSeq2 => {
                // BareSeq -> BareSeq , Expr
                let expr_tok = parser.tokens_pop();
                let expr = Term::try_from(expr_tok.value)?;
                parser.tokens_pop(); // discard ','

                self.terms.push(expr);
            }
        }
        Ok(())
    }
}

/// Prolog-like term token parser with operator precedence and associativity
/// handling.
///
/// The [`TermTokenParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermLexer`] for tokenization
/// and produces [`Term`] values stored in an [`Arena`] for efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>`].
/// - **Output**: completed parsing units as [`TermToken`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which is returned
/// as a token with [`Value::None`]. This occurs, for example, when the last
/// statement in the input is terminated by a dot (`.`) but followed by no
/// further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed term.
/// 2. It then emits an additional token representing the *empty* term
///    (`Value::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input.
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`] values, covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-table issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{Encoding, TermToken, TermTokenParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermTokenParser::try_new(input, Encoding::Utf8).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    parser: Parser<TermLexer<I>, TermParserDriver<TermLexer<I>>, Arena>,
}

impl<I> TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermTokenParser`] for the given input stream and encoding.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    /// - `encoding`: The character encoding of the input stream.
    ///
    /// # Returns
    /// A fully initialized [`TermTokenParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I, encoding: Encoding) -> Result<Self, ParlexError> {
        let lexer = TermLexer::try_new(input, encoding)?;
        let driver = TermParserDriver {
            _marker: PhantomData,
            terms: Vec::new(),
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

/// Defines or extends operator definitions directly from a Prolog-like
/// `op(...)` term list read from an input source.
///
/// This allows dynamic addition of new operator fixities and precedence
/// rules during parsing.
///
/// # Parameters
/// - `arena`: Arena allocator used for constructing term structures.
/// - `defs_input`: Input byte iterator yielding the operator definition terms.
/// - `encoding`: Input encoding of the definitions stream.
///
/// # Errors
/// Returns an error if parsing the operator term list fails or produces
/// an invalid operator specification.
pub fn define_opers<I>(
    arena: &mut Arena,
    defs_input: I,
    encoding: Encoding,
) -> Result<(), ParlexError>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    let mut defs_parser = TermParser::try_new(defs_input, encoding)?;
    while let Some(term) = defs_parser.try_next_with_context(arena)? {
        arena
            .define_opers(term)
            .map_err(|e| ParlexError::from_err(e, None))?;
    }
    Ok(())
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermTokenParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Tokens produced by this parser.
    type Item = TermToken;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **parlex** parser to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens — one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
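    ///
    /// A sketch of the emitted sequence for the input `"foo. bar."` (note the
    /// trailing *empty* term described under *Empty Statements* on
    /// [`TermTokenParser`]):
    ///
    /// ```text
    /// Term(foo), Term(bar), Term(Value::None), then None
    /// ```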
    fn try_next_with_context(
        &mut self,
        context: &mut Arena,
    ) -> Result<Option<TermToken>, ParlexError> {
        self.parser.try_next_with_context(context)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Prolog-like term parser with operator precedence and associativity handling.
///
/// The [`TermParser`] drives the parsing of Prolog-style terms using the
/// [`parlex`] SLR(1) core library. It builds upon the [`TermTokenParser`] for
/// tokenization and produces [`Term`] values stored in an [`Arena`] for
/// efficient allocation.
///
/// Operator definitions are resolved dynamically through an [`OperDefs`] table,
/// allowing user-defined or default operators to control how expressions are
/// grouped and nested according to their **fixity**, **precedence**, and
/// **associativity**.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Arena, Item = u8>`].
/// - **Output**: completed parsing units as [`Term`] values.
///
/// # End Tokens and Multiple Sentences
///
/// The underlying token parser emits one result token at the end of each
/// *parsing unit* (end of “sentence” or expression). If the input contains
/// multiple independent sentences, you will receive multiple terms — one per
/// unit — and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The terms grammar also accepts an *empty* term, which the token layer
/// reports with [`Value::None`]. This occurs, for example, when the last
/// statement in the input is terminated by a dot (`.`) but followed by no
/// further expression. [`TermParser`] skips such empty results and yields
/// only complete [`Term`] values, returning `None` once all input has been
/// consumed.
///
/// # Errors
///
/// All failures are surfaced as [`ParlexError`] values, covering:
/// - errors from the input source (`I::Error`),
/// - lexical and semantic errors (e.g., UTF-8 decoding, number parsing,
///   operator-table issues).
///
/// # Example
///
/// ```rust
/// # use arena_terms_parser::{Encoding, TermToken, TermParser, TokenID, Value};
/// # use arena_terms::Arena;
/// # use try_next::{IterInput, TryNextWithContext};
/// let mut arena = Arena::try_with_default_opers().unwrap();
/// let input = IterInput::from("hello = 1 .\n foo =\n [5, 3, 2].\n (world, hello, 10).\n\n1000".bytes());
/// let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
/// let vs = parser.try_collect_with_context(&mut arena).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
///
/// [`Arena`]: arena_terms::Arena
/// [`Term`]: arena_terms::Term
/// [`OperDefs`]: crate::OperDefs
/// [`TermLexer`]: crate::TermLexer
/// [`TermToken`]: crate::TermToken
pub struct TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    pub(crate) parser: TermTokenParser<I>,
}

impl<I> TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a new [`TermParser`] for the given input stream and encoding.
    ///
    /// # Parameters
    /// - `input`: A fused iterator over the bytes to be parsed.
    /// - `encoding`: The character encoding of the input stream.
    ///
    /// # Returns
    /// A fully initialized [`TermParser`] ready to parse Prolog-like terms.
    ///
    /// # Errors
    /// Returns an error if the lexer context cannot be initialized
    /// or if the generated parser tables fail to load.
    pub fn try_new(input: I, encoding: Encoding) -> Result<Self, ParlexError> {
        let parser: TermTokenParser<I> = TermTokenParser::try_new(input, encoding)?;
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext<Arena, (LexerStats, ParserStats)> for TermParser<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Terms produced by this parser.
    type Item = Term;

    /// Unified error type.
    type Error = ParlexError;

    /// Advances the parser and returns the next term, or `None` at end of input.
    ///
    /// The provided `context` (an [`Arena`]) may be mutated by rule
    /// actions (for example, to intern terms). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// Empty terms (reported by the token layer as [`Value::None`]) are
    /// skipped rather than returned: one [`Term`] is yielded per completed
    /// parsing unit, and once all input has been consumed this method
    /// returns `None`.
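    ///
    /// A minimal usage sketch (same pattern as the type-level example):
    ///
    /// ```rust
    /// # use arena_terms_parser::{Encoding, TermParser};
    /// # use arena_terms::Arena;
    /// # use try_next::{IterInput, TryNextWithContext};
    /// let mut arena = Arena::try_with_default_opers().unwrap();
    /// let input = IterInput::from("foo(1). bar".bytes());
    /// let mut parser = TermParser::try_new(input, Encoding::Utf8).unwrap();
    /// while let Some(term) = parser.try_next_with_context(&mut arena).unwrap() {
    ///     println!("{}", term.display(&arena));
    /// }
    /// ```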
    fn try_next_with_context(&mut self, context: &mut Arena) -> Result<Option<Term>, ParlexError> {
        while let Some(TermToken { value, .. }) = self.parser.try_next_with_context(context)? {
            match value {
                Value::Term(term) => return Ok(Some(term)),
                Value::None => continue,
                Value::Index(_) => {
                    return Err(ParlexError {
                        message: "index token not expected".to_string(),
                        span: None,
                    });
                }
            }
        }
        Ok(None)
    }

    fn stats(&self) -> (LexerStats, ParserStats) {
        self.parser.stats()
    }
}

/// Unit tests for the [`TermParser`] implementation.
#[cfg(test)]
mod tests {
    use super::*;
    use try_next::IterInput;

    const SAMPLE_DEFS: &str = r#"[
op(==(x,y),infix,350,none),
op(!=(x,y),infix,350,none),
op( <(x,y),infix,350,none),
op( >(x,y),infix,350,none),
op(<=(x,y),infix,350,none),
op(>=(x,y),infix,350,none),
op('+'(x,y),infix,380,left),
op('-'(x,y),infix,380,left),
op('-'(x),postfix,900,left, rename_to=some('postfix_minus')),
op('*'(x,y),infix,400,left),
op('/'(x,y),infix,400,left),
op('+'(x),prefix,800,right),
op(and(x,y),infix,300,left),
op(or(x,y),infix,250,left),
op(not(x),prefix,800,right),
]"#;

    fn parse(arena: &mut Arena, defs: Option<&str>, s: &str) -> Vec<Term> {
        let input = IterInput::from(s.bytes());
        let mut parser = TermParser::try_new(input, Encoding::Utf8).expect("cannot create parser");
        if let Some(defs) = defs {
            let defs_input = IterInput::from(defs.bytes());
            define_opers(arena, defs_input, Encoding::Utf8).expect("cannot define ops");
        }
        let ts = parser
            .try_collect_with_context(arena)
            .expect("parser error");
        dbg!(parser.stats());
        ts
    }

    #[test]
    fn one_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), " . . 2 * 2 <= 5 . .");
        dbg!(&ts);
        let s = format!("{}", ts[0].display(arena));
        dbg!(&s);
        assert_eq!(ts.len(), 1);
        assert_eq!(s, "'<='('*'(2, 2), 5)");
    }

    /// String interpolation with a surrounding looser operator.
    ///
    /// `++` at precedence 500 (default) binds tighter than `+` at 380.
    /// Both legacy and arena-terms emit outer parens around the interpolated string:
    /// `"a{x}b" + 1` → `("a" ++ (x) ++ "b") + 1`
    ///
    /// Without the outer parens, precedence resolution would still produce the
    /// same parse tree here (because `++` binds tighter, the `++` chain is
    /// reduced before `+`), but the outer parens ensure correctness in edge
    /// cases with mixed-associativity same-precedence operators.
    #[test]
    fn string_interpolation_outer_paren_isolation() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), r#""a{xx}b" + 1 ."#);
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        // '+'('++'('++'("a", xx), "b"), 1)
        assert_eq!(s, r#"'+'('++'('++'("a", xx), "b"), 1)"#);
    }

    /// A bare non-interpolated string `"hello"` is wrapped as `( "hello" )` by
    /// the lexer, but the parser unwraps unary tuples so the resulting term is
    /// just `"hello"` (no surrounding structure).
    #[test]
    fn bare_string_unwraps_to_plain_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#""hello" ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(format!("{}", ts[0].display(arena)), r#""hello""#);
    }

    /// Bare strings used as function arguments: `foo("hello", "world")`.
    /// Despite the lexer emitting outer parens around each string, they unwrap
    /// correctly as distinct arguments.
    #[test]
    fn bare_strings_as_func_args() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#"foo("hello", "world") ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(
            format!("{}", ts[0].display(arena)),
            r#"foo("hello", "world")"#
        );
    }

    /// Prefix operator applied to an interpolated string.
    ///
    /// `-` at prec 800 (prefix, right-assoc) binds tighter than `++` at 500.
    /// Without outer parens, `- "a{xx}b"` would parse as `(-"a") ++ xx ++ "b"`,
    /// applying the minus only to the first string piece. The outer parens
    /// ensure the minus applies to the entire interpolated string:
    /// `- ("a" ++ xx ++ "b")`
    #[test]
    fn prefix_op_on_interpolated_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, Some(SAMPLE_DEFS), r#"- "a{xx}b" ."#);
        assert_eq!(ts.len(), 1);
        let s = format!("{}", ts[0].display(arena));
        // Minus applies to the whole interpolated string, not just "a"
        assert_eq!(s, r#"'-'('++'('++'("a", xx), "b"))"#);
    }

    /// Prefix operator on a bare (non-interpolated) string also works —
    /// the outer `( STR )` unwraps so the prefix applies directly.
    #[test]
    fn prefix_op_on_bare_string() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let ts = parse(arena, None, r#"- "hello" ."#);
        assert_eq!(ts.len(), 1);
        assert_eq!(format!("{}", ts[0].display(arena)), r#"'-'("hello")"#);
    }

    #[test]
    #[should_panic]
    fn missing_ops() {
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let _ts = parse(arena, None, "2 * 2 <= 5");
    }

    #[test]
    fn more_complicated_term() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::try_with_default_opers().unwrap();
        let x = "(
[(1, 2) | unit] ++ foo(baz(1e-9)),
date{2025-09-30T18:24:22.154Z},
\"aaa{
1 + 2
}bbb{
3 * 4
}ccc\",
{player = {pos = {x = 0, y = 0}, health = 100}},
)";
        let ts = parse(arena, Some(SAMPLE_DEFS), x);
        let s = format!("{}", ts[0].display(arena));
        assert_eq!(ts.len(), 1);
        assert_eq!(
            s,
            "('++'([(1, 2) | unit], foo(baz(0.000000001))), date{2025-09-30T18:24:22.154+00:00}, '++'('++'('++'('++'(\"aaa\", '+'(1, 2)), \"bbb\"), '*'(3, 4)), \"ccc\"), \"player = \\{pos = \\{x = 0, y = 0\\}, health = 100\\}\")"
        );
    }

    /// Roundtrip test: parse term string → display → reparse → redisplay.
    /// Verifies that the term printer produces output that the parser can read
    /// back to produce the same term.
    ///
    /// Each vector entry is (term_syntax, expected_display, expected_raw):
    /// - term_syntax: the Prolog-like term syntax to parse
    /// - expected_display: the expected display form; `None` means "same as
    ///   term_syntax"
    /// - expected_raw: the expected raw string value (string terms only);
    ///   `None` skips the raw-value check
    #[test]
    fn string_roundtrip_vectors() {
        let _ = env_logger::builder().is_test(true).try_init();

        let vectors: Vec<(&str, Option<&str>, Option<&str>)> = vec![
            // ── Simple strings ──
            (r#""hello""#, None, Some("hello")),
            (r#""""#, None, Some("")),
            (r#""hello world""#, None, Some("hello world")),
            (r#""abc def ghi""#, None, Some("abc def ghi")),

            // ── Backslash escapes ──
            (r#""a\\b""#, None, Some("a\\b")),
            (r#""a\"b""#, None, Some("a\"b")),
            (r#""line1\nline2""#, None, Some("line1\nline2")),
            (r#""col1\tcol2""#, None, Some("col1\tcol2")),
            (r#""ret\r""#, None, Some("ret\r")),
            // Named control char escapes — roundtrip through named form
            (r#""bell\a""#, None, Some("bell\x07")),
            (r#""bs\b""#, None, Some("bs\x08")),
            (r#""ff\f""#, None, Some("ff\x0C")),
            (r#""vt\v""#, None, Some("vt\x0B")),
            (r#""esc\e""#, None, Some("esc\x1B")),
            (r#""del\d""#, None, Some("del\x7F")),
            (r#""a\\b\\c""#, None, Some("a\\b\\c")),
            (r#""\\\\""#, None, Some("\\\\")),
            (r#""\\""#, None, Some("\\")),

            // ── Brace escapes (string interpolation prevention) ──
            (r#""hello \{world\}""#, None, Some("hello {world}")),
            (r#""\{""#, None, Some("{")),
            (r#""\}""#, None, Some("}")),
            (r#""\{\}""#, None, Some("{}")),
            (r#""a\{b\}c""#, None, Some("a{b}c")),
            (r#""nested \{a \{b\} c\}""#, None, Some("nested {a {b} c}")),
            (r#""\\attrDef\{name\}\{value\}""#, None, Some("\\attrDef{name}{value}")),
            (r#""\\vDefine\{r_\}\{text\}""#, None, Some("\\vDefine{r_}{text}")),

            // ── Hex escapes ──
            (r#""\x41""#, Some(r#""A""#), Some("A")),
            (r#""\x00""#, Some(r#""\x00""#), Some("\x00")),
            (r#""\x7E""#, Some(r#""~""#), Some("~")),
            // (r#""\xFF""#, None, None), // high byte — not valid UTF-8, skip

            // ── Octal escapes ──
            (r#""\101""#, Some(r#""A""#), Some("A")),
            (r#""\0""#, Some(r#""\x00""#), Some("\x00")),
            (r#""\176""#, Some(r#""~""#), Some("~")),

            // ── Control char escapes ──
            (r#""\^A""#, Some(r#""\x01""#), Some("\x01")),
            (r#""\^Z""#, Some(r#""\x1A""#), Some("\x1A")),

            // ── Mixed escapes ──
            (r#""tab\there\nnewline""#, None, Some("tab\there\nnewline")),
            (r#""path\\to\\file\{name\}""#, None, Some("path\\to\\file{name}")),
            (r#""say \"hello\" \{world\}""#, None, Some("say \"hello\" {world}")),

            // ── String interpolation (using {expr}) ──
            // "aaa{1+2}bbb" parses as '++'('++'("aaa", '+'(1, 2)), "bbb").
            // That is an expression, not a simple string, so it is not listed here.

            // ── Atoms (single-quoted) ──
            ("hello", Some("hello"), None),
            ("'hello world'", None, None),
            ("'it\\'s'", None, None),

            // ── Numbers ──
            ("42", Some("42"), None),
            ("-7", Some("-7"), None),
            ("3.14", Some("3.14"), None),
            ("0", Some("0"), None),
            ("0.0", Some("0.0"), None),

            // ── Lists ──
            ("[1, 2, 3]", Some("[1, 2, 3]"), None),
            ("[]", Some("nil"), None),
            (r#"["a", "b", "c"]"#, Some(r#"["a", "b", "c"]"#), None),

            // ── Compound terms ──
            ("foo(1, 2)", Some("foo(1, 2)"), None),
            (r#"f("hello \{world\}")"#, Some(r#"f("hello \{world\}")"#), None),

            // ── Raw strings ──
            // {expr} at expression level is a raw string with balanced braces
            ("{1, 2}", Some(r#""1, 2""#), Some("1, 2")),
            ("{1, 2, 3}", Some(r#""1, 2, 3""#), Some("1, 2, 3")),
            ("{hello {world} end}", Some(r#""hello \{world\} end""#), Some("hello {world} end")),

            // ── Edge cases ──
            (r#"" spaces ""#, None, Some(" spaces ")),
            (r#""\n\n\n""#, None, Some("\n\n\n")),
            (r#""\t\t""#, None, Some("\t\t")),
            (r#""abc\ndef\tghi""#, None, Some("abc\ndef\tghi")),
        ];

        let arena = &mut Arena::try_with_default_opers().unwrap();

        for (i, (term_str, expected_display, expected_raw)) in vectors.iter().enumerate() {
            // Parse the term string
            let terms = parse(arena, None, &format!("{} .", term_str));
            assert!(
                !terms.is_empty(),
                "vector {}: failed to parse: {}",
                i, term_str
            );
            let term = terms[0];

            // Check raw value for string terms (non-string terms skip this check)
            if let Some(raw) = expected_raw {
                if let View::Str(s) = term.view(arena).unwrap() {
                    assert_eq!(
                        s, *raw,
                        "vector {}: raw value mismatch for {}\n got: {:?}\n expected: {:?}",
                        i, term_str, s, raw
                    );
                }
            }

            // Display the term
            let displayed = format!("{}", term.display(arena));
            let expected_disp = expected_display.unwrap_or(term_str);
            assert_eq!(
                displayed, expected_disp,
                "vector {}: display mismatch for {}\n got: {}\n expected: {}",
                i, term_str, displayed, expected_disp
            );

            // Roundtrip: reparse the displayed string
            let terms2 = parse(arena, None, &format!("{} .", displayed));
            assert!(
                !terms2.is_empty(),
                "vector {}: failed to reparse displayed: {}",
                i, displayed
            );
            let term2 = terms2[0];

            // Redisplay and compare
            let redisplayed = format!("{}", term2.display(arena));
            assert_eq!(
                redisplayed, displayed,
                "vector {}: roundtrip display mismatch\n original: {}\n displayed: {}\n redisplayed: {}",
                i, term_str, displayed, redisplayed
            );

            // Check raw value roundtrip for strings
            if let Some(raw) = expected_raw {
                if let View::Str(s) = term2.view(arena).unwrap() {
                    assert_eq!(
                        s, *raw,
                        "vector {}: roundtrip raw value mismatch\n got: {:?}\n expected: {:?}",
                        i, s, raw
                    );
                }
            }
        }
    }
}