parlex_calc/calc.rs
/// Includes the generated lexer definition produced by **`parlex-gen`**’s
/// [`alex`](https://crates.io/crates/parlex-gen) tool.
///
/// The included file (`lexer_data.rs`) contains the DFA tables, mode definitions,
/// and rule implementations required for the [`CalcLexer`]. It is generated at
/// build time by the project’s `build.rs` script.
pub mod lexer_data {
    include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
}

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`CalcParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

use crate::{SymTab, SymTabError};
use lexer_data::{LexData, Mode, Rule};
use parlex::{
    Lexer, LexerData, LexerDriver, LexerError, Parser, ParserAction, ParserData, ParserDriver,
    ParserError, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID, TokenID};
use std::convert::Infallible;
use std::iter::Fuse;
use std::marker::PhantomData;
use thiserror::Error;
use try_next::TryNextWithContext;

/// Represents all possible errors that can occur within the calculator.
///
/// The [`CalcError`] enum aggregates various error sources encountered
/// during lexical analysis, parsing, and symbol-table operations.
/// It implements [`std::error::Error`] via [`thiserror::Error`], allowing
/// ergonomic error propagation with the `?` operator.
///
/// Each variant wraps a more specific underlying error type.
///
/// # Variants
///
/// - [`CalcError::ParseInt`]:
///   Returned when a numeric literal cannot be parsed into an integer,
///   typically originating from [`std::num::ParseIntError`].
///
/// - [`CalcError::FromUtf8`]:
///   Returned when the input contains invalid UTF-8 byte sequences and
///   cannot be decoded into a [`String`].
///
/// - [`CalcError::SymTab`]:
///   Wraps an error from the symbol table subsystem ([`SymTabError`]).
///
/// # Example
/// ```rust
/// # use parlex_calc::{CalcError, SymTabError};
/// # use std::str::FromStr;
/// // Example of a parse error bubbling up as CalcError::ParseInt
/// let result: Result<i64, CalcError> = i64::from_str("notanumber").map_err(CalcError::from);
/// assert!(matches!(result.unwrap_err(), CalcError::ParseInt(_)));
///
/// // Example of a symbol-table error propagation
/// let sym_err = SymTabError::InvalidIndex { index: 10, len: 3 };
/// let err = CalcError::from(sym_err);
/// assert!(matches!(err, CalcError::SymTab(_)));
/// ```
#[derive(Debug, Error)]
pub enum CalcError {
    /// An integer literal could not be parsed from its string representation.
    ///
    /// Typically originates from [`std::num::ParseIntError`].
    #[error("unable to parse {0:?}")]
    ParseInt(#[from] std::num::ParseIntError),

    /// Failed to decode UTF-8 bytes from input.
    ///
    /// Wraps a [`std::string::FromUtf8Error`].
    #[error("utf8 error {0:?}")]
    FromUtf8(#[from] std::string::FromUtf8Error),

    /// A symbol-table operation failed.
    ///
    /// Wraps a [`SymTabError`] produced by symbol-table lookups or updates.
    #[error("symtab error {0:?}")]
    SymTab(#[from] SymTabError),
}

/// An input adapter that wraps any iterator and provides a `TryNextWithContext`
/// interface, automatically fusing the iterator so it never yields items
/// after returning `None` once.
///
/// # Type Parameters
///
/// - `I`: The underlying iterator type. It can be any `Iterator`.
/// - `C`: The *context* type, which is passed by mutable reference to each
///   `try_next_with_context` call.
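///
/// # Example
///
/// A minimal usage sketch (the context is only threaded through; `IterInput`
/// itself never touches it):
///
/// ```rust
/// # use parlex_calc::{IterInput, SymTab};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let mut input = IterInput::from("ab".bytes());
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), Some(b'a'));
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), Some(b'b'));
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), None);
/// ```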
pub struct IterInput<I, C>
where
    I: Iterator,
{
    /// The underlying fused iterator.
    iter: Fuse<I>,

    /// Marker to tie the adapter to the context type `C` without owning
    /// a value of it.
    _marker: PhantomData<fn(C)>,
}

impl<I, C> IterInput<I, C>
where
    I: Iterator,
{
    /// Creates a new `IterInput` from any iterator.
    ///
    /// The iterator is automatically fused internally, so that once it returns
    /// `None`, all further `next()` calls will also return `None`.
    pub fn from(iter: I) -> Self {
        Self {
            iter: iter.fuse(),
            _marker: PhantomData,
        }
    }
}

impl<I, C> TryNextWithContext for IterInput<I, C>
where
    I: Iterator,
{
    type Item = I::Item;
    type Error = Infallible;
    type Context = C;

    #[inline]
    fn try_next_with_context(
        &mut self,
        _context: &mut Self::Context,
    ) -> Result<Option<Self::Item>, Self::Error> {
        Ok(self.iter.next())
    }
}

/// Represents the value carried by a lexical token.
///
/// Each token in the lexer may carry optional data depending on its kind.
/// For example, identifiers and numbers store extra information such as
/// an index into the symbol table or a literal numeric value.
///
/// This type is used inside a [`CalcToken`] struct alongside a
/// [`TokenID`] indicating what category the token belongs to.
///
/// # Variants
///
/// - [`TokenValue::None`]:
///   Used for tokens that do not carry any extra data (e.g., punctuation, operators).
///
/// - [`TokenValue::Ident(usize)`]:
///   Stores the **symbol table index** of an identifier.
///   The `usize` refers to an entry in a [`SymTab`].
///
/// - [`TokenValue::Number(i64)`]:
///   Represents an integer literal value.
///
/// # Example
/// ```rust
/// # use parlex_calc::TokenValue;
///
/// let ident = TokenValue::Ident(0);
/// let number = TokenValue::Number(42);
/// let punct = TokenValue::None;
///
/// match number {
///     TokenValue::Number(n) => assert_eq!(n, 42),
///     _ => unreachable!(),
/// }
/// ```
#[derive(Debug, Clone)]
pub enum TokenValue {
    /// No associated data (for symbols or keywords).
    None,

    /// Identifier token with an index into the symbol table.
    Ident(usize),

    /// Integer literal token.
    Number(i64),
}

/// A concrete implementation of a lexical token used by the calculator.
///
/// The [`CalcToken`] type represents a single lexical unit (identifier,
/// numeric literal, or operator) recognized by the calculator’s lexer.
/// It implements the generic [`Token`] trait, providing access to its
/// token identifier and source position (line number).
///
/// This structure ties together:
/// - The token’s identifier (via [`TokenID`]),
/// - The token’s **associated data** (via [`TokenValue`]),
/// - The **line number** where it occurs in the input stream.
///
/// # Trait Implementation
///
/// Implements the [`Token`] trait, providing:
/// - [`token_id()`](#method.token_id): returns the token’s [`TokenID`].
/// - [`line_no()`](#method.line_no): returns the source line number.
///
/// # Fields
///
/// - [`token_id`](#structfield.token_id):
///   The category of token (identifier, number, operator, etc.).
///
/// - [`value`](#structfield.value):
///   The token’s associated value — for instance, a symbol-table index
///   or literal number — stored as a [`TokenValue`].
///
/// - [`line_no`](#structfield.line_no):
///   The 1-based line number where the token appears in the source.
///
/// # Example
/// ```rust
/// # use parlex_calc::{CalcToken, TokenID, TokenValue};
/// # use parlex::Token;
/// let token = CalcToken {
///     token_id: TokenID::Number,
///     value: TokenValue::Number(99),
///     line_no: 3,
/// };
///
/// assert_eq!(token.token_id(), TokenID::Number);
/// assert_eq!(token.line_no(), 3);
/// ```
#[derive(Debug, Clone)]
pub struct CalcToken {
    /// The token’s kind or category (e.g. identifier, operator, number).
    pub token_id: TokenID,
    /// The associated value for the token, if applicable.
    pub value: TokenValue,
    /// The line number in the input source where the token occurs.
    pub line_no: usize,
}

impl Token for CalcToken {
    /// The associated identifier type used to classify this token.
    type TokenID = TokenID;

    /// Returns the token’s kind identifier.
    fn token_id(&self) -> Self::TokenID {
        self.token_id
    }

    /// Returns the line number where the token appears.
    fn line_no(&self) -> usize {
        self.line_no
    }
}

/// A stateful driver for the calculator lexer.
///
/// `CalcLexerDriver` orchestrates rule actions emitted by [`Lexer`], keeping
/// the minimal state needed during lexing (e.g., nested-comment depth).
///
/// The driver is generic over an input source `I` that yields bytes (`u8`)
/// and supports contextual access to a symbol table via
/// [`TryNextWithContext<Item = u8, Context = SymTab>`].
///
/// # State
///
/// - [`comment_level`](#structfield.comment_level):
///   Tracks the current nesting level of block comments. It is incremented on
///   comment open (e.g. `/*`) and decremented on comment close (e.g. `*/`).
///   No tokens are emitted while `comment_level > 0`.
///
/// - [`_marker`](#structfield._marker):
///   A `PhantomData<I>` marker to bind the generic `I` without storing a value.
///
/// # Associated Types (via `LexerDriver`)
///
/// - `LexerData = LexData` — Tokenization metadata and rule IDs produced by the
///   lexer generator.
/// - `Token = CalcToken` — The concrete token type emitted by the lexer.
/// - `Lexer = Lexer<I, Self>` — The concrete lexer over input `I` driven by this type.
/// - `Error = CalcError` — Unified error type used during lexing.
/// - `Context = I::Context` — External context available to actions (here: `SymTab`).
///
/// # Action Handling
///
/// The [`action`](Self::action) method is invoked whenever the underlying DFA
/// recognizes a rule.
///
/// # Errors
/// This implementation returns:
/// - `CalcError::ParseInt` when a numeric literal can’t be parsed,
/// - `CalcError::FromUtf8` for invalid UTF-8 in identifiers/strings,
/// - `CalcError::SymTab` for symbol-table failures (e.g., invalid index).
pub struct CalcLexerDriver<I> {
    /// Current nesting depth of block comments.
    ///
    /// - Incremented on comment open (e.g., `/*`).
    /// - Decremented on comment close (e.g., `*/`).
    /// - It never goes negative; reaching EOF with a positive value
    ///   (i.e., inside an unterminated comment) yields an error token.
    comment_level: i32,

    /// Marker to bind the driver to the input type `I` without storing it.
    _marker: PhantomData<I>,
}

impl<I> LexerDriver for CalcLexerDriver<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Rule identifiers and metadata produced by the lexer.
    type LexerData = LexData;

    /// Concrete token type emitted by the driver.
    type Token = CalcToken;

    /// Concrete lexer type parameterized by input and driver.
    type Lexer = Lexer<I, Self>;

    /// Unified error type returned by actions.
    type Error = CalcError;

    /// Externally supplied context available to actions (symbol table).
    type Context = I::Context;

    /// Handles a single lexer rule match.
    ///
    /// Called by the lexer when a DFA rule in [`Lexer`] fires. The implementation
    /// inspects `rule`, reads the matched span from `lexer`, and either:
    ///
    /// - emits a [`CalcToken`] (e.g., identifiers, numbers, operators),
    /// - updates internal state (e.g., `comment_level`),
    /// - or returns an error if the match is invalid.
    ///
    /// It also uses `context` (a [`SymTab`]) to intern identifiers and store
    /// their indices in [`TokenValue::Ident`].
    ///
    /// # Errors
    /// Propagates any lexical, parsing, UTF-8 decoding, or symbol-table errors as
    /// [`CalcError`].
    fn action(
        &mut self,
        lexer: &mut Self::Lexer,
        context: &mut Self::Context,
        rule: <Self::LexerData as LexerData>::LexerRule,
    ) -> Result<(), Self::Error> {
        match rule {
            Rule::Empty => {
                // Sentinel variant; never matched by the generated DFA.
                unreachable!()
            }
            Rule::Ident => {
                // <Expr> (?:[a-z_][a-z_A-Z0-9]*)
                let index = context.intern(lexer.take_str()?);
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Ident,
                    line_no: lexer.line_no(),
                    value: TokenValue::Ident(index),
                });
            }
            Rule::Number => {
                // <Expr> (?:[0-9]+)
                let s = lexer.take_str()?;
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Number,
                    line_no: lexer.line_no(),
                    value: TokenValue::Number(s.as_str().parse::<i64>()?),
                });
            }
            Rule::Semicolon => {
                // <Expr> ;
                lexer.yield_token(CalcToken {
                    token_id: TokenID::End,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Equals => {
                // <Expr> =
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Equals,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Plus => {
                // <Expr> \+
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Plus,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Minus => {
                // <Expr> -
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Minus,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Asterisk => {
                // <Expr> \*
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Asterisk,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Slash => {
                // <Expr> /
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Slash,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::LeftParen => {
                // <Expr> \(
                lexer.yield_token(CalcToken {
                    token_id: TokenID::LeftParen,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::RightParen => {
                // <Expr> \)
                lexer.yield_token(CalcToken {
                    token_id: TokenID::RightParen,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::CommentBegin => {
                // <Expr,Comment> /\*
                lexer.begin(Mode::Comment);
                self.comment_level += 1;
            }
            Rule::CommentEnd => {
                // <Comment> \*/
                self.comment_level -= 1;
                if self.comment_level == 0 {
                    lexer.begin(Mode::Expr);
                }
            }
            Rule::CommentChar => {
                // <Comment> .+
                // Comment contents are discarded.
            }
            Rule::NewLine => {
                // <*> (?:\n)
                lexer.inc_line_no();
            }
            Rule::WhiteSpace => {
                // <Expr> (?:[ \t])+
                // Insignificant whitespace is skipped.
            }
            Rule::Error => {
                // <*> .
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Error,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::End => {
                // End of input: in expression mode this closes a normal
                // parsing unit; inside a comment it means the comment was
                // never terminated, so an error token is emitted instead.
                if lexer.mode() == Mode::Expr {
                    lexer.yield_token(CalcToken {
                        token_id: TokenID::End,
                        line_no: lexer.line_no(),
                        value: TokenValue::None,
                    });
                } else {
                    lexer.yield_token(CalcToken {
                        token_id: TokenID::Error,
                        line_no: lexer.line_no(),
                        value: TokenValue::None,
                    });
                }
            }
        }
        Ok(())
    }
}

/// The calculator lexer.
///
/// `CalcLexer<I>` adapts a byte-oriented input stream `I` (that supports
/// contextual access to a [`SymTab`]) into an iterator-like interface that
/// yields [`CalcToken`]s. Internally, it owns a lower-level [`Lexer`] driven by
/// [`CalcLexerDriver`], which handles rule actions (e.g., interning identifiers,
/// parsing numbers, skipping comments/whitespace).
///
/// The generic parameter `I` must implement
/// [`TryNextWithContext<Item = u8, Context = SymTab>`], allowing the lexer to
/// pull bytes and mutate/read the external symbol table while tokenizing.
///
/// # Output
///
/// Each successful step returns a [`CalcToken`], containing:
/// - the token kind ([`TokenID`]),
/// - an optional associated value ([`TokenValue`], e.g., an `Ident` index or a `Number`),
/// - and the source line (`line_no`) for diagnostics.
///
/// # Errors
///
/// Methods return a [`LexerError<I::Error, CalcError>`], where:
/// - `I::Error` is any error produced by the underlying input,
/// - [`CalcError`] covers lexical/parsing/UTF-8/symbol-table errors.
///
/// # Example
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcLexer, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello\n +\n world\n\n123".bytes());
/// let mut lexer = CalcLexer::try_new(input).unwrap();
/// let vs = lexer.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 5);
/// assert_eq!(symtab.len(), 2);
/// ```
pub struct CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// The underlying DFA/engine that drives tokenization, parameterized by the
    /// input `I` and the driver that executes rule actions.
    lexer: Lexer<I, CalcLexerDriver<I>>,
}

impl<I> CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Constructs a new calculator lexer over the provided input stream.
    ///
    /// This initializes an internal [`Lexer`] with a [`CalcLexerDriver`] that
    /// performs rule actions such as:
    /// - interning identifiers into the provided [`SymTab`] (via context),
    /// - converting matched byte slices into numbers/idents,
    /// - tracking line numbers and comment nesting.
    ///
    /// # Errors
    ///
    /// Returns a [`LexerError`] if the lexer cannot be constructed from the
    /// given input (rare, but may occur if the input source fails during setup).
    pub fn try_new(
        input: I,
    ) -> Result<
        Self,
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>,
    > {
        let driver = CalcLexerDriver {
            comment_level: 0,
            _marker: PhantomData,
        };
        let lexer = Lexer::try_new(input, driver)?;
        Ok(Self { lexer })
    }
}

impl<I> TryNextWithContext for CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Tokens produced by this lexer.
    type Item = CalcToken;

    /// Unified error type.
    type Error =
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>;

    /// External context available while lexing (a [`SymTab`]).
    type Context = I::Context;

    /// Advances the lexer and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (typically a [`SymTab`]) may be mutated by rule
    /// actions (for example, to intern identifiers). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
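    ///
    /// For example (a short sketch based on the lexer rules above, where both
    /// `;` and end of input yield a [`TokenID::End`] token in expression mode):
    ///
    /// ```rust
    /// # use parlex_calc::{CalcLexer, IterInput, SymTab};
    /// # use try_next::TryNextWithContext;
    /// let mut symtab = SymTab::new();
    /// let input = IterInput::from("1;2".bytes());
    /// let mut lexer = CalcLexer::try_new(input).unwrap();
    /// // Number(1), End (from ';'), Number(2), End (from end of input).
    /// let toks = lexer.try_collect_with_context(&mut symtab).unwrap();
    /// assert_eq!(toks.len(), 4);
    /// ```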
    fn try_next_with_context(
        &mut self,
        context: &mut I::Context,
    ) -> Result<Option<CalcToken>, <Self as TryNextWithContext>::Error> {
        self.lexer.try_next_with_context(context)
    }
}

/// A driver that defines semantic actions for the calculator parser.
///
/// The [`CalcParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and calculator-specific
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`] and is responsible for building or evaluating partial
/// results, such as computing arithmetic expressions, populating the symbol
/// table, or constructing an AST.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`CalcToken`]s and maintains a
///   contextual [`SymTab`]. Must implement
///   [`TryNextWithContext<Item = CalcToken, Context = SymTab>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = CalcToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self>`:
///   The parser engine parameterized by this driver.
/// - `Error = CalcError`:
///   Unified error type propagated during parsing.
/// - `Context = I::Context`:
///   Externally supplied context, such as a [`SymTab`].
///
/// # Responsibilities
///
/// The parser driver performs calculator-specific actions:
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   can perform semantic actions such as arithmetic evaluation, updating the
///   symbol table, or producing intermediate values.
///
/// # Example
/// ```rust,ignore
/// let mut driver = CalcParserDriver::<MyLexer>::default();
/// let mut parser = Parser::<MyLexer, _>::new(lexer, driver);
///
/// let mut symtab = SymTab::new();
/// while let Some(result) = parser.try_next_with_context(&mut symtab)? {
///     println!("Parsed expression result: {result:?}");
/// }
/// ```
///
/// # Notes
///
/// - The driver may be stateless (`_marker` only), or store intermediate
///   evaluation state if needed.
/// - Ambiguities can be resolved dynamically based on the current parse state
///   or the next lookahead token.
/// - The `reduce` method corresponds to grammar rules such as:
///   ```text
///   Expr → Expr '+' Term
///   Expr → Term
///   Term → NUMBER
///   ```
///   allowing the driver to fold numerical operations or emit result nodes.
pub struct CalcParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,
}

impl<I> ParserDriver for CalcParserDriver<I>
where
    I: TryNextWithContext<Item = CalcToken, Context = SymTab>,
{
    /// Parser metadata generated from the calculator grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = CalcToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self>;

    /// Error type for semantic or parsing failures.
    type Error = CalcError;

    /// Context (symbol table or shared state).
    type Context = I::Context;

    /// Resolves grammar ambiguities when multiple parse actions are valid.
    ///
    /// The driver can inspect the parser conflict (`ambig`) and the upcoming
    /// token (`_tok2`) to decide which parse branch to follow. This method
    /// returns the selected [`ParserAction`].
    ///
    /// By default, most calculator grammars are unambiguous, so this method
    /// may simply return a default action or be left unimplemented.
    ///
    /// # Shift/Reduce Conflicts
    ///
    /// In practice, this hook is primarily used to resolve **Shift/Reduce**
    /// conflicts — cases where the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other types of conflicts (such as **Reduce/Reduce**) are much more
    /// difficult to handle programmatically and usually require modifying
    /// the grammar itself to eliminate the ambiguity.
    ///
    /// In a typical arithmetic grammar, you can use operator precedence and
    /// associativity to decide whether to shift or reduce. For example:
    ///
    /// ```text
    /// Expr -> Expr '+' Expr
    /// ```
    ///
    /// When the incoming token is `*`, the driver can compare the precedence
    /// of `'+'` (lower) vs. `'*'` (higher) and decide to **Shift**, allowing
    /// the parser to defer reduction until the higher-precedence operation
    /// (`*`) is parsed first.
    ///
    /// This strategy ensures that the resulting parse tree respects the
    /// intended operator precedence and associativity rules.
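    ///
    /// A hedged sketch of such a precedence-based decision (the helpers
    /// `prec_of` and `prec_of_conflicting_op`, and the `[Shift, Reduce]`
    /// ordering of the `lookup_ambig` entries, are assumptions for
    /// illustration, not part of the generated tables):
    ///
    /// ```rust,ignore
    /// let actions = ParData::lookup_ambig(ambig); // assumed: [Shift, Reduce]
    /// // Hypothetical precedence levels: '+'/'-' => 1, '*'/'/' => 2.
    /// if prec_of(tok2.token_id()) > prec_of_conflicting_op(ambig) {
    ///     Ok(actions[0]) // Shift: let the higher-precedence operator bind first.
    /// } else {
    ///     Ok(actions[1]) // Reduce: fold the completed production now.
    /// }
    /// ```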
    fn resolve_ambiguity(
        &mut self,
        _parser: &mut Self::Parser,
        _context: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        _tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, Self::Error> {
        Ok(ParData::lookup_ambig(ambig)[1]) // Reduce
    }

    /// Performs semantic reduction for a completed grammar production.
    ///
    /// This is the main hook for calculator logic: each time the parser
    /// recognizes a rule (identified by `prod_id`), the driver can evaluate
    /// or construct the corresponding result, possibly updating the context.
    ///
    /// For example, when reducing:
    /// ```text
    /// Expr -> Expr '+' Expr
    /// ```
    /// the driver may pop the right-hand values from the parser stack, perform
    /// the addition, and push the result back.
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        context: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), Self::Error> {
        match prod_id {
            ProdID::Start => {
                // Start -> Seq
                // Accept - does not get reduced
                unreachable!()
            }
            ProdID::Stat1 => {
                // Stat ->
                parser.tokens_push(CalcToken {
                    token_id: TokenID::Stat,
                    line_no: token.line_no(),
                    value: TokenValue::None,
                });
            }
            ProdID::Stat2 => {
                // Stat -> Expr
                let mut expr = parser.tokens_pop();
                expr.token_id = TokenID::Stat;
                parser.tokens_push(expr);
            }
            ProdID::Stat3 => {
                // Stat -> ident = Expr
                let mut expr = parser.tokens_pop();
                let TokenValue::Number(value) = expr.value else {
                    unreachable!()
                };
                parser.tokens_pop(); // the '='
                let ident = parser.tokens_pop();
                let TokenValue::Ident(index) = ident.value else {
                    unreachable!()
                };
                context.set(index, value)?;
                expr.token_id = TokenID::Stat;
                parser.tokens_push(expr);
            }
            ProdID::Expr1 => {
                // Expr -> number
                let mut number = parser.tokens_pop();
                number.token_id = TokenID::Expr;
                parser.tokens_push(number);
            }
            ProdID::Expr2 => {
                // Expr -> ident
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                let TokenValue::Ident(index) = tok.value else {
                    unreachable!()
                };
                tok.value = TokenValue::Number(context.get(index)?);
                parser.tokens_push(tok);
            }
            ProdID::Expr3 => {
                // Expr -> Expr + Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '+'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 + value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr4 => {
                // Expr -> Expr - Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '-'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 - value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr5 => {
                // Expr -> Expr * Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '*'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 * value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr6 => {
                // Expr -> Expr / Expr
                // Note: integer division; a zero divisor panics here, since
                // CalcError has no dedicated division-by-zero variant.
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '/'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 / value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr7 => {
                // Expr -> - Expr
                let mut expr = parser.tokens_pop();
                parser.tokens_pop(); // the '-'
                let TokenValue::Number(value) = expr.value else {
                    unreachable!()
                };
                expr.value = TokenValue::Number(-value);
                parser.tokens_push(expr);
            }
            ProdID::Expr8 => {
                // Expr -> ( Expr )
                parser.tokens_pop(); // the ')'
                let expr = parser.tokens_pop();
                parser.tokens_pop(); // the '('
                parser.tokens_push(expr);
            }
        }
        Ok(())
    }
}

/// The calculator parser, a wrapper that couples:
/// - the calculator lexer ([`CalcLexer`]) producing [`CalcToken`]s, and
/// - the calculator parser driver ([`CalcParserDriver`]) implementing reductions
///   and ambiguity resolution for the calculator grammar.
///
/// `CalcParser<I>` exposes an iterator-like interface via
/// [`TryNextWithContext`], yielding completed parse results (e.g., one per
/// “sentence” or top-level expression) while using a shared [`SymTab`] as
/// context. Internally it owns a generic [`Parser`] that pulls tokens
/// from `CalcLexer` and executes semantic actions in `CalcParserDriver`.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Item = u8, Context = SymTab>`].
/// - **Output**: completed parsing units as [`CalcToken`] values (typically
///   grammar-level results like expressions/statements).
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The calculator grammar also accepts an *empty* statement, which is returned
/// as a token with [`TokenValue::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a semicolon (`;`) but followed by no further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed statement.
/// 2. It then emits an additional token representing the empty statement
///    (`TokenValue::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input,
/// including empty or separator-only statements.
///
/// # Errors
///
/// All failures are surfaced through a composed
/// [`ParserError<LexerError<I::Error, CalcError>, CalcError, CalcToken>`]:
/// - `I::Error` — errors from the input source,
/// - [`CalcError`] — lexical/semantic errors (e.g., UTF-8, integer parsing,
///   symbol-table issues).
///
/// # Examples
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcParser, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello = 1;\n foo =\n 5 + 3 * 2;\n (world + hello + 10) * -2;\n\n1000 - - -123".bytes());
/// let mut parser = CalcParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 4);
/// assert_eq!(symtab.len(), 3);
/// ```
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcParser, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello = 1;\n 1 + 2;\n (world + hello + 10) * -2;\n\n1000 - - -123".bytes());
/// let mut parser = CalcParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
pub struct CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// The underlying parser engine, coupling the calculator lexer with the
    /// calculator parser driver.
    parser: Parser<CalcLexer<I>, CalcParserDriver<CalcLexer<I>>>,
}

impl<I> CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
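    /// Constructs a new calculator parser over the provided byte input.
    ///
    /// This builds a [`CalcLexer`] over `input` and couples it with a
    /// [`CalcParserDriver`] inside a generic [`Parser`].
    ///
    /// # Errors
    ///
    /// Returns a [`ParserError`] if the underlying lexer cannot be
    /// constructed from the given input.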
    pub fn try_new(
        input: I,
    ) -> Result<
        Self,
        ParserError<
            LexerError<
                <I as TryNextWithContext>::Error,
                <CalcLexerDriver<I> as LexerDriver>::Error,
            >,
            <CalcParserDriver<CalcLexer<I>> as ParserDriver>::Error,
            CalcToken,
        >,
    > {
        let lexer = CalcLexer::try_new(input).map_err(ParserError::Lexer)?;
        let driver = CalcParserDriver {
            _marker: PhantomData,
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext for CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    type Item = CalcToken;
    type Error = ParserError<
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>,
        <CalcParserDriver<CalcLexer<I>> as ParserDriver>::Error,
        CalcToken,
    >;
    type Context = I::Context;

    fn try_next_with_context(
        &mut self,
        context: &mut I::Context,
    ) -> Result<Option<CalcToken>, <Self as TryNextWithContext>::Error> {
        self.parser.try_next_with_context(context)
    }
}

#[cfg(test)]
mod tests {
    use crate::{CalcLexer, CalcParser, CalcToken, IterInput, SymTab, TokenID, TokenValue};
    use try_next::TryNextWithContext;

    #[test]
    fn calc_lexer_1() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut symtab = SymTab::new();
        let input = IterInput::from("hello\n +\n world\n\n123".bytes());
        let mut lexer = CalcLexer::try_new(input).unwrap();
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                line_no: 1,
                value: TokenValue::Ident(0)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Plus,
                line_no: 2,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                line_no: 3,
                value: TokenValue::Ident(1)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Number,
                line_no: 5,
                value: TokenValue::Number(123)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::End,
                line_no: 5,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
    }

    #[test]
    fn calc_parser_1() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut symtab = SymTab::new();
        let input = IterInput::from(
            "hello = 1;\n 1 + 2;\n (world + hello + 10) * -2;\n\n1000 - - -123;".bytes(),
        );
        let mut parser = CalcParser::try_new(input).unwrap();
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 1,
                value: TokenValue::Number(1)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 2,
                value: TokenValue::Number(3)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 3,
                value: TokenValue::Number(-22)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 5,
                value: TokenValue::Number(877)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 5,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
    }
}
1125}