parlex_calc/calc.rs
/// Includes the generated lexer definition produced by **`parlex-gen`**’s
/// [`alex`](https://crates.io/crates/parlex-gen) tool.
///
/// The included file (`lexer_data.rs`) contains the DFA tables, mode definitions,
/// and rule implementations required for the [`CalcLexer`]. It is generated at
/// build time by the project’s `build.rs` script.
pub mod lexer_data {
    include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
}

/// Includes the generated SLR parser tables and definitions.
///
/// This file (`parser_data.rs`) is produced by the **parlex-gen** [`aslr`] tool
/// during the build process. It defines the parsing automaton, rule metadata,
/// and associated enum types used by the [`CalcParser`].
pub mod parser_data {
    include!(concat!(env!("OUT_DIR"), "/parser_data.rs"));
}

use crate::{SymTab, SymTabError};
use lexer_data::{LexData, Mode, Rule};
use parlex::{
    Lexer, LexerData, LexerDriver, LexerError, Parser, ParserAction, ParserData, ParserDriver,
    ParserError, Token,
};
use parser_data::{AmbigID, ParData, ProdID, StateID, TokenID};
use std::convert::Infallible;
use std::iter::Fuse;
use std::marker::PhantomData;
use thiserror::Error;
use try_next::TryNextWithContext;

/// Represents all possible errors that can occur within the calculator.
///
/// The [`CalcError`] enum aggregates various error sources encountered
/// during lexical analysis, parsing, and symbol-table operations.
/// It implements [`std::error::Error`] via [`thiserror::Error`], allowing
/// ergonomic error propagation with the `?` operator.
///
/// Each variant wraps a more specific underlying error type.
///
/// # Variants
///
/// - [`CalcError::ParseInt`]:
///   Returned when a numeric literal cannot be parsed into an integer,
///   typically originating from [`std::num::ParseIntError`].
///
/// - [`CalcError::FromUtf8`]:
///   Returned when the input contains invalid UTF-8 byte sequences and
///   cannot be decoded into a [`String`].
///
/// - [`CalcError::SymTab`]:
///   Wraps an error from the symbol table subsystem ([`SymTabError`]).
///
/// # Example
/// ```rust
/// # use parlex_calc::{CalcError, SymTabError};
/// # use std::str::FromStr;
/// // Example of a parse error bubbling up as CalcError::ParseInt
/// let result: Result<i64, CalcError> = i64::from_str("notanumber").map_err(CalcError::from);
/// assert!(matches!(result.unwrap_err(), CalcError::ParseInt(_)));
///
/// // Example of a symbol-table error propagation
/// let sym_err = SymTabError::InvalidIndex { index: 10, len: 3 };
/// let err = CalcError::from(sym_err);
/// assert!(matches!(err, CalcError::SymTab(_)));
/// ```
#[derive(Debug, Error)]
pub enum CalcError {
    /// An integer literal could not be parsed from its string representation.
    ///
    /// Typically originates from [`std::num::ParseIntError`].
    #[error("unable to parse {0:?}")]
    ParseInt(#[from] std::num::ParseIntError),

    /// Failed to decode UTF-8 bytes from input.
    ///
    /// Wraps a [`std::string::FromUtf8Error`].
    #[error("utf8 error {0:?}")]
    FromUtf8(#[from] std::string::FromUtf8Error),

    /// A symbol-table operation failed.
    ///
    /// Wraps a [`SymTabError`] produced by symbol-table lookups or updates.
    #[error("symtab error {0:?}")]
    SymTab(#[from] SymTabError),
}

/// An input adapter that wraps any iterator and provides a `TryNextWithContext`
/// interface, automatically fusing the iterator so it never yields items
/// after returning `None` once.
///
/// # Type Parameters
///
/// - `I`: The underlying iterator type. It can be any `Iterator`.
/// - `C`: The *context* type, which is passed by mutable reference to each
///   `try_next_with_context` call.
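///
/// # Example
///
/// A minimal usage sketch (the context is only threaded through; `IterInput`
/// itself never touches it):
///
/// ```rust
/// # use parlex_calc::{IterInput, SymTab};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let mut input = IterInput::from("ab".bytes());
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), Some(b'a'));
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), Some(b'b'));
/// assert_eq!(input.try_next_with_context(&mut symtab).unwrap(), None);
/// ```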
pub struct IterInput<I, C>
where
    I: Iterator,
{
    /// The underlying fused iterator.
    iter: Fuse<I>,

    /// Marker to tie the adapter to the context type `C` without owning
    /// a value of it.
    _marker: PhantomData<fn(C)>,
}

impl<I, C> IterInput<I, C>
where
    I: Iterator,
{
    /// Creates a new `IterInput` from any iterator.
    ///
    /// The iterator is automatically fused internally, so that once it returns
    /// `None`, all further `next()` calls will also return `None`.
    pub fn from(iter: I) -> Self {
        Self {
            iter: iter.fuse(),
            _marker: PhantomData,
        }
    }
}

impl<I, C> TryNextWithContext for IterInput<I, C>
where
    I: Iterator,
{
    type Item = I::Item;
    type Error = Infallible;
    type Context = C;

    #[inline]
    fn try_next_with_context(
        &mut self,
        _context: &mut Self::Context,
    ) -> Result<Option<Self::Item>, Self::Error> {
        Ok(self.iter.next())
    }
}

/// Represents the value carried by a lexical token.
///
/// Each token in the lexer may carry optional data depending on its kind.
/// For example, identifiers and numbers store extra information such as
/// an index into the symbol table or a literal numeric value.
///
/// This type is used inside a [`CalcToken`] struct alongside a
/// [`TokenID`] indicating what category the token belongs to.
///
/// # Variants
///
/// - [`TokenValue::None`]:
///   Used for tokens that do not carry any extra data (e.g., punctuation, operators).
///
/// - [`TokenValue::Ident(usize)`]:
///   Stores the **symbol table index** of an identifier.
///   The `usize` refers to an entry in a [`SymTab`].
///
/// - [`TokenValue::Number(i64)`]:
///   Represents an integer literal value.
///
/// # Example
/// ```rust
/// # use parlex_calc::TokenValue;
///
/// let ident = TokenValue::Ident(0);
/// let number = TokenValue::Number(42);
/// let punct = TokenValue::None;
///
/// match number {
///     TokenValue::Number(n) => assert_eq!(n, 42),
///     _ => unreachable!(),
/// }
/// ```
#[derive(Debug, Clone)]
pub enum TokenValue {
    /// No associated data (for symbols or keywords).
    None,

    /// Identifier token with an index into the symbol table.
    Ident(usize),

    /// Integer literal token.
    Number(i64),
}

/// A concrete implementation of a lexical token used by the calculator.
///
/// The [`CalcToken`] type represents a single lexical unit (identifier,
/// numeric literal, or operator) recognized by the calculator’s lexer.
/// It implements the generic [`Token`] trait, providing access to its
/// token identifier and source position (line number).
///
/// This structure ties together:
/// - The token’s identifier (via [`TokenID`]),
/// - The token’s **associated data** (via [`TokenValue`]),
/// - The **line number** where it occurs in the input stream.
///
/// # Trait Implementation
///
/// Implements the [`Token`] trait, providing:
/// - [`token_id()`](#method.token_id): returns the token’s [`TokenID`].
/// - [`line_no()`](#method.line_no): returns the source line number.
///
/// # Fields
///
/// - [`token_id`](#structfield.token_id):
///   The category of token (identifier, number, operator, etc.).
///
/// - [`value`](#structfield.value):
///   The token’s associated value — for instance, a symbol-table index
///   or literal number — stored as a [`TokenValue`].
///
/// - [`line_no`](#structfield.line_no):
///   The 1-based line number where the token appears in the source.
///
/// # Example
/// ```rust
/// # use parlex_calc::{CalcToken, TokenID, TokenValue};
/// # use parlex::Token;
/// let token = CalcToken {
///     token_id: TokenID::Number,
///     value: TokenValue::Number(99),
///     line_no: 3,
/// };
///
/// assert_eq!(token.token_id(), TokenID::Number);
/// assert_eq!(token.line_no(), 3);
/// ```
#[derive(Debug, Clone)]
pub struct CalcToken {
    /// The token’s kind or category (e.g. identifier, operator, number).
    pub token_id: TokenID,
    /// The associated value for the token, if applicable.
    pub value: TokenValue,
    /// The line number in the input source where the token occurs.
    pub line_no: usize,
}

impl Token for CalcToken {
    /// The associated identifier type used to classify this token.
    type TokenID = TokenID;

    /// Returns the token’s kind identifier.
    fn token_id(&self) -> Self::TokenID {
        self.token_id
    }

    /// Returns the line number where the token appears.
    fn line_no(&self) -> usize {
        self.line_no
    }
}

/// A stateful driver for the calculator lexer.
///
/// `CalcLexerDriver` orchestrates rule actions emitted by [`Lexer`], keeping
/// the minimal state needed during lexing (e.g., nested-comment depth).
///
/// The driver is generic over an input source `I` that yields bytes (`u8`)
/// and supports contextual access to a symbol table via
/// [`TryNextWithContext<Item = u8, Context = SymTab>`].
///
/// # State
///
/// - [`comment_level`](#structfield.comment_level):
///   Tracks the current nesting level of block comments. It is incremented on
///   comment open (e.g. `/*`) and decremented on comment close (e.g. `*/`).
///   No tokens are emitted while `comment_level > 0`.
///
/// - [`_marker`](#structfield._marker):
///   A `PhantomData<I>` marker to bind the generic `I` without storing a value.
///
/// # Associated Types (via `LexerDriver`)
///
/// - `LexerData = LexData` — Tokenization metadata and rule IDs produced by the
///   lexer generator.
/// - `Token = CalcToken` — The concrete token type emitted by the lexer.
/// - `Lexer = Lexer<I, Self>` — The concrete lexer over input `I` driven by this type.
/// - `Error = CalcError` — Unified error type used during lexing.
/// - `Context = I::Context` — External context available to actions (here: `SymTab`).
///
/// # Action Handling
///
/// The [`action`](Self::action) method is invoked whenever the underlying DFA
/// recognizes a rule.
///
/// # Errors
/// This implementation returns:
/// - `CalcError::ParseInt` when a numeric literal can’t be parsed,
/// - `CalcError::FromUtf8` for invalid UTF-8 in identifiers/strings,
/// - `CalcError::SymTab` for symbol-table failures (e.g., invalid index).
pub struct CalcLexerDriver<I> {
    /// Current nesting depth of block comments.
    ///
    /// - Incremented on comment open (e.g., `/*`).
    /// - Decremented on comment close (e.g., `*/`).
    /// - It never goes negative; reaching EOF with a positive value
    ///   (i.e., inside an unterminated comment) yields an error token.
    comment_level: i32,

    /// Marker to bind the driver to the input type `I` without storing it.
    _marker: PhantomData<I>,
}

impl<I> LexerDriver for CalcLexerDriver<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Rule identifiers and metadata produced by the lexer.
    type LexerData = LexData;

    /// Concrete token type emitted by the driver.
    type Token = CalcToken;

    /// Concrete lexer type parameterized by input and driver.
    type Lexer = Lexer<I, Self>;

    /// Unified error type returned by actions.
    type Error = CalcError;

    /// Externally supplied context available to actions (symbol table).
    type Context = I::Context;

    /// Handles a single lexer rule match.
    ///
    /// Called by the lexer when a DFA rule in [`Lexer`] fires. The implementation
    /// inspects `rule`, reads the matched span from `lexer`, and either:
    ///
    /// - emits a [`CalcToken`] (e.g., identifiers, numbers, operators),
    /// - updates internal state (e.g., `comment_level`),
    /// - or returns an error if the match is invalid.
    ///
    /// It also uses `context` (a [`SymTab`]) to intern identifiers and store
    /// their indices in [`TokenValue::Ident`].
    ///
    /// # Errors
    /// Propagates any lexical, parsing, UTF-8 decoding, or symbol-table errors as
    /// [`CalcError`].
    fn action(
        &mut self,
        lexer: &mut Self::Lexer,
        context: &mut Self::Context,
        rule: <Self::LexerData as LexerData>::LexerRule,
    ) -> Result<(), Self::Error> {
        match rule {
            Rule::Empty => {
                // Sentinel variant; never matched by the generated DFA.
                unreachable!()
            }
            Rule::Ident => {
                // <Expr> (?:[a-z_][a-z_A-Z0-9]*)
                let index = context.intern(lexer.take_str()?);
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Ident,
                    line_no: lexer.line_no(),
                    value: TokenValue::Ident(index),
                });
            }
            Rule::Number => {
                // <Expr> (?:[0-9]+)
                let s = lexer.take_str()?;
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Number,
                    line_no: lexer.line_no(),
                    value: TokenValue::Number(s.as_str().parse::<i64>()?),
                });
            }
            Rule::Semicolon => {
                // <Expr> ;
                lexer.yield_token(CalcToken {
                    token_id: TokenID::End,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Equals => {
                // <Expr> =
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Equals,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Plus => {
                // <Expr> \+
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Plus,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Minus => {
                // <Expr> -
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Minus,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Asterisk => {
                // <Expr> \*
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Asterisk,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::Slash => {
                // <Expr> /
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Slash,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::LeftParen => {
                // <Expr> \(
                lexer.yield_token(CalcToken {
                    token_id: TokenID::LeftParen,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::RightParen => {
                // <Expr> \)
                lexer.yield_token(CalcToken {
                    token_id: TokenID::RightParen,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::CommentBegin => {
                // <Expr,Comment> /\*
                lexer.begin(Mode::Comment);
                self.comment_level += 1;
            }
            Rule::CommentEnd => {
                // <Comment> \*/
                self.comment_level -= 1;
                if self.comment_level == 0 {
                    lexer.begin(Mode::Expr);
                }
            }
            Rule::CommentChar => {
                // <Comment> .+
                // Comment contents are discarded.
            }
            Rule::NewLine => {
                // <*> (?:\n)
                lexer.inc_line_no();
            }
            Rule::WhiteSpace => {
                // <Expr> (?:[ \t])+
                // Insignificant whitespace is skipped.
            }
            Rule::Error => {
                // <*> .
                lexer.yield_token(CalcToken {
                    token_id: TokenID::Error,
                    line_no: lexer.line_no(),
                    value: TokenValue::None,
                });
            }
            Rule::End => {
                // End of input: in expression mode this closes a normal
                // parsing unit; inside a comment it means the comment was
                // never terminated, so an error token is emitted instead.
                if lexer.mode() == Mode::Expr {
                    lexer.yield_token(CalcToken {
                        token_id: TokenID::End,
                        line_no: lexer.line_no(),
                        value: TokenValue::None,
                    });
                } else {
                    lexer.yield_token(CalcToken {
                        token_id: TokenID::Error,
                        line_no: lexer.line_no(),
                        value: TokenValue::None,
                    });
                }
            }
        }
        Ok(())
    }
}

/// The calculator lexer.
///
/// `CalcLexer<I>` adapts a byte-oriented input stream `I` (that supports
/// contextual access to a [`SymTab`]) into an iterator-like interface that
/// yields [`CalcToken`]s. Internally, it owns a lower-level [`Lexer`] driven by
/// [`CalcLexerDriver`], which handles rule actions (e.g., interning identifiers,
/// parsing numbers, skipping comments/whitespace).
///
/// The generic parameter `I` must implement
/// [`TryNextWithContext<Item = u8, Context = SymTab>`], allowing the lexer to
/// pull bytes and mutate/read the external symbol table while tokenizing.
///
/// # Output
///
/// Each successful step returns a [`CalcToken`], containing:
/// - the token kind ([`TokenID`]),
/// - an optional associated value ([`TokenValue`], e.g., an `Ident` index or a `Number`),
/// - and the source line (`line_no`) for diagnostics.
///
/// # Errors
///
/// Methods return a [`LexerError<I::Error, CalcError>`], where:
/// - `I::Error` is any error produced by the underlying input,
/// - [`CalcError`] covers lexical/parsing/UTF-8/symbol-table errors.
///
/// # Example
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcLexer, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello\n +\n world\n\n123".bytes());
/// let mut lexer = CalcLexer::try_new(input).unwrap();
/// let vs = lexer.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 5);
/// assert_eq!(symtab.len(), 2);
/// ```
pub struct CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// The underlying DFA/engine that drives tokenization, parameterized by the
    /// input `I` and the driver that executes rule actions.
    lexer: Lexer<I, CalcLexerDriver<I>>,
}

impl<I> CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Constructs a new calculator lexer over the provided input stream.
    ///
    /// This initializes an internal [`Lexer`] with a [`CalcLexerDriver`] that
    /// performs rule actions such as:
    /// - interning identifiers into the provided [`SymTab`] (via context),
    /// - converting matched byte slices into numbers/idents,
    /// - tracking line numbers and comment nesting.
    ///
    /// # Errors
    ///
    /// Returns a [`LexerError`] if the lexer cannot be constructed from the
    /// given input (rare, but may occur if the input source fails during setup).
    pub fn try_new(
        input: I,
    ) -> Result<
        Self,
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>,
    > {
        let driver = CalcLexerDriver {
            comment_level: 0,
            _marker: PhantomData,
        };
        let lexer = Lexer::try_new(input, driver)?;
        Ok(Self { lexer })
    }
}

impl<I> TryNextWithContext for CalcLexer<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// Tokens produced by this lexer.
    type Item = CalcToken;

    /// Unified error type.
    type Error =
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>;

    /// External context available while lexing (a [`SymTab`]).
    type Context = I::Context;

    /// Advances the lexer and returns the next token, or `None` at end of input.
    ///
    /// The provided `context` (typically a [`SymTab`]) may be mutated by rule
    /// actions (for example, to intern identifiers). This method is fallible;
    /// both input and lexical errors are converted into [`Self::Error`].
    ///
    /// # End of Input
    ///
    /// When the lexer reaches the end of the input stream, it will typically
    /// emit a final [`TokenID::End`] token before returning `None`.
    ///
    /// This explicit *End* token is expected by the **Parlex parser** to
    /// signal successful termination of a complete parsing unit.
    /// Consumers should treat this token as a logical *end-of-sentence* or
    /// *end-of-expression* marker, depending on the grammar.
    ///
    /// If the input contains **multiple independent sentences or expressions**,
    /// the lexer may emit multiple `End` tokens—one after each completed unit.
    /// In such cases, the parser can restart or resume parsing after each `End`
    /// to produce multiple parse results from a single input stream.
    ///
    /// Once all input has been consumed, the lexer returns `None`.
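    ///
    /// For example (a short sketch based on the lexer rules above, where both
    /// `;` and end of input yield a [`TokenID::End`] token in expression mode):
    ///
    /// ```rust
    /// # use parlex_calc::{CalcLexer, IterInput, SymTab};
    /// # use try_next::TryNextWithContext;
    /// let mut symtab = SymTab::new();
    /// let input = IterInput::from("1;2".bytes());
    /// let mut lexer = CalcLexer::try_new(input).unwrap();
    /// // Number(1), End (from ';'), Number(2), End (from end of input).
    /// let toks = lexer.try_collect_with_context(&mut symtab).unwrap();
    /// assert_eq!(toks.len(), 4);
    /// ```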
    fn try_next_with_context(
        &mut self,
        context: &mut I::Context,
    ) -> Result<Option<CalcToken>, <Self as TryNextWithContext>::Error> {
        self.lexer.try_next_with_context(context)
    }
}

/// A driver that defines semantic actions for the calculator parser.
///
/// The [`CalcParserDriver`] type implements [`ParserDriver`] and acts as the
/// bridge between the parser engine ([`Parser`]) and calculator-specific
/// semantic logic.
///
/// It provides the behavior for grammar reductions and ambiguity resolution
/// during parsing. Each reduction corresponds to a grammar production rule
/// in [`ParData`] and is responsible for building or evaluating partial
/// results, such as computing arithmetic expressions, populating the symbol
/// table, or constructing an AST.
///
/// # Type Parameters
///
/// - `I`: The input source (the lexer) that yields [`CalcToken`]s and maintains a
///   contextual [`SymTab`]. Must implement
///   [`TryNextWithContext<Item = CalcToken, Context = SymTab>`].
///
/// # Associated Types
///
/// - `ParserData = ParData`:
///   Generated parser metadata containing grammar rules, production IDs,
///   and ambiguity identifiers.
/// - `Token = CalcToken`:
///   The token type produced by the lexer and consumed by this parser.
/// - `Parser = Parser<I, Self>`:
///   The parser engine parameterized by this driver.
/// - `Error = CalcError`:
///   Unified error type propagated during parsing.
/// - `Context = I::Context`:
///   Externally supplied context, such as a [`SymTab`].
///
/// # Responsibilities
///
/// The parser driver performs calculator-specific actions:
///
/// - **`resolve_ambiguity`** — invoked when the grammar allows multiple valid
///   interpretations of a token sequence. The driver chooses which parse path
///   to follow by returning an appropriate [`ParserAction`].
/// - **`reduce`** — executed when a grammar production completes. The driver
///   can perform semantic actions such as arithmetic evaluation, updating the
///   symbol table, or producing intermediate values.
///
/// # Example
/// ```rust,ignore
/// let mut driver = CalcParserDriver::<MyLexer>::default();
/// let mut parser = Parser::<MyLexer, _>::new(lexer, driver);
///
/// let mut symtab = SymTab::new();
/// while let Some(result) = parser.try_next_with_context(&mut symtab)? {
///     println!("Parsed expression result: {result:?}");
/// }
/// ```
///
/// # Notes
///
/// - The driver may be stateless (`_marker` only), or store intermediate
///   evaluation state if needed.
/// - Ambiguities can be resolved dynamically based on the current parse state
///   or the next lookahead token.
/// - The `reduce` method corresponds to grammar rules such as:
///   ```text
///   Expr → Expr '+' Term
///   Expr → Term
///   Term → NUMBER
///   ```
///   allowing the driver to fold numerical operations or emit result nodes.
pub struct CalcParserDriver<I> {
    /// Marker to associate the driver with its input type `I`.
    _marker: PhantomData<I>,
}

impl<I> ParserDriver for CalcParserDriver<I>
where
    I: TryNextWithContext<Item = CalcToken, Context = SymTab>,
{
    /// Parser metadata generated from the calculator grammar.
    type ParserData = ParData;

    /// Token type consumed by the parser.
    type Token = CalcToken;

    /// Concrete parser engine type.
    type Parser = Parser<I, Self>;

    /// Error type for semantic or parsing failures.
    type Error = CalcError;

    /// Context (symbol table or shared state).
    type Context = I::Context;

    /// Resolves grammar ambiguities when multiple parse actions are valid.
    ///
    /// The driver can inspect the parser conflict (`ambig`) and the upcoming
    /// token (`_tok2`) to decide which parse branch to follow. This method
    /// returns the selected [`ParserAction`].
    ///
    /// By default, most calculator grammars are unambiguous, so this method
    /// may simply return a default action or be left unimplemented.
    ///
    /// # Shift/Reduce Conflicts
    ///
    /// In practice, this hook is primarily used to resolve **Shift/Reduce**
    /// conflicts — cases where the parser can either:
    /// - **Reduce** using a completed production rule, or
    /// - **Shift** the next incoming token (`tok2`).
    ///
    /// Other types of conflicts (such as **Reduce/Reduce**) are much more
    /// difficult to handle programmatically and usually require modifying
    /// the grammar itself to eliminate the ambiguity.
    ///
    /// In a typical arithmetic grammar, you can use operator precedence and
    /// associativity to decide whether to shift or reduce. For example:
    ///
    /// ```text
    /// Expr -> Expr '+' Expr
    /// ```
    ///
    /// When the incoming token is `*`, the driver can compare the precedence
    /// of `'+'` (lower) vs. `'*'` (higher) and decide to **Shift**, allowing
    /// the parser to defer reduction until the higher-precedence operation
    /// (`*`) is parsed first.
    ///
    /// This strategy ensures that the resulting parse tree respects the
    /// intended operator precedence and associativity rules.
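    ///
    /// A hedged sketch of such a precedence-based decision (the helpers
    /// `prec_of` and `prec_of_conflicting_op`, and the `[Shift, Reduce]`
    /// ordering of the `lookup_ambig` entries, are assumptions for
    /// illustration, not part of the generated tables):
    ///
    /// ```rust,ignore
    /// let actions = ParData::lookup_ambig(ambig); // assumed: [Shift, Reduce]
    /// // Hypothetical precedence levels: '+'/'-' => 1, '*'/'/' => 2.
    /// if prec_of(tok2.token_id()) > prec_of_conflicting_op(ambig) {
    ///     Ok(actions[0]) // Shift: let the higher-precedence operator bind first.
    /// } else {
    ///     Ok(actions[1]) // Reduce: fold the completed production now.
    /// }
    /// ```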
    fn resolve_ambiguity(
        &mut self,
        _parser: &mut Self::Parser,
        _context: &mut Self::Context,
        ambig: <Self::ParserData as ParserData>::AmbigID,
        _tok2: &Self::Token,
    ) -> Result<ParserAction<StateID, ProdID, AmbigID>, Self::Error> {
        Ok(ParData::lookup_ambig(ambig)[1]) // Reduce
    }

    /// Performs semantic reduction for a completed grammar production.
    ///
    /// This is the main hook for calculator logic: each time the parser
    /// recognizes a rule (identified by `prod_id`), the driver can evaluate
    /// or construct the corresponding result, possibly updating the context.
    ///
    /// For example, when reducing:
    /// ```text
    /// Expr -> Expr '+' Expr
    /// ```
    /// the driver may pop the right-hand values from the parser stack, perform
    /// the addition, and push the result back.
    fn reduce(
        &mut self,
        parser: &mut Self::Parser,
        context: &mut Self::Context,
        prod_id: <Self::ParserData as ParserData>::ProdID,
        token: &Self::Token,
    ) -> Result<(), Self::Error> {
        match prod_id {
            ProdID::Start => {
                // Start -> Seq
                // Accept - does not get reduced
                unreachable!()
            }
            ProdID::Stat1 => {
                // Stat ->
                parser.tokens_push(CalcToken {
                    token_id: TokenID::Stat,
                    line_no: token.line_no(),
                    value: TokenValue::None,
                });
            }
            ProdID::Stat2 => {
                // Stat -> Expr
                let mut expr = parser.tokens_pop();
                expr.token_id = TokenID::Stat;
                parser.tokens_push(expr);
            }
            ProdID::Stat3 => {
                // Stat -> ident = Expr
                let mut expr = parser.tokens_pop();
                let TokenValue::Number(value) = expr.value else {
                    unreachable!()
                };
                parser.tokens_pop(); // the '='
                let ident = parser.tokens_pop();
                let TokenValue::Ident(index) = ident.value else {
                    unreachable!()
                };
                context.set(index, value)?;
                expr.token_id = TokenID::Stat;
                parser.tokens_push(expr);
            }
            ProdID::Expr1 => {
                // Expr -> number
                let mut number = parser.tokens_pop();
                number.token_id = TokenID::Expr;
                parser.tokens_push(number);
            }
            ProdID::Expr2 => {
                // Expr -> ident
                let mut tok = parser.tokens_pop();
                tok.token_id = TokenID::Expr;
                let TokenValue::Ident(index) = tok.value else {
                    unreachable!()
                };
                tok.value = TokenValue::Number(context.get(index)?);
                parser.tokens_push(tok);
            }
            ProdID::Expr3 => {
                // Expr -> Expr + Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '+'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 + value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr4 => {
                // Expr -> Expr - Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '-'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 - value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr5 => {
                // Expr -> Expr * Expr
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '*'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 * value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr6 => {
                // Expr -> Expr / Expr
                // Note: integer division; a zero divisor panics here, since
                // CalcError has no dedicated division-by-zero variant.
                let expr2 = parser.tokens_pop();
                parser.tokens_pop(); // the '/'
                let mut expr1 = parser.tokens_pop();
                let TokenValue::Number(value1) = expr1.value else {
                    unreachable!()
                };
                let TokenValue::Number(value2) = expr2.value else {
                    unreachable!()
                };
                expr1.value = TokenValue::Number(value1 / value2);
                parser.tokens_push(expr1);
            }
            ProdID::Expr7 => {
                // Expr -> - Expr
                let mut expr = parser.tokens_pop();
                parser.tokens_pop(); // the '-'
                let TokenValue::Number(value) = expr.value else {
                    unreachable!()
                };
                expr.value = TokenValue::Number(-value);
                parser.tokens_push(expr);
            }
            ProdID::Expr8 => {
                // Expr -> ( Expr )
                parser.tokens_pop(); // the ')'
                let expr = parser.tokens_pop();
                parser.tokens_pop(); // the '('
                parser.tokens_push(expr);
            }
        }
        Ok(())
    }
}

/// The calculator parser, a wrapper that couples:
/// - the calculator lexer ([`CalcLexer`]) producing [`CalcToken`]s, and
/// - the calculator parser driver ([`CalcParserDriver`]) implementing reductions
///   and ambiguity resolution for the calculator grammar.
///
/// `CalcParser<I>` exposes an iterator-like interface via
/// [`TryNextWithContext`], yielding completed parse results (e.g., one per
/// “sentence” or top-level expression) while using a shared [`SymTab`] as
/// context. Internally it owns a generic [`Parser`] that pulls tokens
/// from `CalcLexer` and executes semantic actions in `CalcParserDriver`.
///
/// # Input / Output
///
/// - **Input**: any byte stream `I` implementing
///   [`TryNextWithContext<Item = u8, Context = SymTab>`].
/// - **Output**: completed parsing units as [`CalcToken`] values (typically
///   grammar-level results like expressions/statements).
///
/// # End Tokens and Multiple Sentences
///
/// The underlying lexer typically emits an explicit [`TokenID::End`] token at
/// the end of a *parsing unit* (end of “sentence” or expression). The parser
/// uses this to finalize and emit one result. If the input contains multiple
/// independent sentences, you will receive multiple results — one per `End` —
/// and `None` only after all input is consumed.
///
/// # Empty Statements
///
/// The calculator grammar also accepts an *empty* statement, which is returned
/// as a token with [`TokenValue::None`].
/// This occurs, for example, when the last statement in the input is terminated
/// by a semicolon (`;`) but followed by no further expression. In that case:
///
/// 1. The parser first emits the token for the preceding completed statement.
/// 2. It then emits an additional token representing the empty statement
///    (`TokenValue::None`).
/// 3. Finally, it returns `None`, indicating the end of the input stream.
///
/// This design allows the parser to fully reflect the structure of the input,
/// including empty or separator-only statements.
///
/// # Errors
///
/// All failures are surfaced through a composed
/// [`ParserError<LexerError<I::Error, CalcError>, CalcError, CalcToken>`]:
/// - `I::Error` — errors from the input source,
/// - [`CalcError`] — lexical/semantic errors (e.g., UTF-8, integer parsing,
///   symbol-table issues).
///
/// # Examples
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcParser, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello = 1;\n foo =\n 5 + 3 * 2;\n (world + hello + 10) * -2;\n\n1000 - - -123".bytes());
/// let mut parser = CalcParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 4);
/// assert_eq!(symtab.len(), 3);
/// ```
///
/// ```rust
/// # use parlex_calc::{CalcToken, CalcParser, IterInput, SymTab, TokenID, TokenValue};
/// # use try_next::TryNextWithContext;
/// let mut symtab = SymTab::new();
/// let input = IterInput::from("hello = 1;\n 1 + 2;\n (world + hello + 10) * -2;\n\n1000 - - -123".bytes());
/// let mut parser = CalcParser::try_new(input).unwrap();
/// let vs = parser.try_collect_with_context(&mut symtab).unwrap();
/// assert_eq!(vs.len(), 4);
/// ```
pub struct CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    /// The underlying parser engine, coupling the calculator lexer with the
    /// calculator parser driver.
    parser: Parser<CalcLexer<I>, CalcParserDriver<CalcLexer<I>>>,
}

impl<I> CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
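    /// Constructs a new calculator parser over the provided byte input.
    ///
    /// This builds a [`CalcLexer`] over `input` and couples it with a
    /// [`CalcParserDriver`] inside a generic [`Parser`].
    ///
    /// # Errors
    ///
    /// Returns a [`ParserError`] if the underlying lexer cannot be
    /// constructed from the given input.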
    pub fn try_new(
        input: I,
    ) -> Result<
        Self,
        ParserError<
            LexerError<
                <I as TryNextWithContext>::Error,
                <CalcLexerDriver<I> as LexerDriver>::Error,
            >,
            <CalcParserDriver<CalcLexer<I>> as ParserDriver>::Error,
            CalcToken,
        >,
    > {
        let lexer = CalcLexer::try_new(input).map_err(ParserError::Lexer)?;
        let driver = CalcParserDriver {
            _marker: PhantomData,
        };
        let parser = Parser::new(lexer, driver);
        Ok(Self { parser })
    }
}

impl<I> TryNextWithContext for CalcParser<I>
where
    I: TryNextWithContext<Item = u8, Context = SymTab>,
{
    type Item = CalcToken;
    type Error = ParserError<
        LexerError<<I as TryNextWithContext>::Error, <CalcLexerDriver<I> as LexerDriver>::Error>,
        <CalcParserDriver<CalcLexer<I>> as ParserDriver>::Error,
        CalcToken,
    >;
    type Context = I::Context;

    fn try_next_with_context(
        &mut self,
        context: &mut I::Context,
    ) -> Result<Option<CalcToken>, <Self as TryNextWithContext>::Error> {
        self.parser.try_next_with_context(context)
    }
}

#[cfg(test)]
mod tests {
    use crate::{CalcLexer, CalcParser, CalcToken, IterInput, SymTab, TokenID, TokenValue};
    use try_next::TryNextWithContext;

    #[test]
    fn calc_lexer_1() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut symtab = SymTab::new();
        let input = IterInput::from("hello\n +\n world\n\n123".bytes());
        let mut lexer = CalcLexer::try_new(input).unwrap();
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                line_no: 1,
                value: TokenValue::Ident(0)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Plus,
                line_no: 2,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                line_no: 3,
                value: TokenValue::Ident(1)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Number,
                line_no: 5,
                value: TokenValue::Number(123)
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::End,
                line_no: 5,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            lexer.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
    }

    #[test]
    fn calc_parser_1() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut symtab = SymTab::new();
        let input = IterInput::from(
            "hello = 1;\n 1 + 2;\n (world + hello + 10) * -2;\n\n1000 - - -123;".bytes(),
        );
        let mut parser = CalcParser::try_new(input).unwrap();
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 1,
                value: TokenValue::Number(1)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 2,
                value: TokenValue::Number(3)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 3,
                value: TokenValue::Number(-22)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 5,
                value: TokenValue::Number(877)
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            Some(CalcToken {
                token_id: TokenID::Stat,
                line_no: 5,
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
        assert!(matches!(
            parser.try_next_with_context(&mut symtab).unwrap(),
            None,
        ));
    }
}
1125}