// oxilean_parse/token/types.rs
1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use super::functions::*;
6use crate::tokens::{Span, Token, TokenKind};
7
8/// Rich metadata attached to a token.
9#[derive(Clone, Debug, PartialEq)]
10#[allow(missing_docs)]
11pub struct TokenMeta {
12    /// The underlying token.
13    pub token: Token,
14    /// High-level category.
15    pub category: TokenCategory,
16    /// Raw source text for this token.
17    #[allow(missing_docs)]
18    pub text: String,
19    /// `true` if any whitespace preceded this token on the same line.
20    pub preceded_by_space: bool,
21    /// `true` if a newline preceded this token.
22    pub preceded_by_newline: bool,
23}
24impl TokenMeta {
25    /// Construct a new `TokenMeta`.
26    #[allow(missing_docs)]
27    pub fn new(
28        token: Token,
29        category: TokenCategory,
30        text: impl Into<String>,
31        preceded_by_space: bool,
32        preceded_by_newline: bool,
33    ) -> Self {
34        Self {
35            token,
36            category,
37            text: text.into(),
38            preceded_by_space,
39            preceded_by_newline,
40        }
41    }
42    /// Convenience: construct from a `Token` and its source slice.
43    #[allow(missing_docs)]
44    pub fn from_token(token: Token, source: &str) -> Self {
45        let span = &token.span;
46        let text = source.get(span.start..span.end).unwrap_or("").to_string();
47        let category = categorise(&token.kind);
48        Self {
49            token,
50            category,
51            text,
52            preceded_by_space: false,
53            preceded_by_newline: false,
54        }
55    }
56    /// Return the span of this token.
57    #[allow(missing_docs)]
58    pub fn span(&self) -> &Span {
59        &self.token.span
60    }
61    /// Return the `TokenKind`.
62    #[allow(missing_docs)]
63    pub fn kind(&self) -> &TokenKind {
64        &self.token.kind
65    }
66    /// `true` if this token is an identifier.
67    #[allow(missing_docs)]
68    pub fn is_ident(&self) -> bool {
69        self.token.is_ident()
70    }
71    /// `true` if this token is a keyword.
72    #[allow(missing_docs)]
73    pub fn is_keyword(&self) -> bool {
74        self.category == TokenCategory::Keyword
75    }
76    /// `true` if this token is a literal.
77    #[allow(missing_docs)]
78    pub fn is_literal(&self) -> bool {
79        self.category == TokenCategory::Literal
80    }
81}
82impl TokenMeta {
83    /// Annotate with preceding newline based on source position.
84    #[allow(dead_code)]
85    #[allow(missing_docs)]
86    pub fn set_preceded_by_newline(&mut self, v: bool) {
87        self.preceded_by_newline = v;
88    }
89    /// Annotate with preceding space.
90    #[allow(dead_code)]
91    #[allow(missing_docs)]
92    pub fn set_preceded_by_space(&mut self, v: bool) {
93        self.preceded_by_space = v;
94    }
95    /// Length of the token's source span.
96    #[allow(dead_code)]
97    #[allow(missing_docs)]
98    pub fn len(&self) -> usize {
99        self.token.span.end.saturating_sub(self.token.span.start)
100    }
101    /// Whether this token's source span is empty.
102    #[allow(dead_code)]
103    #[allow(missing_docs)]
104    pub fn is_empty(&self) -> bool {
105        self.len() == 0
106    }
107    /// Whether this token is a numeric literal.
108    #[allow(dead_code)]
109    #[allow(missing_docs)]
110    pub fn is_numeric(&self) -> bool {
111        matches!(self.token.kind, TokenKind::Nat(_) | TokenKind::Float(_))
112    }
113    /// Whether this token is a string literal.
114    #[allow(dead_code)]
115    #[allow(missing_docs)]
116    pub fn is_string(&self) -> bool {
117        matches!(self.token.kind, TokenKind::String(_))
118    }
119    /// Whether this token is an operator.
120    #[allow(dead_code)]
121    #[allow(missing_docs)]
122    pub fn is_operator(&self) -> bool {
123        self.category == TokenCategory::Operator
124    }
125}
/// High-level category of a token used for error messages and syntax
/// highlighting.
// Removed the generator's redundant `#[allow(missing_docs)]`: the enum and
// every variant carry doc comments.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// A keyword (`theorem`, `def`, `fun`, …).
    Keyword,
    /// An identifier (`foo`, `α`, `myVar`).
    Identifier,
    /// A numeric or string literal.
    Literal,
    /// A punctuation symbol (`(`, `)`, `,`, …).
    Punctuation,
    /// An operator (`+`, `->`, `≤`, …).
    Operator,
    /// A comment.
    Comment,
    /// End-of-file sentinel.
    Eof,
    /// Anything else.
    Other,
}
148impl TokenCategory {
149    /// Human-readable name used in error messages.
150    #[allow(missing_docs)]
151    pub fn name(&self) -> &'static str {
152        match self {
153            TokenCategory::Keyword => "keyword",
154            TokenCategory::Identifier => "identifier",
155            TokenCategory::Literal => "literal",
156            TokenCategory::Punctuation => "punctuation",
157            TokenCategory::Operator => "operator",
158            TokenCategory::Comment => "comment",
159            TokenCategory::Eof => "end-of-file",
160            TokenCategory::Other => "token",
161        }
162    }
163    /// Return `true` if tokens of this category may appear as expression
164    /// starters.
165    #[allow(missing_docs)]
166    pub fn can_start_expr(&self) -> bool {
167        matches!(
168            self,
169            TokenCategory::Identifier | TokenCategory::Literal | TokenCategory::Punctuation
170        )
171    }
172}
173impl TokenCategory {
174    /// Whether this category is meaningful for formatting (not Eof/Other).
175    #[allow(dead_code)]
176    #[allow(missing_docs)]
177    pub fn is_meaningful(&self) -> bool {
178        !matches!(self, TokenCategory::Eof | TokenCategory::Other)
179    }
180    /// Return an ANSI color code for this category.
181    #[allow(dead_code)]
182    #[allow(missing_docs)]
183    pub fn ansi_color(&self) -> &'static str {
184        match self {
185            TokenCategory::Keyword => ansi::BOLD_BLUE,
186            TokenCategory::Identifier => ansi::RESET,
187            TokenCategory::Literal => ansi::BOLD_GREEN,
188            TokenCategory::Operator => ansi::CYAN,
189            TokenCategory::Punctuation => ansi::YELLOW,
190            TokenCategory::Comment => ansi::GREEN,
191            TokenCategory::Eof => ansi::RESET,
192            TokenCategory::Other => ansi::RESET,
193        }
194    }
195    /// All categories in canonical order.
196    #[allow(dead_code)]
197    #[allow(missing_docs)]
198    pub fn all() -> &'static [TokenCategory] {
199        &[
200            TokenCategory::Keyword,
201            TokenCategory::Identifier,
202            TokenCategory::Literal,
203            TokenCategory::Punctuation,
204            TokenCategory::Operator,
205            TokenCategory::Comment,
206            TokenCategory::Eof,
207            TokenCategory::Other,
208        ]
209    }
210}
/// A priority level for operator tokens.
///
/// Higher values bind more tightly (e.g., `*` before `+`). Ordering is
/// derived, so priorities compare directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct OperatorPriority(pub u32);
impl OperatorPriority {
    /// Create a priority.
    ///
    /// `const` so priorities can appear in constant tables; existing
    /// call sites are unaffected.
    pub const fn new(p: u32) -> Self {
        Self(p)
    }
    /// Lowest possible priority.
    pub const MIN: Self = Self(0);
    /// Highest possible priority.
    pub const MAX: Self = Self(u32::MAX);
}
228/// A classified token enriched with operator metadata.
229#[derive(Clone, Debug)]
230#[allow(missing_docs)]
231pub struct RichToken {
232    /// Underlying token.
233    pub token: Token,
234    /// High-level category.
235    pub category: TokenCategory,
236    /// Operator arity (if applicable).
237    #[allow(missing_docs)]
238    pub arity: OperatorArity,
239    /// Operator priority (if applicable).
240    pub priority: OperatorPriority,
241}
242impl RichToken {
243    /// Create a `RichToken` from a plain `Token`.
244    #[allow(missing_docs)]
245    pub fn from_token(token: Token) -> Self {
246        let category = categorise(&token.kind);
247        let arity = operator_arity(&token.kind);
248        let priority = operator_priority(&token.kind);
249        Self {
250            token,
251            category,
252            arity,
253            priority,
254        }
255    }
256    /// Return `true` if this is an infix binary operator.
257    #[allow(missing_docs)]
258    pub fn is_infix(&self) -> bool {
259        self.arity == OperatorArity::Binary
260    }
261    /// Return `true` if this is a prefix unary operator.
262    #[allow(missing_docs)]
263    pub fn is_prefix(&self) -> bool {
264        self.arity == OperatorArity::Unary
265    }
266}
/// Options for token stream reformatting.
// Removed the generator's redundant `#[allow(missing_docs)]` attributes
// (struct-level and a stray one on `space_after_comma`).
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct ReformatOptions {
    /// Insert a space before operators.
    pub space_before_op: bool,
    /// Insert a space after operators.
    pub space_after_op: bool,
    /// Insert a space after commas.
    pub space_after_comma: bool,
    /// Remove spaces before closing brackets.
    pub no_space_before_close: bool,
}
282/// A peekable stream of tokens with span tracking.
283#[derive(Clone, Debug)]
284#[allow(missing_docs)]
285pub struct TokenStream {
286    tokens: Vec<Token>,
287    pos: usize,
288}
289impl TokenStream {
290    /// Create a stream from a token list.
291    #[allow(missing_docs)]
292    pub fn new(tokens: Vec<Token>) -> Self {
293        Self { tokens, pos: 0 }
294    }
295    /// Peek at the current token without consuming it.
296    #[allow(missing_docs)]
297    pub fn peek(&self) -> Option<&Token> {
298        self.tokens.get(self.pos)
299    }
300    /// Peek at the token `n` positions ahead without consuming.
301    #[allow(missing_docs)]
302    pub fn peek_ahead(&self, n: usize) -> Option<&Token> {
303        self.tokens.get(self.pos + n)
304    }
305    /// Consume and return the next token.
306    #[allow(clippy::should_implement_trait)]
307    #[allow(missing_docs)]
308    pub fn next(&mut self) -> Option<Token> {
309        if self.pos < self.tokens.len() {
310            let tok = self.tokens[self.pos].clone();
311            self.pos += 1;
312            Some(tok)
313        } else {
314            None
315        }
316    }
317    /// Consume the next token only if its kind matches `expected`.
318    #[allow(missing_docs)]
319    pub fn eat(&mut self, expected: &TokenKind) -> Option<Token> {
320        if self.peek().map(|t| &t.kind) == Some(expected) {
321            self.next()
322        } else {
323            None
324        }
325    }
326    /// Consume tokens while the predicate holds.
327    #[allow(missing_docs)]
328    pub fn eat_while<F>(&mut self, mut pred: F) -> Vec<Token>
329    where
330        F: FnMut(&Token) -> bool,
331    {
332        let mut consumed = Vec::new();
333        while let Some(tok) = self.peek() {
334            if pred(tok) {
335                consumed.push(self.next().expect("peek confirmed token exists"));
336            } else {
337                break;
338            }
339        }
340        consumed
341    }
342    /// Return the current position in the stream (number consumed).
343    #[allow(missing_docs)]
344    pub fn position(&self) -> usize {
345        self.pos
346    }
347    /// `true` if all tokens have been consumed.
348    #[allow(missing_docs)]
349    pub fn is_empty(&self) -> bool {
350        self.pos >= self.tokens.len()
351    }
352    /// Remaining token count.
353    #[allow(missing_docs)]
354    pub fn remaining(&self) -> usize {
355        self.tokens.len().saturating_sub(self.pos)
356    }
357    /// Rewind to a previously saved position.
358    #[allow(missing_docs)]
359    pub fn rewind(&mut self, saved: usize) {
360        self.pos = saved.min(self.tokens.len());
361    }
362    /// Save the current position so it can be restored later.
363    #[allow(missing_docs)]
364    pub fn save(&self) -> usize {
365        self.pos
366    }
367    /// Consume the next token and return an error if the kind is wrong.
368    #[allow(missing_docs)]
369    pub fn expect(&mut self, expected: &TokenKind) -> Result<Token, String> {
370        match self.peek() {
371            Some(tok) if &tok.kind == expected => {
372                Ok(self.next().expect("peek confirmed token exists"))
373            }
374            Some(tok) => Err(format!(
375                "expected {:?}, got {:?} at {}:{}",
376                expected, tok.kind, tok.span.line, tok.span.column
377            )),
378            None => Err(format!("expected {:?}, got end-of-file", expected)),
379        }
380    }
381    /// Collect all remaining tokens as a `Vec`.
382    #[allow(missing_docs)]
383    pub fn collect_remaining(mut self) -> Vec<Token> {
384        let mut result = Vec::new();
385        while let Some(t) = self.next() {
386            result.push(t);
387        }
388        result
389    }
390}
391impl TokenStream {
392    /// Peek at the `n`-th token from the current position (0 = next).
393    #[allow(dead_code)]
394    #[allow(missing_docs)]
395    pub fn look_ahead(&self, n: usize) -> Option<&Token> {
396        self.tokens.get(self.pos + n)
397    }
398    /// Consume and discard tokens while `pred` holds.
399    #[allow(dead_code)]
400    #[allow(missing_docs)]
401    pub fn skip_while<F: FnMut(&Token) -> bool>(&mut self, mut pred: F) {
402        while let Some(tok) = self.peek() {
403            if pred(tok) {
404                self.pos += 1;
405            } else {
406                break;
407            }
408        }
409    }
410    /// Consume tokens up to and including the next occurrence of `kind`.
411    #[allow(dead_code)]
412    #[allow(missing_docs)]
413    pub fn skip_to_inclusive(&mut self, kind: &TokenKind) {
414        while let Some(tok) = self.peek() {
415            let found = &tok.kind == kind;
416            self.pos += 1;
417            if found {
418                break;
419            }
420        }
421    }
422    /// Consume tokens up to (but not including) the next occurrence of `kind`.
423    #[allow(dead_code)]
424    #[allow(missing_docs)]
425    pub fn skip_to(&mut self, kind: &TokenKind) {
426        while let Some(tok) = self.peek() {
427            if &tok.kind == kind {
428                break;
429            }
430            self.pos += 1;
431        }
432    }
433    /// Return a slice of the next `n` tokens without consuming them.
434    #[allow(dead_code)]
435    #[allow(missing_docs)]
436    pub fn peek_slice(&self, n: usize) -> &[Token] {
437        let end = (self.pos + n).min(self.tokens.len());
438        &self.tokens[self.pos..end]
439    }
440    /// Check if the next `n` tokens match the given kind sequence.
441    #[allow(dead_code)]
442    #[allow(missing_docs)]
443    pub fn matches_sequence(&self, kinds: &[&TokenKind]) -> bool {
444        for (i, k) in kinds.iter().enumerate() {
445            match self.tokens.get(self.pos + i) {
446                Some(tok) if &&tok.kind == k => {}
447                _ => return false,
448            }
449        }
450        true
451    }
452    /// Consume exactly `n` tokens and return them.
453    #[allow(dead_code)]
454    #[allow(missing_docs)]
455    pub fn consume_n(&mut self, n: usize) -> Vec<Token> {
456        let mut result = Vec::with_capacity(n);
457        for _ in 0..n {
458            if let Some(tok) = self.next() {
459                result.push(tok);
460            }
461        }
462        result
463    }
464    /// Peek at all remaining tokens.
465    #[allow(dead_code)]
466    #[allow(missing_docs)]
467    pub fn peek_all(&self) -> &[Token] {
468        &self.tokens[self.pos..]
469    }
470    /// Insert tokens at the current position (for synthetic token injection).
471    #[allow(dead_code)]
472    #[allow(missing_docs)]
473    pub fn inject(&mut self, tokens: Vec<Token>) {
474        let mut new_tokens = self.tokens[..self.pos].to_vec();
475        new_tokens.extend(tokens);
476        new_tokens.extend_from_slice(&self.tokens[self.pos..]);
477        self.tokens = new_tokens;
478    }
479    /// Return the total token count (including consumed).
480    #[allow(dead_code)]
481    #[allow(missing_docs)]
482    pub fn len(&self) -> usize {
483        self.tokens.len()
484    }
485}
486/// A window iterator that yields consecutive N-grams of tokens.
487#[allow(dead_code)]
488#[allow(missing_docs)]
489pub struct TokenNgramIter<'a> {
490    pub(crate) tokens: &'a [Token],
491    pub(crate) window: usize,
492    pub(crate) pos: usize,
493}
494impl<'a> TokenNgramIter<'a> {
495    /// Create a new N-gram iterator with the given window size.
496    #[allow(dead_code)]
497    #[allow(missing_docs)]
498    pub fn new(tokens: &'a [Token], window: usize) -> Self {
499        Self {
500            tokens,
501            window,
502            pos: 0,
503        }
504    }
505}
/// Matching bracket pairs.
// Removed the generator's redundant `#[allow(missing_docs)]`: the enum
// and every variant carry doc comments.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BracketKind {
    /// `(` / `)`
    Paren,
    /// `{` / `}`
    Brace,
    /// `[` / `]`
    Bracket,
}
517/// A simple pattern for matching token sequences.
518#[allow(dead_code)]
519#[allow(missing_docs)]
520#[derive(Clone, Debug)]
521pub enum TokenPattern {
522    /// Match a specific token kind.
523    Exact(TokenKind),
524    /// Match any token in the given category.
525    Category(TokenCategory),
526    /// Match any token (wildcard).
527    Any,
528    /// Match an optional token.
529    Optional(Box<TokenPattern>),
530    /// Match a sequence of patterns.
531    Sequence(Vec<TokenPattern>),
532    /// Match one of the alternatives.
533    Alternatives(Vec<TokenPattern>),
534}
535impl TokenPattern {
536    /// Check if a single token matches this pattern (non-recursive).
537    #[allow(dead_code)]
538    #[allow(missing_docs)]
539    pub fn matches_single(&self, tok: &Token) -> bool {
540        match self {
541            TokenPattern::Exact(k) => &tok.kind == k,
542            TokenPattern::Category(cat) => categorise(&tok.kind) == *cat,
543            TokenPattern::Any => true,
544            TokenPattern::Optional(inner) => inner.matches_single(tok),
545            TokenPattern::Sequence(_) => false,
546            TokenPattern::Alternatives(alts) => alts.iter().any(|a| a.matches_single(tok)),
547        }
548    }
549    /// Try to match this pattern against the start of a token slice.
550    /// Returns the number of tokens consumed, or `None` if no match.
551    #[allow(dead_code)]
552    #[allow(missing_docs)]
553    pub fn try_match(&self, tokens: &[Token]) -> Option<usize> {
554        match self {
555            TokenPattern::Exact(k) => {
556                if tokens.first().map(|t| &t.kind) == Some(k) {
557                    Some(1)
558                } else {
559                    None
560                }
561            }
562            TokenPattern::Category(cat) => {
563                if tokens.first().map(|t| categorise(&t.kind)) == Some(*cat) {
564                    Some(1)
565                } else {
566                    None
567                }
568            }
569            TokenPattern::Any => {
570                if tokens.is_empty() {
571                    None
572                } else {
573                    Some(1)
574                }
575            }
576            TokenPattern::Optional(inner) => Some(inner.try_match(tokens).unwrap_or(0)),
577            TokenPattern::Sequence(pats) => {
578                let mut consumed = 0;
579                for pat in pats {
580                    match pat.try_match(&tokens[consumed..]) {
581                        Some(n) => consumed += n,
582                        None => return None,
583                    }
584                }
585                Some(consumed)
586            }
587            TokenPattern::Alternatives(alts) => {
588                for alt in alts {
589                    if let Some(n) = alt.try_match(tokens) {
590                        return Some(n);
591                    }
592                }
593                None
594            }
595        }
596    }
597    /// Find all non-overlapping matches of this pattern in a slice.
598    #[allow(dead_code)]
599    #[allow(missing_docs)]
600    pub fn find_all<'a>(&self, tokens: &'a [Token]) -> Vec<&'a [Token]> {
601        let mut results = Vec::new();
602        let mut pos = 0;
603        while pos < tokens.len() {
604            if let Some(n) = self.try_match(&tokens[pos..]) {
605                if n > 0 {
606                    results.push(&tokens[pos..pos + n]);
607                    pos += n;
608                } else {
609                    pos += 1;
610                }
611            } else {
612                pos += 1;
613            }
614        }
615        results
616    }
617}
618/// A token annotated with its depth and category.
619#[allow(dead_code)]
620#[allow(missing_docs)]
621#[derive(Clone, Debug)]
622pub struct AnnotatedToken {
623    pub token: Token,
624    pub category: TokenCategory,
625    pub depth: i32,
626    pub index: usize,
627}
628impl AnnotatedToken {
629    /// Create an annotated token.
630    #[allow(dead_code)]
631    #[allow(missing_docs)]
632    pub fn new(token: Token, depth: i32, index: usize) -> Self {
633        let category = categorise(&token.kind);
634        Self {
635            token,
636            category,
637            depth,
638            index,
639        }
640    }
641}
/// Classify a `TokenKind` as unary, binary, or non-operator.
// Removed the generator's redundant `#[allow(missing_docs)]`: the enum
// and every variant carry doc comments.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OperatorArity {
    /// A prefix operator (e.g., `-`, `¬`).
    Unary,
    /// An infix operator (e.g., `+`, `*`, `→`).
    Binary,
    /// Not an operator.
    None,
}