// parser/tokens.rs
1//! Token types and definitions. Generated by CongoCC Parser Generator. Do not edit.
2
3use std::fmt;
4
/// Token type enumeration
///
/// Represents all possible token types recognized by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
// Variant names keep the generator's UPPER_SNAKE token names verbatim
// (the file header says this is CongoCC-generated output), hence the
// non_camel_case_types allowance rather than renaming to Rust style.
#[allow(non_camel_case_types)]
pub enum TokenType {
    /// End of file
    EOF,
    /// Whitespace: space
    SPACE,
    /// Whitespace: tab
    TAB,
    /// Whitespace: newline
    NEWLINE,
    /// Whitespace: carriage return
    CR,
    /// Whitespace: form feed
    FORM_FEED,
    /// Keyword: NOT
    NOT,
    /// Keyword: AND
    AND,
    /// Keyword: OR
    OR,
    /// Keyword: BETWEEN
    BETWEEN,
    /// Keyword: LIKE
    LIKE,
    /// Keyword: ESCAPE
    ESCAPE,
    /// Keyword: IN
    IN,
    /// Keyword: IS
    IS,
    /// Keyword: TRUE
    TRUE,
    /// Keyword: FALSE
    FALSE,
    /// Keyword: NULL
    NULL,
    /// Operator: = (equals)
    EQ,
    /// Operator: <> or != (not equals)
    NE,
    /// Operator: > (greater than)
    GT,
    /// Operator: >= (greater than or equal)
    GE,
    /// Operator: < (less than)
    LT,
    /// Operator: <= (less than or equal)
    LE,
    /// Delimiter: ( (left parenthesis)
    LPAREN,
    /// Delimiter: , (comma)
    COMMA,
    /// Delimiter: ) (right parenthesis)
    RPAREN,
    /// Operator: + (plus)
    PLUS,
    /// Operator: - (minus)
    MINUS,
    /// Operator: * (multiply)
    STAR,
    /// Operator: / (divide)
    SLASH,
    /// Operator: % (modulo)
    PERCENT,
    /// Line comment
    LINE_COMMENT,
    /// Block comment
    BLOCK_COMMENT,
    /// Decimal numeric literal
    DECIMAL_LITERAL,
    /// Hexadecimal numeric literal
    HEX_LITERAL,
    /// Octal numeric literal
    OCTAL_LITERAL,
    /// Floating point numeric literal
    FLOATING_POINT_LITERAL,
    /// String literal (single-quoted)
    STRING_LITERAL,
    /// Identifier
    ID,
    /// Invalid token marker
    INVALID,
}
93
94impl fmt::Display for TokenType {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        match self {
97            TokenType::EOF => write!(f, "EOF"),
98            TokenType::SPACE => write!(f, "SPACE"),
99            TokenType::TAB => write!(f, "TAB"),
100            TokenType::NEWLINE => write!(f, "NEWLINE"),
101            TokenType::CR => write!(f, "CR"),
102            TokenType::FORM_FEED => write!(f, "FORM_FEED"),
103            TokenType::NOT => write!(f, "NOT"),
104            TokenType::AND => write!(f, "AND"),
105            TokenType::OR => write!(f, "OR"),
106            TokenType::BETWEEN => write!(f, "BETWEEN"),
107            TokenType::LIKE => write!(f, "LIKE"),
108            TokenType::ESCAPE => write!(f, "ESCAPE"),
109            TokenType::IN => write!(f, "IN"),
110            TokenType::IS => write!(f, "IS"),
111            TokenType::TRUE => write!(f, "TRUE"),
112            TokenType::FALSE => write!(f, "FALSE"),
113            TokenType::NULL => write!(f, "NULL"),
114            TokenType::EQ => write!(f, "="),
115            TokenType::NE => write!(f, "<>"),
116            TokenType::GT => write!(f, ">"),
117            TokenType::GE => write!(f, ">="),
118            TokenType::LT => write!(f, "<"),
119            TokenType::LE => write!(f, "<="),
120            TokenType::LPAREN => write!(f, "("),
121            TokenType::COMMA => write!(f, ","),
122            TokenType::RPAREN => write!(f, ")"),
123            TokenType::PLUS => write!(f, "+"),
124            TokenType::MINUS => write!(f, "-"),
125            TokenType::STAR => write!(f, "*"),
126            TokenType::SLASH => write!(f, "/"),
127            TokenType::PERCENT => write!(f, "%"),
128            TokenType::LINE_COMMENT => write!(f, "LINE_COMMENT"),
129            TokenType::BLOCK_COMMENT => write!(f, "BLOCK_COMMENT"),
130            TokenType::DECIMAL_LITERAL => write!(f, "DECIMAL_LITERAL"),
131            TokenType::HEX_LITERAL => write!(f, "HEX_LITERAL"),
132            TokenType::OCTAL_LITERAL => write!(f, "OCTAL_LITERAL"),
133            TokenType::FLOATING_POINT_LITERAL => write!(f, "FLOATING_POINT_LITERAL"),
134            TokenType::STRING_LITERAL => write!(f, "STRING_LITERAL"),
135            TokenType::ID => write!(f, "ID"),
136            TokenType::INVALID => write!(f, "INVALID"),
137        }
138    }
139}
140
/// Lexical state enumeration
///
/// Represents different lexical states the lexer can be in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum LexicalState {
    /// Lexical state: DEFAULT
    DEFAULT,
}

impl Default for LexicalState {
    /// `DEFAULT` is the lexer's starting state, so expose it through the
    /// standard `Default` trait; callers can write `LexicalState::default()`
    /// instead of hard-coding the variant. (Manual impl rather than
    /// `#[derive(Default)]` to avoid requiring the `#[default]` attribute.)
    fn default() -> Self {
        LexicalState::DEFAULT
    }
}
150
/// A single token in the input stream
///
/// Tokens are the atomic units produced by the lexer. Each token has:
/// - A type (e.g., INTEGER, PLUS, EOF)
/// - An image (the actual text matched)
/// - Position information (begin/end offsets)
/// - Optional links to previous/next tokens
///
/// NOTE(review): `next`/`previous` are plain `usize` indices, presumably
/// into a token list owned by the lexer or parser — confirm which
/// collection they index before following them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token {
    /// The type of this token
    pub token_type: TokenType,
    /// The actual text of this token
    pub image: String,
    /// Absolute offset in the input where this token begins
    /// (unit not established here — presumably a byte offset; confirm with the lexer)
    pub begin_offset: usize,
    /// Absolute offset in the input where this token ends
    /// (appears exclusive, given `Token::len` computes `end - begin`)
    pub end_offset: usize,
    /// Index of the next token (if any)
    pub next: Option<usize>,
    /// Index of the previous token (if any)
    pub previous: Option<usize>,
}
174
175impl Token {
176    /// Create a new token
177    pub fn new(
178        token_type: TokenType,
179        image: String,
180        begin_offset: usize,
181        end_offset: usize,
182    ) -> Self {
183        Token {
184            token_type,
185            image,
186            begin_offset,
187            end_offset,
188            next: None,
189            previous: None,
190        }
191    }
192
193    /// Get the length of this token in characters
194    pub fn len(&self) -> usize {
195        self.end_offset.saturating_sub(self.begin_offset)
196    }
197
198    /// Check if this token is empty
199    pub fn is_empty(&self) -> bool {
200        self.len() == 0
201    }
202
203    /// Check if this token is of a specific type
204    pub fn is_type(&self, token_type: TokenType) -> bool {
205        self.token_type == token_type
206    }
207
208    /// Check if this token is one of the specified types
209    pub fn is_one_of(&self, types: &[TokenType]) -> bool {
210        types.contains(&self.token_type)
211    }
212}
213
214impl fmt::Display for Token {
215    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
216        write!(f, "{}: \"{}\"", self.token_type, self.image)
217    }
218}
219
/// Token source trait for tracking token locations
///
/// Maps an absolute input offset back to a human-readable line/column
/// position — presumably implemented by whatever owns the original input
/// text (TODO confirm against the implementor).
pub trait TokenSource {
    /// Get the line number for a given offset
    /// (whether numbering is 0- or 1-based is not established here — confirm)
    fn get_line_from_offset(&self, offset: usize) -> usize;

    /// Get the column number for a given offset
    /// (same caveat on numbering base as `get_line_from_offset`)
    fn get_column_from_offset(&self, offset: usize) -> usize;
}