// sqlglot_rust/tokens/mod.rs

mod tokenizer;

pub use tokenizer::Tokenizer;

use serde::{Deserialize, Serialize};
6
7/// The type of a SQL token.
8///
9/// Modeled after Python sqlglot's comprehensive token type system.
10#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum TokenType {
12    // ── Literals ────────────────────────────────────────────────────
13    Number,
14    String,
15    Identifier,
16    BitString,
17    HexString,
18    Parameter, // $1, :name, ?
19
20    // ── Keywords ────────────────────────────────────────────────────
21    Select,
22    From,
23    Where,
24    And,
25    Or,
26    Not,
27    As,
28    Join,
29    Inner,
30    Left,
31    Right,
32    Full,
33    Outer,
34    Cross,
35    On,
36    Insert,
37    Into,
38    Values,
39    Update,
40    Set,
41    Delete,
42    Create,
43    Table,
44    Drop,
45    Alter,
46    Index,
47    If,
48    Exists,
49    In,
50    Is,
51    Null,
52    Like,
53    ILike,
54    Between,
55    Case,
56    When,
57    Then,
58    Else,
59    End,
60    Order,
61    By,
62    Asc,
63    Desc,
64    Group,
65    Having,
66    Limit,
67    Offset,
68    Union,
69    All,
70    Distinct,
71    True,
72    False,
73
74    // Set operations
75    Intersect,
76    Except,
77
78    // CTEs
79    With,
80    Recursive,
81
82    // Subqueries / misc
83    Any,
84    Some,
85
86    // Casting
87    Cast,
88
89    // Window functions
90    Over,
91    Partition,
92    Window,
93    Rows,
94    Range,
95    Unbounded,
96    Preceding,
97    Following,
98    CurrentRow,
99    Filter,
100
101    // Data types as keywords
102    Int,
103    Integer,
104    BigInt,
105    SmallInt,
106    TinyInt,
107    Float,
108    Double,
109    Decimal,
110    Numeric,
111    Real,
112    Varchar,
113    Char,
114    Text,
115    Boolean,
116    Bool,
117    Date,
118    Timestamp,
119    TimestampTz,
120    Time,
121    Interval,
122    Blob,
123    Bytea,
124    Json,
125    Jsonb,
126    Uuid,
127    Array,
128    Map,
129    Struct,
130
131    // Constraints & DDL
132    Primary,
133    Key,
134    Foreign,
135    References,
136    Unique,
137    Check,
138    Default,
139    Constraint,
140    AutoIncrement,
141    NotNull,
142    Cascade,
143    Restrict,
144    NoAction,
145    SetNull,
146    SetDefault,
147
148    // Additional DML
149    Returning,
150    Conflict,
151    Do,
152    Nothing,
153    Replace,
154    Ignore,
155    Merge,
156    Matched,
157    Using,
158    Truncate,
159
160    // Schema
161    Schema,
162    Database,
163    View,
164    Materialized,
165    Temporary,
166    Temp,
167
168    // Transaction
169    Begin,
170    Commit,
171    Rollback,
172    Savepoint,
173    Transaction,
174
175    // Misc keywords
176    Explain,
177    Analyze,
178    Describe,
179    Show,
180    Use,
181    Grant,
182    Revoke,
183    Lateral,
184    Unnest,
185    Pivot,
186    Unpivot,
187    Tablesample,
188    Fetch,
189    First,
190    Next,
191    Only,
192    Percent,
193    WithTies,
194    Nulls,
195    Respect,
196    Top,
197    Collate,
198    Comment,
199    Isnull,
200    Notnull,
201    Escape,
202
203    // Existence checks
204    Qualify,
205
206    // Grouped set operations
207    Cube,
208    Rollup,
209    Grouping,
210    Sets,
211
212    // Logical
213    Xor,
214
215    // Special expressions
216    Extract,
217    Epoch,
218    Year,
219    Month,
220    Day,
221    Hour,
222    Minute,
223    Second,
224
225    // ── Operators ───────────────────────────────────────────────────
226    Plus,
227    Minus,
228    Star,
229    Slash,
230    Percent2, // % as modulo operator
231    Eq,
232    Neq, // <> or !=
233    Lt,
234    Gt,
235    LtEq,
236    GtEq,
237    Concat,          // ||
238    BitwiseAnd,      // &
239    BitwiseOr,       // |
240    BitwiseXor,      // ^
241    BitwiseNot,      // ~
242    ShiftLeft,       // <<
243    ShiftRight,      // >>
244    DoubleColon,     // :: (Postgres cast)
245    Arrow,           // ->
246    DoubleArrow,     // ->>
247    HashArrow,       // #>
248    HashDoubleArrow, // #>>
249    AtSign,          // @
250    Scope,           // ::
251
252    // ── Punctuation ────────────────────────────────────────────────
253    LParen,
254    RParen,
255    LBracket, // [
256    RBracket, // ]
257    LBrace,   // {
258    RBrace,   // }
259    Comma,
260    Semicolon,
261    Dot,
262    Colon,
263    DoubleColon2, // duplicated for compat -- use DoubleColon
264
265    // ── Special ────────────────────────────────────────────────────
266    Whitespace,
267    LineComment,
268    BlockComment,
269    Eof,
270}
271
272/// A token produced by the tokenizer.
273#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
274pub struct Token {
275    pub token_type: TokenType,
276    pub value: String,
277    pub line: usize,
278    pub col: usize,
279    pub position: usize,
280    /// For quoted identifiers: the delimiter character ('"', '`', or '[').
281    /// '\0' means unquoted.
282    #[serde(default)]
283    pub quote_char: char,
284}
285
286impl Token {
287    #[must_use]
288    pub fn new(token_type: TokenType, value: impl Into<String>, position: usize) -> Self {
289        Self {
290            token_type,
291            value: value.into(),
292            line: 0,
293            col: 0,
294            position,
295            quote_char: '\0',
296        }
297    }
298
299    #[must_use]
300    pub fn with_location(
301        token_type: TokenType,
302        value: impl Into<String>,
303        position: usize,
304        line: usize,
305        col: usize,
306    ) -> Self {
307        Self {
308            token_type,
309            value: value.into(),
310            line,
311            col,
312            position,
313            quote_char: '\0',
314        }
315    }
316
317    #[must_use]
318    pub fn with_quote(
319        token_type: TokenType,
320        value: impl Into<String>,
321        position: usize,
322        line: usize,
323        col: usize,
324        quote_char: char,
325    ) -> Self {
326        Self {
327            token_type,
328            value: value.into(),
329            line,
330            col,
331            position,
332            quote_char,
333        }
334    }
335}