Skip to main content

sqlglot_rust/tokens/
mod.rs

1mod tokenizer;
2
3pub use tokenizer::Tokenizer;
4
5use serde::{Deserialize, Serialize};
6
7/// The type of a SQL token.
8///
9/// Modeled after Python sqlglot's comprehensive token type system.
10#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum TokenType {
12    // ── Literals ────────────────────────────────────────────────────
13    Number,
14    String,
15    NationalString,
16    Identifier,
17    BitString,
18    HexString,
19    Parameter, // $1, :name, ?
20
21    // ── Keywords ────────────────────────────────────────────────────
22    Select,
23    From,
24    Where,
25    And,
26    Or,
27    Not,
28    As,
29    Join,
30    Inner,
31    Left,
32    Right,
33    Full,
34    Outer,
35    Cross,
36    On,
37    Insert,
38    Into,
39    Values,
40    Update,
41    Set,
42    Delete,
43    Create,
44    Table,
45    Drop,
46    Alter,
47    Index,
48    If,
49    Exists,
50    In,
51    Is,
52    Null,
53    Like,
54    ILike,
55    Between,
56    Case,
57    When,
58    Then,
59    Else,
60    End,
61    Order,
62    By,
63    Asc,
64    Desc,
65    Group,
66    Having,
67    Limit,
68    Offset,
69    Union,
70    All,
71    Distinct,
72    True,
73    False,
74
75    // Set operations
76    Intersect,
77    Except,
78
79    // CTEs
80    With,
81    Recursive,
82
83    // Subqueries / misc
84    Any,
85    Some,
86
87    // Casting
88    Cast,
89
90    // Window functions
91    Over,
92    Partition,
93    Window,
94    Rows,
95    Range,
96    Unbounded,
97    Preceding,
98    Following,
99    CurrentRow,
100    Filter,
101
102    // Data types as keywords
103    Int,
104    Integer,
105    BigInt,
106    SmallInt,
107    TinyInt,
108    Float,
109    Double,
110    Decimal,
111    Numeric,
112    Real,
113    Varchar,
114    Char,
115    Text,
116    Boolean,
117    Bool,
118    Date,
119    Timestamp,
120    TimestampTz,
121    Time,
122    Interval,
123    Blob,
124    Bytea,
125    Json,
126    Jsonb,
127    Uuid,
128    Array,
129    Map,
130    Struct,
131
132    // Constraints & DDL
133    Primary,
134    Key,
135    Foreign,
136    References,
137    Unique,
138    Check,
139    Default,
140    Constraint,
141    AutoIncrement,
142    NotNull,
143    Cascade,
144    Restrict,
145    NoAction,
146    SetNull,
147    SetDefault,
148
149    // Additional DML
150    Returning,
151    Conflict,
152    Do,
153    Nothing,
154    Replace,
155    Ignore,
156    Merge,
157    Matched,
158    Using,
159    Truncate,
160
161    // Schema
162    Schema,
163    Database,
164    View,
165    Materialized,
166    Temporary,
167    Temp,
168
169    // Transaction
170    Begin,
171    Commit,
172    Rollback,
173    Savepoint,
174    Transaction,
175
176    // Misc keywords
177    Explain,
178    Analyze,
179    Describe,
180    Show,
181    Use,
182    Grant,
183    Revoke,
184    Lateral,
185    Unnest,
186    Pivot,
187    Unpivot,
188    Tablesample,
189    Fetch,
190    First,
191    Next,
192    Only,
193    Percent,
194    WithTies,
195    Nulls,
196    Respect,
197    Top,
198    Collate,
199    Comment,
200    Isnull,
201    Notnull,
202    Escape,
203
204    // Existence checks
205    Qualify,
206
207    // Grouped set operations
208    Cube,
209    Rollup,
210    Grouping,
211    Sets,
212
213    // Logical
214    Xor,
215
216    // Special expressions
217    Extract,
218    Epoch,
219    Year,
220    Month,
221    Day,
222    Hour,
223    Minute,
224    Second,
225
226    // ── Operators ───────────────────────────────────────────────────
227    Plus,
228    Minus,
229    Star,
230    Slash,
231    Percent2, // % as modulo operator
232    Eq,
233    Neq, // <> or !=
234    Lt,
235    Gt,
236    LtEq,
237    GtEq,
238    Concat,          // ||
239    BitwiseAnd,      // &
240    BitwiseOr,       // |
241    BitwiseXor,      // ^
242    BitwiseNot,      // ~
243    ShiftLeft,       // <<
244    ShiftRight,      // >>
245    DoubleColon,     // :: (Postgres cast)
246    Arrow,           // ->
247    DoubleArrow,     // ->>
248    HashArrow,       // #>
249    HashDoubleArrow, // #>>
250    AtSign,          // @
251    Scope,           // ::
252
253    // ── Punctuation ────────────────────────────────────────────────
254    LParen,
255    RParen,
256    LBracket, // [
257    RBracket, // ]
258    LBrace,   // {
259    RBrace,   // }
260    Comma,
261    Semicolon,
262    Dot,
263    Colon,
264    DoubleColon2, // duplicated for compat -- use DoubleColon
265
266    // ── Special ────────────────────────────────────────────────────
267    Whitespace,
268    LineComment,
269    BlockComment,
270    Eof,
271}
272
273/// A token produced by the tokenizer.
274#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
275pub struct Token {
276    pub token_type: TokenType,
277    pub value: String,
278    pub line: usize,
279    pub col: usize,
280    pub position: usize,
281    /// For quoted identifiers: the delimiter character ('"', '`', or '[').
282    /// '\0' means unquoted.
283    #[serde(default)]
284    pub quote_char: char,
285}
286
287impl Token {
288    #[must_use]
289    pub fn new(token_type: TokenType, value: impl Into<String>, position: usize) -> Self {
290        Self {
291            token_type,
292            value: value.into(),
293            line: 0,
294            col: 0,
295            position,
296            quote_char: '\0',
297        }
298    }
299
300    #[must_use]
301    pub fn with_location(
302        token_type: TokenType,
303        value: impl Into<String>,
304        position: usize,
305        line: usize,
306        col: usize,
307    ) -> Self {
308        Self {
309            token_type,
310            value: value.into(),
311            line,
312            col,
313            position,
314            quote_char: '\0',
315        }
316    }
317
318    #[must_use]
319    pub fn with_quote(
320        token_type: TokenType,
321        value: impl Into<String>,
322        position: usize,
323        line: usize,
324        col: usize,
325        quote_char: char,
326    ) -> Self {
327        Self {
328            token_type,
329            value: value.into(),
330            line,
331            col,
332            position,
333            quote_char,
334        }
335    }
336}