// sqlglot_rust/tokens/mod.rs

1mod tokenizer;
2
3pub use tokenizer::Tokenizer;
4
5use serde::{Deserialize, Serialize};
6
/// The type of a SQL token.
///
/// Modeled after Python sqlglot's comprehensive token type system.
///
/// All variants are unit variants; a [`Token`] pairs one of them with its
/// `value` string and source location. Because the enum derives `Serialize`
/// and `Deserialize` without rename attributes, the variant names are part of
/// the serialized form — renaming a variant changes that format.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum TokenType {
    // ── Literals ────────────────────────────────────────────────────
    Number,
    String,
    // NOTE(review): presumably the N'...' national-character literal — confirm
    // against the tokenizer.
    NationalString,
    Identifier,
    BitString,
    HexString,
    Parameter, // $1, :name, ?

    // ── Keywords ────────────────────────────────────────────────────
    Select,
    From,
    Where,
    And,
    Or,
    Not,
    As,
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    Cross,
    On,
    Insert,
    Into,
    Values,
    Update,
    Set,
    Delete,
    Create,
    Table,
    Drop,
    Alter,
    Index,
    If,
    Exists,
    In,
    Is,
    Null,
    Like,
    ILike,
    Between,
    Case,
    When,
    Then,
    Else,
    End,
    Order,
    By,
    Asc,
    Desc,
    Group,
    Having,
    Limit,
    Offset,
    Union,
    All,
    Distinct,
    True,
    False,

    // Set operations
    Intersect,
    Except,

    // CTEs
    With,
    Recursive,

    // Subqueries / misc
    Any,
    Some,

    // Casting
    Cast,

    // Window functions
    Over,
    Partition,
    Window,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    // NOTE(review): names a two-word phrase (CURRENT ROW); whether the
    // tokenizer emits it as a single token is not visible here — confirm.
    CurrentRow,
    Filter,

    // Data types as keywords
    // Synonymous spellings (Int/Integer, Bool/Boolean) are separate variants.
    Int,
    Integer,
    BigInt,
    SmallInt,
    TinyInt,
    Float,
    Double,
    Decimal,
    Numeric,
    Real,
    Varchar,
    Char,
    Text,
    Boolean,
    Bool,
    Date,
    Timestamp,
    TimestampTz,
    Time,
    Interval,
    Blob,
    Bytea,
    Json,
    Jsonb,
    Uuid,
    Array,
    Map,
    Struct,

    // Constraints & DDL
    Primary,
    Key,
    Foreign,
    References,
    Unique,
    Check,
    Default,
    Constraint,
    AutoIncrement,
    // NOTE(review): NotNull, NoAction, SetNull and SetDefault name multi-word
    // phrases; whether the tokenizer produces them or the parser combines
    // separate keyword tokens is not visible here — confirm.
    // `NotNull` is a different variant from `Notnull` further down.
    NotNull,
    Cascade,
    Restrict,
    NoAction,
    SetNull,
    SetDefault,

    // Additional DML
    Returning,
    Conflict,
    Do,
    Nothing,
    Replace,
    Ignore,
    Merge,
    Matched,
    Using,
    Truncate,

    // Schema
    Schema,
    Database,
    View,
    Materialized,
    Temporary,
    Temp,

    // Transaction
    Begin,
    Commit,
    Rollback,
    Savepoint,
    Transaction,

    // Misc keywords
    Explain,
    Analyze,
    Describe,
    Show,
    Use,
    Grant,
    Revoke,
    Lateral,
    Unnest,
    Pivot,
    Unpivot,
    Tablesample,
    Fetch,
    First,
    Next,
    Only,
    /// The `PERCENT` keyword; the `%` operator is [`TokenType::Percent2`].
    Percent,
    WithTies,
    Nulls,
    Respect,
    Top,
    Collate,
    Comment,
    // NOTE(review): presumably the single-word ISNULL / NOTNULL operators;
    // distinct variants from `Null` and the `NotNull` constraint above — confirm.
    Isnull,
    Notnull,
    Escape,

    // QUALIFY clause
    Qualify,

    // Grouped set operations
    Cube,
    Rollup,
    Grouping,
    Sets,

    // Logical
    Xor,

    // Special expressions
    Extract,
    Epoch,
    Year,
    Month,
    Day,
    Hour,
    Minute,
    Second,

    // ── Operators ───────────────────────────────────────────────────
    Plus,
    Minus,
    Star,
    Slash,
    Percent2, // % as modulo operator (the PERCENT keyword is `Percent`)
    Eq,
    Neq, // <> or !=
    Lt,
    Gt,
    LtEq,
    GtEq,
    Concat,          // ||
    BitwiseAnd,      // &
    BitwiseOr,       // |
    BitwiseXor,      // ^
    BitwiseNot,      // ~
    ShiftLeft,       // <<
    ShiftRight,      // >>
    DoubleColon,     // :: (Postgres cast)
    Arrow,           // ->
    DoubleArrow,     // ->>
    HashArrow,       // #>
    HashDoubleArrow, // #>>
    AtSign,          // @
    /// `@>` PostgreSQL "contains" (arrays / jsonb / range)
    AtArrow,
    /// `<@` PostgreSQL "is contained by"
    ArrowAt,
    // NOTE(review): annotated with the same `::` spelling as `DoubleColon`;
    // which of the two the tokenizer actually emits is not visible here — confirm.
    Scope,           // ::

    // ── Punctuation ────────────────────────────────────────────────
    LParen,
    RParen,
    LBracket, // [
    RBracket, // ]
    LBrace,   // {
    RBrace,   // }
    Comma,
    Semicolon,
    Dot,
    Colon,
    DoubleColon2, // duplicated for compat -- use DoubleColon

    // ── Special ────────────────────────────────────────────────────
    // Non-syntactic tokens: trivia (whitespace, comments) and end of input.
    Whitespace,
    LineComment,
    BlockComment,
    Eof,
}
276
/// A token produced by the tokenizer.
///
/// Pairs a [`TokenType`] with the token's text and its location in the input.
/// The derived `PartialEq` compares every field, so two tokens with the same
/// type and text but different locations are not equal.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Token {
    /// Classification of this token.
    pub token_type: TokenType,
    // NOTE(review): whether this holds the raw source text or a normalized
    // form (e.g. with quotes stripped) is decided by the tokenizer — confirm.
    /// Text carried by the token.
    pub value: String,
    // NOTE(review): 0- vs 1-based numbering is not visible here — confirm
    // against the tokenizer. `Token::new` leaves both `line` and `col` at 0.
    /// Line of the token in the input.
    pub line: usize,
    /// Column of the token in the input.
    pub col: usize,
    // NOTE(review): presumably an offset from the start of the input; whether
    // it counts bytes or chars is not visible here — confirm.
    /// Position of the token in the input.
    pub position: usize,
    /// For quoted identifiers: the delimiter character ('"', '`', or '[').
    /// '\0' means unquoted.
    ///
    /// `#[serde(default)]` allows serialized tokens that omit this field to
    /// deserialize; they receive `char::default()`, which is '\0' (unquoted).
    #[serde(default)]
    pub quote_char: char,
}
290
291impl Token {
292    #[must_use]
293    pub fn new(token_type: TokenType, value: impl Into<String>, position: usize) -> Self {
294        Self {
295            token_type,
296            value: value.into(),
297            line: 0,
298            col: 0,
299            position,
300            quote_char: '\0',
301        }
302    }
303
304    #[must_use]
305    pub fn with_location(
306        token_type: TokenType,
307        value: impl Into<String>,
308        position: usize,
309        line: usize,
310        col: usize,
311    ) -> Self {
312        Self {
313            token_type,
314            value: value.into(),
315            line,
316            col,
317            position,
318            quote_char: '\0',
319        }
320    }
321
322    #[must_use]
323    pub fn with_quote(
324        token_type: TokenType,
325        value: impl Into<String>,
326        position: usize,
327        line: usize,
328        col: usize,
329        quote_char: char,
330    ) -> Self {
331        Self {
332            token_type,
333            value: value.into(),
334            line,
335            col,
336            position,
337            quote_char,
338        }
339    }
340}