Skip to main content

sqlglot_rust/tokens/
mod.rs

1mod tokenizer;
2
3pub use tokenizer::Tokenizer;
4
5use serde::{Deserialize, Serialize};
6
7/// The type of a SQL token.
8///
9/// Modeled after Python sqlglot's comprehensive token type system.
10#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum TokenType {
12    // ── Literals ────────────────────────────────────────────────────
13    Number,
14    String,
15    Identifier,
16    BitString,
17    HexString,
18    Parameter, // $1, :name, ?
19
20    // ── Keywords ────────────────────────────────────────────────────
21    Select,
22    From,
23    Where,
24    And,
25    Or,
26    Not,
27    As,
28    Join,
29    Inner,
30    Left,
31    Right,
32    Full,
33    Outer,
34    Cross,
35    On,
36    Insert,
37    Into,
38    Values,
39    Update,
40    Set,
41    Delete,
42    Create,
43    Table,
44    Drop,
45    Alter,
46    Index,
47    If,
48    Exists,
49    In,
50    Is,
51    Null,
52    Like,
53    ILike,
54    Between,
55    Case,
56    When,
57    Then,
58    Else,
59    End,
60    Order,
61    By,
62    Asc,
63    Desc,
64    Group,
65    Having,
66    Limit,
67    Offset,
68    Union,
69    All,
70    Distinct,
71    True,
72    False,
73
74    // Set operations
75    Intersect,
76    Except,
77
78    // CTEs
79    With,
80    Recursive,
81
82    // Subqueries / misc
83    Any,
84    Some,
85
86    // Casting
87    Cast,
88
89    // Window functions
90    Over,
91    Partition,
92    Window,
93    Rows,
94    Range,
95    Unbounded,
96    Preceding,
97    Following,
98    CurrentRow,
99    Filter,
100
101    // Data types as keywords
102    Int,
103    Integer,
104    BigInt,
105    SmallInt,
106    TinyInt,
107    Float,
108    Double,
109    Decimal,
110    Numeric,
111    Real,
112    Varchar,
113    Char,
114    Text,
115    Boolean,
116    Bool,
117    Date,
118    Timestamp,
119    TimestampTz,
120    Time,
121    Interval,
122    Blob,
123    Bytea,
124    Json,
125    Jsonb,
126    Uuid,
127    Array,
128    Map,
129    Struct,
130
131    // Constraints & DDL
132    Primary,
133    Key,
134    Foreign,
135    References,
136    Unique,
137    Check,
138    Default,
139    Constraint,
140    AutoIncrement,
141    NotNull,
142    Cascade,
143    Restrict,
144    NoAction,
145    SetNull,
146    SetDefault,
147
148    // Additional DML
149    Returning,
150    Conflict,
151    Do,
152    Nothing,
153    Replace,
154    Ignore,
155    Merge,
156    Matched,
157    Using,
158    Truncate,
159
160    // Schema
161    Schema,
162    Database,
163    View,
164    Materialized,
165    Temporary,
166    Temp,
167
168    // Transaction
169    Begin,
170    Commit,
171    Rollback,
172    Savepoint,
173    Transaction,
174
175    // Misc keywords
176    Explain,
177    Analyze,
178    Describe,
179    Show,
180    Use,
181    Grant,
182    Revoke,
183    Lateral,
184    Unnest,
185    Pivot,
186    Unpivot,
187    Tablesample,
188    Fetch,
189    First,
190    Next,
191    Only,
192    Percent,
193    WithTies,
194    Nulls,
195    Respect,
196    Top,
197    Collate,
198    Comment,
199    Isnull,
200    Notnull,
201    Escape,
202
203    // Existence checks
204    Qualify,
205
206    // Logical
207    Xor,
208
209    // Special expressions
210    Extract,
211    Epoch,
212    Year,
213    Month,
214    Day,
215    Hour,
216    Minute,
217    Second,
218
219    // ── Operators ───────────────────────────────────────────────────
220    Plus,
221    Minus,
222    Star,
223    Slash,
224    Percent2, // % as modulo operator
225    Eq,
226    Neq, // <> or !=
227    Lt,
228    Gt,
229    LtEq,
230    GtEq,
231    Concat,          // ||
232    BitwiseAnd,      // &
233    BitwiseOr,       // |
234    BitwiseXor,      // ^
235    BitwiseNot,      // ~
236    ShiftLeft,       // <<
237    ShiftRight,      // >>
238    DoubleColon,     // :: (Postgres cast)
239    Arrow,           // ->
240    DoubleArrow,     // ->>
241    HashArrow,       // #>
242    HashDoubleArrow, // #>>
243    AtSign,          // @
244    Scope,           // ::
245
246    // ── Punctuation ────────────────────────────────────────────────
247    LParen,
248    RParen,
249    LBracket, // [
250    RBracket, // ]
251    LBrace,   // {
252    RBrace,   // }
253    Comma,
254    Semicolon,
255    Dot,
256    Colon,
257    DoubleColon2, // duplicated for compat -- use DoubleColon
258
259    // ── Special ────────────────────────────────────────────────────
260    Whitespace,
261    LineComment,
262    BlockComment,
263    Eof,
264}
265
266/// A token produced by the tokenizer.
267#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
268pub struct Token {
269    pub token_type: TokenType,
270    pub value: String,
271    pub line: usize,
272    pub col: usize,
273    pub position: usize,
274    /// For quoted identifiers: the delimiter character ('"', '`', or '[').
275    /// '\0' means unquoted.
276    #[serde(default)]
277    pub quote_char: char,
278}
279
280impl Token {
281    #[must_use]
282    pub fn new(token_type: TokenType, value: impl Into<String>, position: usize) -> Self {
283        Self {
284            token_type,
285            value: value.into(),
286            line: 0,
287            col: 0,
288            position,
289            quote_char: '\0',
290        }
291    }
292
293    #[must_use]
294    pub fn with_location(
295        token_type: TokenType,
296        value: impl Into<String>,
297        position: usize,
298        line: usize,
299        col: usize,
300    ) -> Self {
301        Self {
302            token_type,
303            value: value.into(),
304            line,
305            col,
306            position,
307            quote_char: '\0',
308        }
309    }
310
311    #[must_use]
312    pub fn with_quote(
313        token_type: TokenType,
314        value: impl Into<String>,
315        position: usize,
316        line: usize,
317        col: usize,
318        quote_char: char,
319    ) -> Self {
320        Self {
321            token_type,
322            value: value.into(),
323            line,
324            col,
325            position,
326            quote_char,
327        }
328    }
329}