1use std::fmt;
20use std::hash::Hash;
21
/// A region of source text plus the line/column position attached to it.
///
/// `start` and `end` are offsets into the source. Whether they are byte or
/// character offsets, and whether `end` is inclusive, is decided by the code
/// that builds spans and is not visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
    /// Line number associated with the region (`Default` uses 1).
    pub line: usize,
    /// Column number associated with the region (`Default` uses 1).
    pub column: usize,
}

impl Span {
    /// Creates a span from its four components, stored as given.
    pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self {
        Self {
            start,
            end,
            line,
            column,
        }
    }

    /// Returns the smallest span that covers both `self` and `other`.
    ///
    /// The result runs from the smaller `start` to the larger `end`. Its
    /// `line`/`column` are taken from whichever input begins first, so the
    /// reported position always belongs to the merged `start`. When both
    /// inputs begin at the same offset, `self`'s position is kept.
    pub fn merge(self, other: Span) -> Span {
        // Previously `self`'s line/column were kept unconditionally, which
        // produced a position that did not match `start` whenever `other`
        // began earlier (e.g. `later.merge(earlier)`).
        let (line, column) = if other.start < self.start {
            (other.line, other.column)
        } else {
            (self.line, self.column)
        };

        Span {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
            line,
            column,
        }
    }
}

impl Default for Span {
    /// An empty span at offset 0, positioned at line 1, column 1.
    fn default() -> Self {
        Self {
            start: 0,
            end: 0,
            line: 1,
            column: 1,
        }
    }
}
61
62#[derive(Debug, Clone, PartialEq)]
64pub struct Token {
65 pub kind: TokenKind,
66 pub span: Span,
67 pub literal: String,
68}
69
70impl Token {
71 pub fn new(kind: TokenKind, span: Span, literal: impl Into<String>) -> Self {
72 Self {
73 kind,
74 span,
75 literal: literal.into(),
76 }
77 }
78}
79
/// Every kind of token the lexer can hand to the parser.
///
/// Variants that carry data (literals, identifiers, placeholders, comments,
/// invalid input) hold that data as a payload; all other variants are plain
/// tags for a keyword, operator or punctuation mark.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ---- Literals --------------------------------------------------------
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal.
    String(String),
    /// Binary (blob) literal.
    Blob(Vec<u8>),
    Null,
    True,
    False,

    // ---- Identifiers -----------------------------------------------------
    /// Unquoted identifier.
    Identifier(String),
    /// Quoted identifier.
    QuotedIdentifier(String),

    // ---- Schema definition keywords ---------------------------------------
    Create,
    Table,
    Index,
    Drop,
    Alter,
    Add,
    Column,
    Rename,
    Primary,
    Key,
    Foreign,
    References,
    Unique,
    Default,
    AutoIncrement,
    If,
    Exists,

    // ---- Conflict-handling keywords and RETURNING -------------------------
    Ignore,
    Replace,
    Conflict,
    Do,
    Nothing,
    Duplicate,
    Abort,
    Fail,
    Returning,

    // ---- Data manipulation keywords ---------------------------------------
    Select,
    Insert,
    Update,
    Delete,
    Into,
    Values,
    Set,
    From,
    Where,
    Join,
    Inner,
    Left,
    Right,
    Outer,
    Cross,
    On,
    Using,

    // ---- Query clause keywords --------------------------------------------
    As,
    Distinct,
    All,
    Group,
    Having,
    Order,
    By,
    Asc,
    Desc,
    Nulls,
    First,
    Last,
    Limit,
    Offset,
    Union,
    Intersect,
    Except,

    // ---- Expression keywords ----------------------------------------------
    And,
    Or,
    Not,
    Is,
    In,
    Like,
    Escape,
    Between,
    Case,
    When,
    Then,
    Else,
    End,
    Cast,
    Collate,

    // ---- Transaction keywords ---------------------------------------------
    Begin,
    Commit,
    Rollback,
    Transaction,
    Savepoint,
    Release,

    // ---- Type-name keywords -----------------------------------------------
    Int,
    /// The `INTEGER` type keyword (named to avoid clashing with the
    /// `Integer` literal variant).
    IntegerKw,
    Bigint,
    Smallint,
    Tinyint,
    /// The `FLOAT` type keyword (named to avoid clashing with the `Float`
    /// literal variant).
    FloatKw,
    Double,
    Real,
    Decimal,
    Numeric,
    Varchar,
    Char,
    Text,
    /// The `BLOB` type keyword (named to avoid clashing with the `Blob`
    /// literal variant).
    BlobKw,
    Boolean,
    Bool,
    Date,
    Time,
    Timestamp,
    Datetime,

    // ---- Aggregate function keywords --------------------------------------
    Count,
    Sum,
    Avg,
    Min,
    Max,

    // ---- Vector / JSON / context extension keywords -----------------------
    Vector,
    VectorSearch,
    JsonExtract,
    JsonSet,
    ContextWindow,
    Embedding,
    Cosine,
    Euclidean,
    DotProduct,

    // ---- Operators --------------------------------------------------------
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    Concat,
    BitAnd,
    BitOr,
    BitNot,
    LeftShift,
    RightShift,

    // ---- Punctuation ------------------------------------------------------
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Semicolon,
    Dot,
    Colon,
    DoubleColon,
    Arrow,
    DoubleArrow,
    QuestionMark,
    At,

    // ---- Special ----------------------------------------------------------
    /// Placeholder carrying a numeric index.
    Placeholder(u32),
    /// Comment, with its text.
    Comment(String),
    Whitespace,
    /// End of input.
    Eof,
    /// Input that could not be tokenized, with an accompanying string.
    Invalid(String),
}
270
// Marker impl: promises that `==` on `TokenKind` is a full equivalence
// relation, which (together with `Hash`) is what hashed collections require
// of their keys.
// NOTE(review): the `Float(f64)` variant can hold NaN, and the derived
// `PartialEq` makes `Float(NAN) != Float(NAN)`, so reflexivity only holds if
// a NaN payload is never constructed — confirm against the lexer.
impl Eq for TokenKind {}
272
273impl Hash for TokenKind {
274 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
275 std::mem::discriminant(self).hash(state);
276 match self {
277 TokenKind::Integer(n) => n.hash(state),
278 TokenKind::Float(f) => f.to_bits().hash(state),
279 TokenKind::String(s) => s.hash(state),
280 TokenKind::Blob(b) => b.hash(state),
281 TokenKind::Identifier(s) => s.hash(state),
282 TokenKind::QuotedIdentifier(s) => s.hash(state),
283 TokenKind::Placeholder(n) => n.hash(state),
284 TokenKind::Comment(s) => s.hash(state),
285 TokenKind::Invalid(s) => s.hash(state),
286 _ => {}
287 }
288 }
289}
290
291impl TokenKind {
292 pub fn is_keyword(&self) -> bool {
294 matches!(
295 self,
296 TokenKind::Select
297 | TokenKind::Insert
298 | TokenKind::Update
299 | TokenKind::Delete
300 | TokenKind::Create
301 | TokenKind::Drop
302 | TokenKind::From
303 | TokenKind::Where
304 | TokenKind::And
305 | TokenKind::Or
306 | TokenKind::Not
307 | TokenKind::Join
308 | TokenKind::Inner
309 | TokenKind::Left
310 | TokenKind::Right
311 | TokenKind::Outer
312 | TokenKind::Cross
313 | TokenKind::On
314 | TokenKind::As
315 | TokenKind::Distinct
316 | TokenKind::All
317 | TokenKind::Group
318 | TokenKind::Having
319 | TokenKind::Order
320 | TokenKind::By
321 | TokenKind::Asc
322 | TokenKind::Desc
323 | TokenKind::Limit
324 | TokenKind::Offset
325 | TokenKind::Values
326 | TokenKind::Into
327 | TokenKind::Set
328 | TokenKind::Begin
329 | TokenKind::Commit
330 | TokenKind::Rollback
331 | TokenKind::Table
332 | TokenKind::Index
333 | TokenKind::Alter
334 | TokenKind::Primary
335 | TokenKind::Key
336 | TokenKind::Foreign
337 | TokenKind::References
338 | TokenKind::Unique
339 | TokenKind::Default
340 | TokenKind::If
341 | TokenKind::Exists
342 | TokenKind::Case
343 | TokenKind::When
344 | TokenKind::Then
345 | TokenKind::Else
346 | TokenKind::End
347 | TokenKind::Cast
348 | TokenKind::Union
349 | TokenKind::Intersect
350 | TokenKind::Except
351 | TokenKind::Count
352 | TokenKind::Sum
353 | TokenKind::Avg
354 | TokenKind::Min
355 | TokenKind::Max
356 | TokenKind::Is
357 | TokenKind::In
358 | TokenKind::Like
359 | TokenKind::Between
360 | TokenKind::Null
361 | TokenKind::True
362 | TokenKind::False
363 | TokenKind::Int
364 | TokenKind::IntegerKw
365 | TokenKind::Bigint
366 | TokenKind::Smallint
367 | TokenKind::FloatKw
368 | TokenKind::Double
369 | TokenKind::Real
370 | TokenKind::Varchar
371 | TokenKind::Char
372 | TokenKind::Text
373 | TokenKind::BlobKw
374 | TokenKind::Boolean
375 | TokenKind::Bool
376 | TokenKind::Date
377 | TokenKind::Time
378 | TokenKind::Timestamp
379 | TokenKind::Datetime
380 | TokenKind::Vector
381 | TokenKind::VectorSearch
382 | TokenKind::Embedding
383 | TokenKind::Cosine
384 | TokenKind::Euclidean
385 | TokenKind::DotProduct
386 | TokenKind::ContextWindow
387 | TokenKind::Using
388 | TokenKind::Transaction
389 | TokenKind::Savepoint
390 | TokenKind::Release
391 | TokenKind::Escape
392 | TokenKind::Nulls
393 | TokenKind::First
394 | TokenKind::Last
395 | TokenKind::AutoIncrement
396 | TokenKind::Add
397 | TokenKind::Column
398 | TokenKind::Rename
399 | TokenKind::Collate
400 | TokenKind::Tinyint
401 | TokenKind::Decimal
402 | TokenKind::Numeric
403 | TokenKind::JsonExtract
404 | TokenKind::JsonSet
405 | TokenKind::Ignore
407 | TokenKind::Replace
408 | TokenKind::Conflict
409 | TokenKind::Do
410 | TokenKind::Nothing
411 | TokenKind::Duplicate
412 | TokenKind::Abort
413 | TokenKind::Fail
414 | TokenKind::Returning
415 )
416 }
417
418 pub fn from_keyword(s: &str) -> Option<TokenKind> {
420 match s.to_uppercase().as_str() {
421 "SELECT" => Some(TokenKind::Select),
422 "INSERT" => Some(TokenKind::Insert),
423 "UPDATE" => Some(TokenKind::Update),
424 "DELETE" => Some(TokenKind::Delete),
425 "CREATE" => Some(TokenKind::Create),
426 "TABLE" => Some(TokenKind::Table),
427 "DROP" => Some(TokenKind::Drop),
428 "ALTER" => Some(TokenKind::Alter),
429 "ADD" => Some(TokenKind::Add),
430 "COLUMN" => Some(TokenKind::Column),
431 "RENAME" => Some(TokenKind::Rename),
432 "INDEX" => Some(TokenKind::Index),
433 "FROM" => Some(TokenKind::From),
434 "WHERE" => Some(TokenKind::Where),
435 "AND" => Some(TokenKind::And),
436 "OR" => Some(TokenKind::Or),
437 "NOT" => Some(TokenKind::Not),
438 "NULL" => Some(TokenKind::Null),
439 "TRUE" => Some(TokenKind::True),
440 "FALSE" => Some(TokenKind::False),
441 "IS" => Some(TokenKind::Is),
442 "IN" => Some(TokenKind::In),
443 "LIKE" => Some(TokenKind::Like),
444 "ESCAPE" => Some(TokenKind::Escape),
445 "BETWEEN" => Some(TokenKind::Between),
446 "JOIN" => Some(TokenKind::Join),
447 "INNER" => Some(TokenKind::Inner),
448 "LEFT" => Some(TokenKind::Left),
449 "RIGHT" => Some(TokenKind::Right),
450 "OUTER" => Some(TokenKind::Outer),
451 "CROSS" => Some(TokenKind::Cross),
452 "ON" => Some(TokenKind::On),
453 "USING" => Some(TokenKind::Using),
454 "AS" => Some(TokenKind::As),
455 "DISTINCT" => Some(TokenKind::Distinct),
456 "ALL" => Some(TokenKind::All),
457 "GROUP" => Some(TokenKind::Group),
458 "HAVING" => Some(TokenKind::Having),
459 "ORDER" => Some(TokenKind::Order),
460 "BY" => Some(TokenKind::By),
461 "ASC" => Some(TokenKind::Asc),
462 "DESC" => Some(TokenKind::Desc),
463 "NULLS" => Some(TokenKind::Nulls),
464 "FIRST" => Some(TokenKind::First),
465 "LAST" => Some(TokenKind::Last),
466 "LIMIT" => Some(TokenKind::Limit),
467 "OFFSET" => Some(TokenKind::Offset),
468 "VALUES" => Some(TokenKind::Values),
469 "INTO" => Some(TokenKind::Into),
470 "SET" => Some(TokenKind::Set),
471 "BEGIN" => Some(TokenKind::Begin),
472 "COMMIT" => Some(TokenKind::Commit),
473 "ROLLBACK" => Some(TokenKind::Rollback),
474 "TRANSACTION" => Some(TokenKind::Transaction),
475 "SAVEPOINT" => Some(TokenKind::Savepoint),
476 "RELEASE" => Some(TokenKind::Release),
477 "PRIMARY" => Some(TokenKind::Primary),
478 "KEY" => Some(TokenKind::Key),
479 "FOREIGN" => Some(TokenKind::Foreign),
480 "REFERENCES" => Some(TokenKind::References),
481 "UNIQUE" => Some(TokenKind::Unique),
482 "DEFAULT" => Some(TokenKind::Default),
483 "AUTOINCREMENT" | "AUTO_INCREMENT" => Some(TokenKind::AutoIncrement),
484 "IF" => Some(TokenKind::If),
485 "EXISTS" => Some(TokenKind::Exists),
486 "CASE" => Some(TokenKind::Case),
487 "WHEN" => Some(TokenKind::When),
488 "THEN" => Some(TokenKind::Then),
489 "ELSE" => Some(TokenKind::Else),
490 "END" => Some(TokenKind::End),
491 "CAST" => Some(TokenKind::Cast),
492 "COLLATE" => Some(TokenKind::Collate),
493 "UNION" => Some(TokenKind::Union),
494 "INTERSECT" => Some(TokenKind::Intersect),
495 "EXCEPT" => Some(TokenKind::Except),
496 "COUNT" => Some(TokenKind::Count),
497 "SUM" => Some(TokenKind::Sum),
498 "AVG" => Some(TokenKind::Avg),
499 "MIN" => Some(TokenKind::Min),
500 "MAX" => Some(TokenKind::Max),
501 "IGNORE" => Some(TokenKind::Ignore),
503 "REPLACE" => Some(TokenKind::Replace),
504 "CONFLICT" => Some(TokenKind::Conflict),
505 "DO" => Some(TokenKind::Do),
506 "NOTHING" => Some(TokenKind::Nothing),
507 "DUPLICATE" => Some(TokenKind::Duplicate),
508 "ABORT" => Some(TokenKind::Abort),
509 "FAIL" => Some(TokenKind::Fail),
510 "RETURNING" => Some(TokenKind::Returning),
511 "INT" => Some(TokenKind::Int),
513 "INTEGER" => Some(TokenKind::IntegerKw),
514 "BIGINT" => Some(TokenKind::Bigint),
515 "SMALLINT" => Some(TokenKind::Smallint),
516 "TINYINT" => Some(TokenKind::Tinyint),
517 "FLOAT" => Some(TokenKind::FloatKw),
518 "DOUBLE" => Some(TokenKind::Double),
519 "REAL" => Some(TokenKind::Real),
520 "DECIMAL" => Some(TokenKind::Decimal),
521 "NUMERIC" => Some(TokenKind::Numeric),
522 "VARCHAR" => Some(TokenKind::Varchar),
523 "CHAR" => Some(TokenKind::Char),
524 "TEXT" => Some(TokenKind::Text),
525 "BLOB" => Some(TokenKind::BlobKw),
526 "BOOLEAN" => Some(TokenKind::Boolean),
527 "BOOL" => Some(TokenKind::Bool),
528 "DATE" => Some(TokenKind::Date),
529 "TIME" => Some(TokenKind::Time),
530 "TIMESTAMP" => Some(TokenKind::Timestamp),
531 "DATETIME" => Some(TokenKind::Datetime),
532 "VECTOR" => Some(TokenKind::Vector),
534 "VECTOR_SEARCH" => Some(TokenKind::VectorSearch),
535 "JSON_EXTRACT" => Some(TokenKind::JsonExtract),
536 "JSON_SET" => Some(TokenKind::JsonSet),
537 "CONTEXT_WINDOW" => Some(TokenKind::ContextWindow),
538 "EMBEDDING" => Some(TokenKind::Embedding),
539 "COSINE" => Some(TokenKind::Cosine),
540 "EUCLIDEAN" => Some(TokenKind::Euclidean),
541 "DOT_PRODUCT" => Some(TokenKind::DotProduct),
542 _ => None,
543 }
544 }
545}
546
547impl fmt::Display for TokenKind {
548 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
549 match self {
550 TokenKind::Integer(n) => write!(f, "{}", n),
551 TokenKind::Float(n) => write!(f, "{}", n),
552 TokenKind::String(s) => write!(f, "'{}'", s),
553 TokenKind::Identifier(s) => write!(f, "{}", s),
554 TokenKind::QuotedIdentifier(s) => write!(f, "\"{}\"", s),
555 TokenKind::Select => write!(f, "SELECT"),
556 TokenKind::From => write!(f, "FROM"),
557 TokenKind::Where => write!(f, "WHERE"),
558 TokenKind::Plus => write!(f, "+"),
559 TokenKind::Minus => write!(f, "-"),
560 TokenKind::Star => write!(f, "*"),
561 TokenKind::Slash => write!(f, "/"),
562 TokenKind::Eq => write!(f, "="),
563 TokenKind::Ne => write!(f, "!="),
564 TokenKind::Lt => write!(f, "<"),
565 TokenKind::Le => write!(f, "<="),
566 TokenKind::Gt => write!(f, ">"),
567 TokenKind::Ge => write!(f, ">="),
568 TokenKind::LParen => write!(f, "("),
569 TokenKind::RParen => write!(f, ")"),
570 TokenKind::LBracket => write!(f, "["),
571 TokenKind::RBracket => write!(f, "]"),
572 TokenKind::Comma => write!(f, ","),
573 TokenKind::Semicolon => write!(f, ";"),
574 TokenKind::Dot => write!(f, "."),
575 TokenKind::Eof => write!(f, "EOF"),
576 TokenKind::Null => write!(f, "NULL"),
577 TokenKind::True => write!(f, "TRUE"),
578 TokenKind::False => write!(f, "FALSE"),
579 TokenKind::And => write!(f, "AND"),
580 TokenKind::Or => write!(f, "OR"),
581 TokenKind::Not => write!(f, "NOT"),
582 _ => write!(f, "{:?}", self),
583 }
584 }
585}