1use std::fmt;
23use std::hash::Hash;
24
/// Location of a region of source text.
///
/// `start` and `end` are offsets into the input and `line` / `column` are a
/// human-readable position. The default span is offsets `0..0` at line 1,
/// column 1.
///
/// NOTE(review): `line`/`column` are assumed to describe the position of
/// `start`, and the offset unit (bytes vs. chars) and whether `end` is
/// exclusive are decided by the lexer — confirm against it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
    /// Line number associated with the region.
    pub line: usize,
    /// Column number associated with the region.
    pub column: usize,
}

impl Span {
    /// Builds a span from its four components, stored as given.
    pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self {
        Self {
            start,
            end,
            line,
            column,
        }
    }

    /// Returns the smallest span covering both `self` and `other`.
    ///
    /// The result runs from the smaller `start` to the larger `end`. Its
    /// `line` and `column` are taken from whichever input starts first, so
    /// they stay paired with the merged `start`; when both start at the same
    /// offset, `self`'s values are kept.
    pub fn merge(self, other: Span) -> Span {
        // Taking line/column unconditionally from `self` would pair the
        // merged `start` with the wrong position whenever `other` begins
        // earlier in the input.
        let (line, column) = if other.start < self.start {
            (other.line, other.column)
        } else {
            (self.line, self.column)
        };

        Span {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
            line,
            column,
        }
    }
}

impl Default for Span {
    /// An empty span at offset 0, line 1, column 1.
    fn default() -> Self {
        Self::new(0, 0, 1, 1)
    }
}
64
65#[derive(Debug, Clone, PartialEq)]
67pub struct Token {
68 pub kind: TokenKind,
69 pub span: Span,
70 pub literal: String,
71}
72
73impl Token {
74 pub fn new(kind: TokenKind, span: Span, literal: impl Into<String>) -> Self {
75 Self {
76 kind,
77 span,
78 literal: literal.into(),
79 }
80 }
81}
82
/// Classification of a token, carrying a payload for the variants that need
/// one (literals, identifiers, placeholders, comments and invalid input).
///
/// `Eq` and `Hash` are implemented by hand below because the `Float` payload
/// is an `f64`, which supports neither via `derive`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ----- Literal values -----
    Integer(i64),
    Float(f64),
    String(String),
    Blob(Vec<u8>),
    Null,
    True,
    False,

    // ----- Identifiers -----
    Identifier(String),
    QuotedIdentifier(String),

    // ----- Schema-definition keywords -----
    Create,
    Table,
    Index,
    Drop,
    Alter,
    Add,
    Column,
    Rename,
    Primary,
    Key,
    Foreign,
    References,
    Unique,
    Default,
    AutoIncrement,
    If,
    Exists,

    // ----- Conflict-handling / RETURNING keywords -----
    Ignore,
    Replace,
    Conflict,
    Do,
    Nothing,
    Duplicate,
    Abort,
    Fail,
    Returning,

    // ----- Data-manipulation and join keywords -----
    Select,
    Insert,
    Update,
    Delete,
    Into,
    Values,
    Set,
    From,
    Where,
    Join,
    Inner,
    Left,
    Right,
    Outer,
    Cross,
    On,
    Using,

    // ----- Query-clause keywords -----
    As,
    Distinct,
    All,
    Group,
    Having,
    Order,
    By,
    Asc,
    Desc,
    Nulls,
    First,
    Last,
    Limit,
    Offset,
    Union,
    Intersect,
    Except,

    // ----- Expression keywords -----
    And,
    Or,
    Not,
    Is,
    In,
    Like,
    Escape,
    Between,
    Case,
    When,
    Then,
    Else,
    End,
    Cast,
    Collate,

    // ----- Transaction keywords -----
    Begin,
    Commit,
    Rollback,
    Transaction,
    Savepoint,
    Release,

    // ----- Type-name keywords -----
    // The `*Kw` variants are the keyword spellings (`INTEGER`, `FLOAT`,
    // `BLOB`); the suffix keeps them apart from the literal variants above.
    Int,
    IntegerKw,
    Bigint,
    Smallint,
    Tinyint,
    FloatKw,
    Double,
    Real,
    Decimal,
    Numeric,
    Varchar,
    Char,
    Text,
    BlobKw,
    Boolean,
    Bool,
    Date,
    Time,
    Timestamp,
    Datetime,

    // ----- Aggregate-function keywords -----
    Count,
    Sum,
    Avg,
    Min,
    Max,

    // ----- Vector / JSON / context extension keywords -----
    Vector,
    VectorSearch,
    JsonExtract,
    JsonSet,
    ContextWindow,
    Embedding,
    Cosine,
    Euclidean,
    DotProduct,

    // ----- Operators -----
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    Concat,
    BitAnd,
    BitOr,
    BitNot,
    LeftShift,
    RightShift,

    // ----- Punctuation -----
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Semicolon,
    Dot,
    Colon,
    DoubleColon,
    Arrow,
    DoubleArrow,
    QuestionMark,
    At,

    // ----- Miscellaneous -----
    Placeholder(u32),
    Comment(String),
    Whitespace,
    Eof,
    Invalid(String),
}

// Marker impl layered on the derived `PartialEq`. Caveat: `Float(NAN)` never
// compares equal to itself, so such a value is not a usable hash-map key.
impl Eq for TokenKind {}

impl Hash for TokenKind {
    /// Hashes the variant discriminant, followed by the payload for the
    /// variants that carry one.
    ///
    /// Floats are hashed by bit pattern with `-0.0` folded into `0.0`, so that
    /// any two values that compare equal also hash identically.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::mem::discriminant(self).hash(state);
        match self {
            TokenKind::Integer(n) => n.hash(state),
            TokenKind::Float(f) => {
                // `0.0 == -0.0` under `PartialEq`, yet their bit patterns
                // differ; hashing raw bits would break the Hash/Eq contract.
                let canonical = if *f == 0.0 { 0.0_f64 } else { *f };
                canonical.to_bits().hash(state);
            }
            TokenKind::Blob(bytes) => bytes.hash(state),
            TokenKind::Placeholder(n) => n.hash(state),
            TokenKind::String(s)
            | TokenKind::Identifier(s)
            | TokenKind::QuotedIdentifier(s)
            | TokenKind::Comment(s)
            | TokenKind::Invalid(s) => s.hash(state),
            // Payload-free variants are fully described by the discriminant.
            _ => {}
        }
    }
}
293
294impl TokenKind {
295 pub fn is_keyword(&self) -> bool {
297 matches!(
298 self,
299 TokenKind::Select
300 | TokenKind::Insert
301 | TokenKind::Update
302 | TokenKind::Delete
303 | TokenKind::Create
304 | TokenKind::Drop
305 | TokenKind::From
306 | TokenKind::Where
307 | TokenKind::And
308 | TokenKind::Or
309 | TokenKind::Not
310 | TokenKind::Join
311 | TokenKind::Inner
312 | TokenKind::Left
313 | TokenKind::Right
314 | TokenKind::Outer
315 | TokenKind::Cross
316 | TokenKind::On
317 | TokenKind::As
318 | TokenKind::Distinct
319 | TokenKind::All
320 | TokenKind::Group
321 | TokenKind::Having
322 | TokenKind::Order
323 | TokenKind::By
324 | TokenKind::Asc
325 | TokenKind::Desc
326 | TokenKind::Limit
327 | TokenKind::Offset
328 | TokenKind::Values
329 | TokenKind::Into
330 | TokenKind::Set
331 | TokenKind::Begin
332 | TokenKind::Commit
333 | TokenKind::Rollback
334 | TokenKind::Table
335 | TokenKind::Index
336 | TokenKind::Alter
337 | TokenKind::Primary
338 | TokenKind::Key
339 | TokenKind::Foreign
340 | TokenKind::References
341 | TokenKind::Unique
342 | TokenKind::Default
343 | TokenKind::If
344 | TokenKind::Exists
345 | TokenKind::Case
346 | TokenKind::When
347 | TokenKind::Then
348 | TokenKind::Else
349 | TokenKind::End
350 | TokenKind::Cast
351 | TokenKind::Union
352 | TokenKind::Intersect
353 | TokenKind::Except
354 | TokenKind::Count
355 | TokenKind::Sum
356 | TokenKind::Avg
357 | TokenKind::Min
358 | TokenKind::Max
359 | TokenKind::Is
360 | TokenKind::In
361 | TokenKind::Like
362 | TokenKind::Between
363 | TokenKind::Null
364 | TokenKind::True
365 | TokenKind::False
366 | TokenKind::Int
367 | TokenKind::IntegerKw
368 | TokenKind::Bigint
369 | TokenKind::Smallint
370 | TokenKind::FloatKw
371 | TokenKind::Double
372 | TokenKind::Real
373 | TokenKind::Varchar
374 | TokenKind::Char
375 | TokenKind::Text
376 | TokenKind::BlobKw
377 | TokenKind::Boolean
378 | TokenKind::Bool
379 | TokenKind::Date
380 | TokenKind::Time
381 | TokenKind::Timestamp
382 | TokenKind::Datetime
383 | TokenKind::Vector
384 | TokenKind::VectorSearch
385 | TokenKind::Embedding
386 | TokenKind::Cosine
387 | TokenKind::Euclidean
388 | TokenKind::DotProduct
389 | TokenKind::ContextWindow
390 | TokenKind::Using
391 | TokenKind::Transaction
392 | TokenKind::Savepoint
393 | TokenKind::Release
394 | TokenKind::Escape
395 | TokenKind::Nulls
396 | TokenKind::First
397 | TokenKind::Last
398 | TokenKind::AutoIncrement
399 | TokenKind::Add
400 | TokenKind::Column
401 | TokenKind::Rename
402 | TokenKind::Collate
403 | TokenKind::Tinyint
404 | TokenKind::Decimal
405 | TokenKind::Numeric
406 | TokenKind::JsonExtract
407 | TokenKind::JsonSet
408 | TokenKind::Ignore
410 | TokenKind::Replace
411 | TokenKind::Conflict
412 | TokenKind::Do
413 | TokenKind::Nothing
414 | TokenKind::Duplicate
415 | TokenKind::Abort
416 | TokenKind::Fail
417 | TokenKind::Returning
418 )
419 }
420
421 pub fn from_keyword(s: &str) -> Option<TokenKind> {
423 match s.to_uppercase().as_str() {
424 "SELECT" => Some(TokenKind::Select),
425 "INSERT" => Some(TokenKind::Insert),
426 "UPDATE" => Some(TokenKind::Update),
427 "DELETE" => Some(TokenKind::Delete),
428 "CREATE" => Some(TokenKind::Create),
429 "TABLE" => Some(TokenKind::Table),
430 "DROP" => Some(TokenKind::Drop),
431 "ALTER" => Some(TokenKind::Alter),
432 "ADD" => Some(TokenKind::Add),
433 "COLUMN" => Some(TokenKind::Column),
434 "RENAME" => Some(TokenKind::Rename),
435 "INDEX" => Some(TokenKind::Index),
436 "FROM" => Some(TokenKind::From),
437 "WHERE" => Some(TokenKind::Where),
438 "AND" => Some(TokenKind::And),
439 "OR" => Some(TokenKind::Or),
440 "NOT" => Some(TokenKind::Not),
441 "NULL" => Some(TokenKind::Null),
442 "TRUE" => Some(TokenKind::True),
443 "FALSE" => Some(TokenKind::False),
444 "IS" => Some(TokenKind::Is),
445 "IN" => Some(TokenKind::In),
446 "LIKE" => Some(TokenKind::Like),
447 "ESCAPE" => Some(TokenKind::Escape),
448 "BETWEEN" => Some(TokenKind::Between),
449 "JOIN" => Some(TokenKind::Join),
450 "INNER" => Some(TokenKind::Inner),
451 "LEFT" => Some(TokenKind::Left),
452 "RIGHT" => Some(TokenKind::Right),
453 "OUTER" => Some(TokenKind::Outer),
454 "CROSS" => Some(TokenKind::Cross),
455 "ON" => Some(TokenKind::On),
456 "USING" => Some(TokenKind::Using),
457 "AS" => Some(TokenKind::As),
458 "DISTINCT" => Some(TokenKind::Distinct),
459 "ALL" => Some(TokenKind::All),
460 "GROUP" => Some(TokenKind::Group),
461 "HAVING" => Some(TokenKind::Having),
462 "ORDER" => Some(TokenKind::Order),
463 "BY" => Some(TokenKind::By),
464 "ASC" => Some(TokenKind::Asc),
465 "DESC" => Some(TokenKind::Desc),
466 "NULLS" => Some(TokenKind::Nulls),
467 "FIRST" => Some(TokenKind::First),
468 "LAST" => Some(TokenKind::Last),
469 "LIMIT" => Some(TokenKind::Limit),
470 "OFFSET" => Some(TokenKind::Offset),
471 "VALUES" => Some(TokenKind::Values),
472 "INTO" => Some(TokenKind::Into),
473 "SET" => Some(TokenKind::Set),
474 "BEGIN" => Some(TokenKind::Begin),
475 "COMMIT" => Some(TokenKind::Commit),
476 "ROLLBACK" => Some(TokenKind::Rollback),
477 "TRANSACTION" => Some(TokenKind::Transaction),
478 "SAVEPOINT" => Some(TokenKind::Savepoint),
479 "RELEASE" => Some(TokenKind::Release),
480 "PRIMARY" => Some(TokenKind::Primary),
481 "KEY" => Some(TokenKind::Key),
482 "FOREIGN" => Some(TokenKind::Foreign),
483 "REFERENCES" => Some(TokenKind::References),
484 "UNIQUE" => Some(TokenKind::Unique),
485 "DEFAULT" => Some(TokenKind::Default),
486 "AUTOINCREMENT" | "AUTO_INCREMENT" => Some(TokenKind::AutoIncrement),
487 "IF" => Some(TokenKind::If),
488 "EXISTS" => Some(TokenKind::Exists),
489 "CASE" => Some(TokenKind::Case),
490 "WHEN" => Some(TokenKind::When),
491 "THEN" => Some(TokenKind::Then),
492 "ELSE" => Some(TokenKind::Else),
493 "END" => Some(TokenKind::End),
494 "CAST" => Some(TokenKind::Cast),
495 "COLLATE" => Some(TokenKind::Collate),
496 "UNION" => Some(TokenKind::Union),
497 "INTERSECT" => Some(TokenKind::Intersect),
498 "EXCEPT" => Some(TokenKind::Except),
499 "COUNT" => Some(TokenKind::Count),
500 "SUM" => Some(TokenKind::Sum),
501 "AVG" => Some(TokenKind::Avg),
502 "MIN" => Some(TokenKind::Min),
503 "MAX" => Some(TokenKind::Max),
504 "IGNORE" => Some(TokenKind::Ignore),
506 "REPLACE" => Some(TokenKind::Replace),
507 "CONFLICT" => Some(TokenKind::Conflict),
508 "DO" => Some(TokenKind::Do),
509 "NOTHING" => Some(TokenKind::Nothing),
510 "DUPLICATE" => Some(TokenKind::Duplicate),
511 "ABORT" => Some(TokenKind::Abort),
512 "FAIL" => Some(TokenKind::Fail),
513 "RETURNING" => Some(TokenKind::Returning),
514 "INT" => Some(TokenKind::Int),
516 "INTEGER" => Some(TokenKind::IntegerKw),
517 "BIGINT" => Some(TokenKind::Bigint),
518 "SMALLINT" => Some(TokenKind::Smallint),
519 "TINYINT" => Some(TokenKind::Tinyint),
520 "FLOAT" => Some(TokenKind::FloatKw),
521 "DOUBLE" => Some(TokenKind::Double),
522 "REAL" => Some(TokenKind::Real),
523 "DECIMAL" => Some(TokenKind::Decimal),
524 "NUMERIC" => Some(TokenKind::Numeric),
525 "VARCHAR" => Some(TokenKind::Varchar),
526 "CHAR" => Some(TokenKind::Char),
527 "TEXT" => Some(TokenKind::Text),
528 "BLOB" => Some(TokenKind::BlobKw),
529 "BOOLEAN" => Some(TokenKind::Boolean),
530 "BOOL" => Some(TokenKind::Bool),
531 "DATE" => Some(TokenKind::Date),
532 "TIME" => Some(TokenKind::Time),
533 "TIMESTAMP" => Some(TokenKind::Timestamp),
534 "DATETIME" => Some(TokenKind::Datetime),
535 "VECTOR" => Some(TokenKind::Vector),
537 "VECTOR_SEARCH" => Some(TokenKind::VectorSearch),
538 "JSON_EXTRACT" => Some(TokenKind::JsonExtract),
539 "JSON_SET" => Some(TokenKind::JsonSet),
540 "CONTEXT_WINDOW" => Some(TokenKind::ContextWindow),
541 "EMBEDDING" => Some(TokenKind::Embedding),
542 "COSINE" => Some(TokenKind::Cosine),
543 "EUCLIDEAN" => Some(TokenKind::Euclidean),
544 "DOT_PRODUCT" => Some(TokenKind::DotProduct),
545 _ => None,
546 }
547 }
548}
549
550impl fmt::Display for TokenKind {
551 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
552 match self {
553 TokenKind::Integer(n) => write!(f, "{}", n),
554 TokenKind::Float(n) => write!(f, "{}", n),
555 TokenKind::String(s) => write!(f, "'{}'", s),
556 TokenKind::Identifier(s) => write!(f, "{}", s),
557 TokenKind::QuotedIdentifier(s) => write!(f, "\"{}\"", s),
558 TokenKind::Select => write!(f, "SELECT"),
559 TokenKind::From => write!(f, "FROM"),
560 TokenKind::Where => write!(f, "WHERE"),
561 TokenKind::Plus => write!(f, "+"),
562 TokenKind::Minus => write!(f, "-"),
563 TokenKind::Star => write!(f, "*"),
564 TokenKind::Slash => write!(f, "/"),
565 TokenKind::Eq => write!(f, "="),
566 TokenKind::Ne => write!(f, "!="),
567 TokenKind::Lt => write!(f, "<"),
568 TokenKind::Le => write!(f, "<="),
569 TokenKind::Gt => write!(f, ">"),
570 TokenKind::Ge => write!(f, ">="),
571 TokenKind::LParen => write!(f, "("),
572 TokenKind::RParen => write!(f, ")"),
573 TokenKind::LBracket => write!(f, "["),
574 TokenKind::RBracket => write!(f, "]"),
575 TokenKind::Comma => write!(f, ","),
576 TokenKind::Semicolon => write!(f, ";"),
577 TokenKind::Dot => write!(f, "."),
578 TokenKind::Eof => write!(f, "EOF"),
579 TokenKind::Null => write!(f, "NULL"),
580 TokenKind::True => write!(f, "TRUE"),
581 TokenKind::False => write!(f, "FALSE"),
582 TokenKind::And => write!(f, "AND"),
583 TokenKind::Or => write!(f, "OR"),
584 TokenKind::Not => write!(f, "NOT"),
585 _ => write!(f, "{:?}", self),
586 }
587 }
588}