1use std::borrow::Cow;
23use std::fmt;
24use std::hash::Hash;
25
/// A region of the input, described by a pair of offsets plus the line and
/// column at which the region begins.
///
/// NOTE(review): the unit of `start`/`end` (bytes vs. chars) and whether `end`
/// is exclusive are not visible in this file — confirm against the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset of the first position covered by the span.
    pub start: usize,
    /// Offset at which the span ends.
    pub end: usize,
    /// Line on which the span starts (the default span uses `1`).
    pub line: usize,
    /// Column at which the span starts (the default span uses `1`).
    pub column: usize,
}

impl Span {
    /// Builds a span from its four components, stored verbatim.
    pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self {
        Self {
            start,
            end,
            line,
            column,
        }
    }

    /// Returns the smallest span that covers both `self` and `other`.
    ///
    /// The resulting `line`/`column` are taken from whichever input starts
    /// first, so they always describe the merged `start`. When both spans
    /// start at the same offset, `self`'s position is kept.
    pub fn merge(self, other: Span) -> Span {
        // The position must follow the start offset; otherwise merging a later
        // span with an earlier one would report the later span's line/column.
        let first = if other.start < self.start { other } else { self };
        Span {
            start: first.start,
            end: self.end.max(other.end),
            line: first.line,
            column: first.column,
        }
    }
}

impl Default for Span {
    /// An empty span at offset `0`, positioned at line 1, column 1.
    fn default() -> Self {
        Self {
            start: 0,
            end: 0,
            line: 1,
            column: 1,
        }
    }
}
65
66#[derive(Debug, Clone, PartialEq)]
68pub struct Token<'a> {
69 pub kind: TokenKind<'a>,
70 pub span: Span,
71 pub literal: &'a str,
72}
73
74impl<'a> Token<'a> {
75 pub fn new(kind: TokenKind<'a>, span: Span, literal: &'a str) -> Self {
76 Self {
77 kind,
78 span,
79 literal,
80 }
81 }
82}
83
/// Classification of a token.
///
/// Data-carrying variants hold their payload inline; the string payloads are
/// `&'a str` / `Cow<'a, str>` (presumably borrowed from the text being lexed —
/// confirm against the lexer). All other variants are plain markers for
/// keywords, operators and punctuation.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind<'a> {
    // ---- Literal values ----
    Integer(i64),
    Float(f64),
    String(Cow<'a, str>),
    Blob(Vec<u8>),
    Null,
    True,
    False,

    // ---- Names ----
    Identifier(&'a str),
    QuotedIdentifier(Cow<'a, str>),

    // ---- Schema definition keywords ----
    Create,
    Table,
    Index,
    Drop,
    Alter,
    Add,
    Column,
    Rename,
    To,
    Cascade,
    Primary,
    Key,
    Foreign,
    References,
    Unique,
    Default,
    AutoIncrement,
    If,
    Exists,

    // ---- Conflict-handling keywords ----
    Ignore,
    Replace,
    Conflict,
    Do,
    Nothing,
    Duplicate,
    Abort,
    Fail,
    Returning,

    // ---- Data manipulation and join keywords ----
    Select,
    Insert,
    Update,
    Delete,
    Into,
    Values,
    Set,
    From,
    Where,
    Join,
    Inner,
    Left,
    Right,
    Outer,
    Cross,
    On,
    Using,

    // ---- Query clause keywords ----
    As,
    Distinct,
    All,
    Group,
    Having,
    Order,
    By,
    Asc,
    Desc,
    Nulls,
    First,
    Last,
    Limit,
    Offset,
    Union,
    Intersect,
    Except,

    // ---- Expression keywords ----
    And,
    Or,
    Not,
    Is,
    In,
    Like,
    Escape,
    Between,
    Case,
    When,
    Then,
    Else,
    End,
    Cast,
    Collate,

    // ---- Transaction keywords ----
    Begin,
    Commit,
    Rollback,
    Transaction,
    Savepoint,
    Release,

    // ---- Type-name keywords ----
    // The `Kw` suffix separates a keyword from the literal variant of the
    // same name (`Integer`, `Float`, `Blob`).
    Int,
    IntegerKw,
    Bigint,
    Smallint,
    Tinyint,
    FloatKw,
    Double,
    Real,
    Decimal,
    Numeric,
    Varchar,
    Char,
    Text,
    BlobKw,
    Boolean,
    Bool,
    Date,
    Time,
    Timestamp,
    Datetime,

    // ---- Aggregate function keywords ----
    Count,
    Sum,
    Avg,
    Min,
    Max,

    // ---- Vector / JSON extension keywords ----
    Vector,
    VectorSearch,
    JsonExtract,
    JsonSet,
    ContextWindow,
    Embedding,
    Cosine,
    Euclidean,
    DotProduct,

    // ---- RELATE / LIVE keywords ----
    Relate,
    Live,
    Content,
    Event,
    Diff,

    // ---- DEFINE / scope keywords ----
    Define,
    Scope,
    Remove,
    Session,
    Signin,
    Signup,
    Permissions,
    For,

    // ---- Operators ----
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    Concat,
    BitAnd,
    BitOr,
    BitNot,
    LeftShift,
    RightShift,

    // ---- Punctuation ----
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Semicolon,
    Dot,
    Colon,
    DoubleColon,
    Arrow,
    DoubleArrow,
    LeftArrow,
    BiArrow,
    QuestionMark,
    At,
    // NOTE(review): the `u32` is presumably the parameter index — confirm.
    Placeholder(u32),

    // ---- Trivia and sentinels ----
    Comment(&'a str),
    Whitespace,
    Eof,
    Invalid(&'a str),
}

// `Eq` cannot be derived because of the `f64` payload in `Float`, so it is
// asserted by hand. Caveat: equality is the derived `PartialEq`, therefore a
// `Float(NaN)` never equals itself; do not use NaN float tokens as map/set keys.
impl Eq for TokenKind<'_> {}

/// Hashes the variant discriminant followed by the payload, if any.
///
/// The implementation is kept consistent with the derived `PartialEq`:
/// values that compare equal always produce the same hash.
impl Hash for TokenKind<'_> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // The discriminant goes first so that different variants with
        // identical payloads (e.g. `Identifier` vs `Comment`) hash apart.
        std::mem::discriminant(self).hash(state);
        match self {
            TokenKind::Integer(n) => n.hash(state),
            TokenKind::Float(f) => {
                // `0.0 == -0.0` under `PartialEq`, but their bit patterns
                // differ; fold both onto `+0.0` so equal tokens hash alike.
                let canonical = if *f == 0.0 { 0.0_f64 } else { *f };
                canonical.to_bits().hash(state)
            }
            TokenKind::String(s) => s.hash(state),
            TokenKind::Blob(b) => b.hash(state),
            TokenKind::Identifier(s) => s.hash(state),
            TokenKind::QuotedIdentifier(s) => s.hash(state),
            TokenKind::Placeholder(n) => n.hash(state),
            TokenKind::Comment(s) => s.hash(state),
            TokenKind::Invalid(s) => s.hash(state),
            // Payload-free variants are fully described by the discriminant.
            _ => {}
        }
    }
}
315
316impl<'a> TokenKind<'a> {
317 pub fn is_keyword(&self) -> bool {
319 matches!(
320 self,
321 TokenKind::Select
322 | TokenKind::Insert
323 | TokenKind::Update
324 | TokenKind::Delete
325 | TokenKind::Create
326 | TokenKind::Drop
327 | TokenKind::From
328 | TokenKind::Where
329 | TokenKind::And
330 | TokenKind::Or
331 | TokenKind::Not
332 | TokenKind::Join
333 | TokenKind::Inner
334 | TokenKind::Left
335 | TokenKind::Right
336 | TokenKind::Outer
337 | TokenKind::Cross
338 | TokenKind::On
339 | TokenKind::As
340 | TokenKind::Distinct
341 | TokenKind::All
342 | TokenKind::Group
343 | TokenKind::Having
344 | TokenKind::Order
345 | TokenKind::By
346 | TokenKind::Asc
347 | TokenKind::Desc
348 | TokenKind::Limit
349 | TokenKind::Offset
350 | TokenKind::Values
351 | TokenKind::Into
352 | TokenKind::Set
353 | TokenKind::Begin
354 | TokenKind::Commit
355 | TokenKind::Rollback
356 | TokenKind::Table
357 | TokenKind::Index
358 | TokenKind::Alter
359 | TokenKind::To
360 | TokenKind::Cascade
361 | TokenKind::Primary
362 | TokenKind::Key
363 | TokenKind::Foreign
364 | TokenKind::References
365 | TokenKind::Unique
366 | TokenKind::Default
367 | TokenKind::If
368 | TokenKind::Exists
369 | TokenKind::Case
370 | TokenKind::When
371 | TokenKind::Then
372 | TokenKind::Else
373 | TokenKind::End
374 | TokenKind::Cast
375 | TokenKind::Union
376 | TokenKind::Intersect
377 | TokenKind::Except
378 | TokenKind::Count
379 | TokenKind::Sum
380 | TokenKind::Avg
381 | TokenKind::Min
382 | TokenKind::Max
383 | TokenKind::Is
384 | TokenKind::In
385 | TokenKind::Like
386 | TokenKind::Between
387 | TokenKind::Null
388 | TokenKind::True
389 | TokenKind::False
390 | TokenKind::Int
391 | TokenKind::IntegerKw
392 | TokenKind::Bigint
393 | TokenKind::Smallint
394 | TokenKind::FloatKw
395 | TokenKind::Double
396 | TokenKind::Real
397 | TokenKind::Varchar
398 | TokenKind::Char
399 | TokenKind::Text
400 | TokenKind::BlobKw
401 | TokenKind::Boolean
402 | TokenKind::Bool
403 | TokenKind::Date
404 | TokenKind::Time
405 | TokenKind::Timestamp
406 | TokenKind::Datetime
407 | TokenKind::Vector
408 | TokenKind::VectorSearch
409 | TokenKind::Embedding
410 | TokenKind::Cosine
411 | TokenKind::Euclidean
412 | TokenKind::DotProduct
413 | TokenKind::ContextWindow
414 | TokenKind::Using
415 | TokenKind::Transaction
416 | TokenKind::Savepoint
417 | TokenKind::Release
418 | TokenKind::Escape
419 | TokenKind::Nulls
420 | TokenKind::First
421 | TokenKind::Last
422 | TokenKind::AutoIncrement
423 | TokenKind::Add
424 | TokenKind::Column
425 | TokenKind::Rename
426 | TokenKind::Collate
427 | TokenKind::Tinyint
428 | TokenKind::Decimal
429 | TokenKind::Numeric
430 | TokenKind::JsonExtract
431 | TokenKind::JsonSet
432 | TokenKind::Ignore
434 | TokenKind::Replace
435 | TokenKind::Conflict
436 | TokenKind::Do
437 | TokenKind::Nothing
438 | TokenKind::Duplicate
439 | TokenKind::Abort
440 | TokenKind::Fail
441 | TokenKind::Returning
442 | TokenKind::Relate
444 | TokenKind::Live
445 | TokenKind::Content
446 | TokenKind::Event
447 | TokenKind::Diff
448 )
449 }
450
451 pub fn from_keyword(s: &str) -> Option<TokenKind<'a>> {
454 let len = s.len();
455 if len == 0 || len > 20 {
456 return None;
457 }
458 let mut buf = [0u8; 20];
459 for (i, &b) in s.as_bytes().iter().enumerate() {
460 buf[i] = b.to_ascii_uppercase();
461 }
462 let upper = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
465 match upper {
466 "SELECT" => Some(TokenKind::Select),
467 "INSERT" => Some(TokenKind::Insert),
468 "UPDATE" => Some(TokenKind::Update),
469 "DELETE" => Some(TokenKind::Delete),
470 "CREATE" => Some(TokenKind::Create),
471 "TABLE" => Some(TokenKind::Table),
472 "DROP" => Some(TokenKind::Drop),
473 "ALTER" => Some(TokenKind::Alter),
474 "ADD" => Some(TokenKind::Add),
475 "COLUMN" => Some(TokenKind::Column),
476 "RENAME" => Some(TokenKind::Rename),
477 "TO" => Some(TokenKind::To),
478 "CASCADE" => Some(TokenKind::Cascade),
479 "INDEX" => Some(TokenKind::Index),
480 "FROM" => Some(TokenKind::From),
481 "WHERE" => Some(TokenKind::Where),
482 "AND" => Some(TokenKind::And),
483 "OR" => Some(TokenKind::Or),
484 "NOT" => Some(TokenKind::Not),
485 "NULL" => Some(TokenKind::Null),
486 "TRUE" => Some(TokenKind::True),
487 "FALSE" => Some(TokenKind::False),
488 "IS" => Some(TokenKind::Is),
489 "IN" => Some(TokenKind::In),
490 "LIKE" => Some(TokenKind::Like),
491 "ESCAPE" => Some(TokenKind::Escape),
492 "BETWEEN" => Some(TokenKind::Between),
493 "JOIN" => Some(TokenKind::Join),
494 "INNER" => Some(TokenKind::Inner),
495 "LEFT" => Some(TokenKind::Left),
496 "RIGHT" => Some(TokenKind::Right),
497 "OUTER" => Some(TokenKind::Outer),
498 "CROSS" => Some(TokenKind::Cross),
499 "ON" => Some(TokenKind::On),
500 "USING" => Some(TokenKind::Using),
501 "AS" => Some(TokenKind::As),
502 "DISTINCT" => Some(TokenKind::Distinct),
503 "ALL" => Some(TokenKind::All),
504 "GROUP" => Some(TokenKind::Group),
505 "HAVING" => Some(TokenKind::Having),
506 "ORDER" => Some(TokenKind::Order),
507 "BY" => Some(TokenKind::By),
508 "ASC" => Some(TokenKind::Asc),
509 "DESC" => Some(TokenKind::Desc),
510 "NULLS" => Some(TokenKind::Nulls),
511 "FIRST" => Some(TokenKind::First),
512 "LAST" => Some(TokenKind::Last),
513 "LIMIT" => Some(TokenKind::Limit),
514 "OFFSET" => Some(TokenKind::Offset),
515 "VALUES" => Some(TokenKind::Values),
516 "INTO" => Some(TokenKind::Into),
517 "SET" => Some(TokenKind::Set),
518 "BEGIN" => Some(TokenKind::Begin),
519 "COMMIT" => Some(TokenKind::Commit),
520 "ROLLBACK" => Some(TokenKind::Rollback),
521 "TRANSACTION" => Some(TokenKind::Transaction),
522 "SAVEPOINT" => Some(TokenKind::Savepoint),
523 "RELEASE" => Some(TokenKind::Release),
524 "PRIMARY" => Some(TokenKind::Primary),
525 "KEY" => Some(TokenKind::Key),
526 "FOREIGN" => Some(TokenKind::Foreign),
527 "REFERENCES" => Some(TokenKind::References),
528 "UNIQUE" => Some(TokenKind::Unique),
529 "DEFAULT" => Some(TokenKind::Default),
530 "AUTOINCREMENT" | "AUTO_INCREMENT" => Some(TokenKind::AutoIncrement),
531 "IF" => Some(TokenKind::If),
532 "EXISTS" => Some(TokenKind::Exists),
533 "CASE" => Some(TokenKind::Case),
534 "WHEN" => Some(TokenKind::When),
535 "THEN" => Some(TokenKind::Then),
536 "ELSE" => Some(TokenKind::Else),
537 "END" => Some(TokenKind::End),
538 "CAST" => Some(TokenKind::Cast),
539 "COLLATE" => Some(TokenKind::Collate),
540 "UNION" => Some(TokenKind::Union),
541 "INTERSECT" => Some(TokenKind::Intersect),
542 "EXCEPT" => Some(TokenKind::Except),
543 "COUNT" => Some(TokenKind::Count),
544 "SUM" => Some(TokenKind::Sum),
545 "AVG" => Some(TokenKind::Avg),
546 "MIN" => Some(TokenKind::Min),
547 "MAX" => Some(TokenKind::Max),
548 "IGNORE" => Some(TokenKind::Ignore),
550 "REPLACE" => Some(TokenKind::Replace),
551 "CONFLICT" => Some(TokenKind::Conflict),
552 "DO" => Some(TokenKind::Do),
553 "NOTHING" => Some(TokenKind::Nothing),
554 "DUPLICATE" => Some(TokenKind::Duplicate),
555 "ABORT" => Some(TokenKind::Abort),
556 "FAIL" => Some(TokenKind::Fail),
557 "RETURNING" => Some(TokenKind::Returning),
558 "INT" => Some(TokenKind::Int),
560 "INTEGER" => Some(TokenKind::IntegerKw),
561 "BIGINT" => Some(TokenKind::Bigint),
562 "SMALLINT" => Some(TokenKind::Smallint),
563 "TINYINT" => Some(TokenKind::Tinyint),
564 "FLOAT" => Some(TokenKind::FloatKw),
565 "DOUBLE" => Some(TokenKind::Double),
566 "REAL" => Some(TokenKind::Real),
567 "DECIMAL" => Some(TokenKind::Decimal),
568 "NUMERIC" => Some(TokenKind::Numeric),
569 "VARCHAR" => Some(TokenKind::Varchar),
570 "CHAR" => Some(TokenKind::Char),
571 "TEXT" => Some(TokenKind::Text),
572 "BLOB" => Some(TokenKind::BlobKw),
573 "BOOLEAN" => Some(TokenKind::Boolean),
574 "BOOL" => Some(TokenKind::Bool),
575 "DATE" => Some(TokenKind::Date),
576 "TIME" => Some(TokenKind::Time),
577 "TIMESTAMP" => Some(TokenKind::Timestamp),
578 "DATETIME" => Some(TokenKind::Datetime),
579 "VECTOR" => Some(TokenKind::Vector),
581 "VECTOR_SEARCH" => Some(TokenKind::VectorSearch),
582 "JSON_EXTRACT" => Some(TokenKind::JsonExtract),
583 "JSON_SET" => Some(TokenKind::JsonSet),
584 "CONTEXT_WINDOW" => Some(TokenKind::ContextWindow),
585 "EMBEDDING" => Some(TokenKind::Embedding),
586 "COSINE" => Some(TokenKind::Cosine),
587 "EUCLIDEAN" => Some(TokenKind::Euclidean),
588 "DOT_PRODUCT" => Some(TokenKind::DotProduct),
589 "RELATE" => Some(TokenKind::Relate),
591 "LIVE" => Some(TokenKind::Live),
592 "CONTENT" => Some(TokenKind::Content),
593 "EVENT" => Some(TokenKind::Event),
594 "DIFF" => Some(TokenKind::Diff),
595 "DEFINE" => Some(TokenKind::Define),
597 "SCOPE" => Some(TokenKind::Scope),
598 "REMOVE" => Some(TokenKind::Remove),
599 "SESSION" => Some(TokenKind::Session),
600 "SIGNIN" => Some(TokenKind::Signin),
601 "SIGNUP" => Some(TokenKind::Signup),
602 "PERMISSIONS" => Some(TokenKind::Permissions),
603 "FOR" => Some(TokenKind::For),
604 _ => None,
605 }
606 }
607}
608
609impl fmt::Display for TokenKind<'_> {
610 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
611 match self {
612 TokenKind::Integer(n) => write!(f, "{}", n),
613 TokenKind::Float(n) => write!(f, "{}", n),
614 TokenKind::String(s) => write!(f, "'{}'", s),
615 TokenKind::Identifier(s) => write!(f, "{}", s),
616 TokenKind::QuotedIdentifier(s) => write!(f, "\"{}\"", s),
617 TokenKind::Select => write!(f, "SELECT"),
618 TokenKind::From => write!(f, "FROM"),
619 TokenKind::Where => write!(f, "WHERE"),
620 TokenKind::Plus => write!(f, "+"),
621 TokenKind::Minus => write!(f, "-"),
622 TokenKind::Star => write!(f, "*"),
623 TokenKind::Slash => write!(f, "/"),
624 TokenKind::Eq => write!(f, "="),
625 TokenKind::Ne => write!(f, "!="),
626 TokenKind::Lt => write!(f, "<"),
627 TokenKind::Le => write!(f, "<="),
628 TokenKind::Gt => write!(f, ">"),
629 TokenKind::Ge => write!(f, ">="),
630 TokenKind::LParen => write!(f, "("),
631 TokenKind::RParen => write!(f, ")"),
632 TokenKind::LBracket => write!(f, "["),
633 TokenKind::RBracket => write!(f, "]"),
634 TokenKind::Comma => write!(f, ","),
635 TokenKind::Semicolon => write!(f, ";"),
636 TokenKind::Dot => write!(f, "."),
637 TokenKind::Eof => write!(f, "EOF"),
638 TokenKind::Null => write!(f, "NULL"),
639 TokenKind::True => write!(f, "TRUE"),
640 TokenKind::False => write!(f, "FALSE"),
641 TokenKind::And => write!(f, "AND"),
642 TokenKind::Or => write!(f, "OR"),
643 TokenKind::Not => write!(f, "NOT"),
644 _ => write!(f, "{:?}", self),
645 }
646 }
647}