1use alloc::string::{String, ToString};
12use alloc::vec::Vec;
13use core::fmt;
14
/// One lexical unit of SQL text, as produced by [`tokenize`].
///
/// `Eq` is not derived because [`Token::Float`] carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ---- Keywords (matched ASCII-case-insensitively by the keyword tables) ----
    Select,
    From,
    Where,
    As,
    Null,
    True,
    False,
    And,
    Or,
    Not,
    Create,
    Table,
    Insert,
    Into,
    Values,
    Index,
    On,
    Begin,
    Commit,
    Rollback,
    Order,
    By,
    Limit,

    // ---- Names and literals ----
    /// Unquoted word that is not a keyword, stored ASCII-lowercased.
    Ident(String),
    /// Contents of a `"…"` or `` `…` `` name with doubled delimiters collapsed; case is kept.
    QuotedIdent(String),
    /// Unsigned run of digits that fits an `i64` (a leading `-` is a separate [`Token::Minus`]).
    Integer(i64),
    /// Numeric literal containing a `.` and/or an exponent.
    Float(f64),
    /// Contents of a `'…'` literal or a dollar-quoted body.
    String(String),

    // ---- Arithmetic and comparison operators ----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `=`
    Eq,
    /// `<>` or `!=`
    NotEq,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>`
    Gt,
    /// `>=`
    GtEq,

    // ---- Punctuation and multi-character operators ----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `.`
    Dot,
    /// `->`
    JsonGet,
    /// `->>`
    JsonGetText,
    /// `#>`
    JsonGetPath,
    /// `#>>`
    JsonGetPathText,
    /// `@>`
    JsonContains,
    /// `<->`
    L2Distance,
    /// `<#>`
    InnerProduct,
    /// `<=>`
    CosineDistance,
    /// `::`
    DoubleColon,
    /// `||`
    Concat,

    // ---- More keywords ----
    Is,
    Between,
    In,
    Like,
    Group,
    Distinct,
    Union,
    All,
    Join,
    Inner,
    Left,
    Cross,
    Outer,
    Default,
    Savepoint,
    Release,
    To,
    Having,
    Show,
    Extract,
    Offset,
    Asc,
    Desc,
    Interval,
    /// `$N` parameter marker; the lexer only emits values in `1..=65535`.
    Placeholder(u16),

    Drop,
    For,
    Tables,
    Except,
    Publication,
    Subscription,
    Connection,

    /// End-of-input marker; always the last token of a successful lex.
    Eof,
}
153
/// The reason a piece of input could not be turned into a token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorKind {
    /// A character that starts no known token; carries that character.
    UnknownChar(char),
    /// A string literal (single-quoted or dollar-quoted) with no closing delimiter.
    UnterminatedString,
    /// A quoted identifier with no closing delimiter.
    UnterminatedQuotedIdent,
    /// A `/* … */` comment with no closing `*/`.
    UnterminatedBlockComment,
    /// A malformed or out-of-range numeric literal or placeholder; carries the offending text.
    BadNumber(String),
}
162
163#[derive(Debug, Clone, PartialEq, Eq)]
164pub struct LexError {
165 pub kind: LexErrorKind,
166 pub pos: usize,
167}
168
169impl fmt::Display for LexError {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 match &self.kind {
172 LexErrorKind::UnknownChar(c) => write!(f, "unknown char {c:?} at byte {}", self.pos),
173 LexErrorKind::UnterminatedString => {
174 write!(f, "unterminated string literal at byte {}", self.pos)
175 }
176 LexErrorKind::UnterminatedQuotedIdent => {
177 write!(f, "unterminated quoted identifier at byte {}", self.pos)
178 }
179 LexErrorKind::UnterminatedBlockComment => {
180 write!(f, "unterminated /* */ comment at byte {}", self.pos)
181 }
182 LexErrorKind::BadNumber(s) => {
183 write!(f, "invalid number literal {s:?} at byte {}", self.pos)
184 }
185 }
186 }
187}
188
189#[allow(clippy::too_many_lines)] pub fn tokenize(input: &str) -> Result<Vec<Token>, LexError> {
192 let bytes = input.as_bytes();
193 let mut i = 0usize;
194 let mut out = Vec::new();
195
196 while i < bytes.len() {
197 let b = bytes[i];
198 match b {
199 b' ' | b'\t' | b'\n' | b'\r' => {
200 i += 1;
201 }
202 b'-' if peek_eq(bytes, i + 1, b'-') => {
203 i += 2;
204 while i < bytes.len() && bytes[i] != b'\n' {
205 i += 1;
206 }
207 }
208 b'/' if peek_eq(bytes, i + 1, b'*') => {
209 let start = i;
210 i += 2;
211 let mut closed = false;
212 while i + 1 < bytes.len() {
213 if bytes[i] == b'*' && bytes[i + 1] == b'/' {
214 i += 2;
215 closed = true;
216 break;
217 }
218 i += 1;
219 }
220 if !closed {
221 return Err(LexError {
222 kind: LexErrorKind::UnterminatedBlockComment,
223 pos: start,
224 });
225 }
226 }
227 b'\'' => {
228 let (tok, consumed) = lex_quoted(input, i, b'\'', false)?;
229 out.push(tok);
230 i += consumed;
231 }
232 b'"' => {
233 let (tok, consumed) = lex_quoted(input, i, b'"', true)?;
234 out.push(tok);
235 i += consumed;
236 }
237 b'`' => {
241 let (tok, consumed) = lex_quoted(input, i, b'`', true)?;
242 out.push(tok);
243 i += consumed;
244 }
245 b if b.is_ascii_alphabetic() || b == b'_' => {
246 let start = i;
247 i += 1;
248 while i < bytes.len() {
249 let c = bytes[i];
250 if c.is_ascii_alphanumeric() || c == b'_' {
251 i += 1;
252 } else {
253 break;
254 }
255 }
256 let raw = &input[start..i];
257 out.push(keyword_or_ident_raw(raw));
261 }
262 b if b.is_ascii_digit() => {
263 let (tok, consumed) =
264 lex_number(&input[i..]).map_err(|kind| LexError { kind, pos: i })?;
265 out.push(tok);
266 i += consumed;
267 }
268 b'.' if peek_pred(bytes, i + 1, u8::is_ascii_digit) => {
269 let (tok, consumed) =
270 lex_number(&input[i..]).map_err(|kind| LexError { kind, pos: i })?;
271 out.push(tok);
272 i += consumed;
273 }
274 b'+' => single(&mut out, Token::Plus, &mut i),
275 b'-' => {
276 if peek_eq(bytes, i + 1, b'>') && peek_eq(bytes, i + 2, b'>') {
279 out.push(Token::JsonGetText);
280 i += 3;
281 } else if peek_eq(bytes, i + 1, b'>') {
282 out.push(Token::JsonGet);
283 i += 2;
284 } else {
285 single(&mut out, Token::Minus, &mut i);
286 }
287 }
288 b'#' => {
290 if peek_eq(bytes, i + 1, b'>') && peek_eq(bytes, i + 2, b'>') {
291 out.push(Token::JsonGetPathText);
292 i += 3;
293 } else if peek_eq(bytes, i + 1, b'>') {
294 out.push(Token::JsonGetPath);
295 i += 2;
296 } else {
297 return Err(LexError {
298 kind: LexErrorKind::UnknownChar('#'),
299 pos: i,
300 });
301 }
302 }
303 b'@' => {
305 if peek_eq(bytes, i + 1, b'>') {
306 out.push(Token::JsonContains);
307 i += 2;
308 } else {
309 return Err(LexError {
310 kind: LexErrorKind::UnknownChar('@'),
311 pos: i,
312 });
313 }
314 }
315 b'*' => single(&mut out, Token::Star, &mut i),
316 b'/' => single(&mut out, Token::Slash, &mut i),
317 b'(' => single(&mut out, Token::LParen, &mut i),
318 b')' => single(&mut out, Token::RParen, &mut i),
319 b'[' => single(&mut out, Token::LBracket, &mut i),
320 b']' => single(&mut out, Token::RBracket, &mut i),
321 b',' => single(&mut out, Token::Comma, &mut i),
322 b';' => single(&mut out, Token::Semicolon, &mut i),
323 b'.' => single(&mut out, Token::Dot, &mut i),
324 b'=' => single(&mut out, Token::Eq, &mut i),
325 b'<' => {
326 if peek_eq(bytes, i + 1, b'=') && peek_eq(bytes, i + 2, b'>') {
327 out.push(Token::CosineDistance);
328 i += 3;
329 } else if peek_eq(bytes, i + 1, b'#') && peek_eq(bytes, i + 2, b'>') {
330 out.push(Token::InnerProduct);
331 i += 3;
332 } else if peek_eq(bytes, i + 1, b'-') && peek_eq(bytes, i + 2, b'>') {
333 out.push(Token::L2Distance);
334 i += 3;
335 } else if peek_eq(bytes, i + 1, b'=') {
336 out.push(Token::LtEq);
337 i += 2;
338 } else if peek_eq(bytes, i + 1, b'>') {
339 out.push(Token::NotEq);
340 i += 2;
341 } else {
342 out.push(Token::Lt);
343 i += 1;
344 }
345 }
346 b':' if peek_eq(bytes, i + 1, b':') => {
347 out.push(Token::DoubleColon);
348 i += 2;
349 }
350 b'|' if peek_eq(bytes, i + 1, b'|') => {
351 out.push(Token::Concat);
352 i += 2;
353 }
354 b'>' => {
355 if peek_eq(bytes, i + 1, b'=') {
356 out.push(Token::GtEq);
357 i += 2;
358 } else {
359 out.push(Token::Gt);
360 i += 1;
361 }
362 }
363 b'!' if peek_eq(bytes, i + 1, b'=') => {
364 out.push(Token::NotEq);
365 i += 2;
366 }
367 b'$' if i + 1 < bytes.len() && bytes[i + 1] == b'$' => {
375 let end = find_dollar_tag_end(bytes, i + 2, b"$$");
377 let body = match end {
378 Some(e) => &input[i + 2..e],
379 None => {
380 return Err(LexError {
381 kind: LexErrorKind::UnterminatedString,
382 pos: i,
383 });
384 }
385 };
386 out.push(Token::String(body.to_string()));
387 i = end.unwrap() + 2;
388 }
389 b'$' if i + 1 < bytes.len()
390 && (bytes[i + 1].is_ascii_alphabetic() || bytes[i + 1] == b'_') =>
391 {
392 let mut j = i + 1;
395 while j < bytes.len() && (bytes[j].is_ascii_alphanumeric() || bytes[j] == b'_') {
396 j += 1;
397 }
398 if j >= bytes.len() || bytes[j] != b'$' {
399 let ch = input[i..].chars().next().unwrap_or('?');
402 return Err(LexError {
403 kind: LexErrorKind::UnknownChar(ch),
404 pos: i,
405 });
406 }
407 let close: alloc::vec::Vec<u8> = bytes[i..=j].to_vec();
408 let end = find_dollar_tag_end(bytes, j + 1, &close);
409 let body = match end {
410 Some(e) => &input[j + 1..e],
411 None => {
412 return Err(LexError {
413 kind: LexErrorKind::UnterminatedString,
414 pos: i,
415 });
416 }
417 };
418 out.push(Token::String(body.to_string()));
419 i = end.unwrap() + close.len();
420 }
421 b'$' if i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit() => {
425 let mut j = i + 1;
426 let mut n: u32 = 0;
427 while j < bytes.len() && bytes[j].is_ascii_digit() {
428 n = n
429 .saturating_mul(10)
430 .saturating_add(u32::from(bytes[j] - b'0'));
431 j += 1;
432 }
433 if n == 0 || n > u32::from(u16::MAX) {
434 return Err(LexError {
435 kind: LexErrorKind::BadNumber(input[i..j].to_string()),
436 pos: i,
437 });
438 }
439 #[allow(clippy::cast_possible_truncation)]
440 out.push(Token::Placeholder(n as u16));
441 i = j;
442 }
443 _ => {
444 let ch = input[i..].chars().next().unwrap_or('?');
445 return Err(LexError {
446 kind: LexErrorKind::UnknownChar(ch),
447 pos: i,
448 });
449 }
450 }
451 }
452 out.push(Token::Eof);
453 Ok(out)
454}
455
/// Reports whether the byte at index `i` exists and equals `target`.
///
/// An out-of-range `i` yields `false` rather than panicking.
fn peek_eq(bytes: &[u8], i: usize, target: u8) -> bool {
    matches!(bytes.get(i), Some(&found) if found == target)
}
459
/// Finds the first occurrence of `tag` in `bytes` at or after index `from`.
///
/// Returns the absolute index at which the match starts, or `None` when there is no match,
/// when `tag` is empty, or when `from` lies beyond the end of `bytes`.
fn find_dollar_tag_end(bytes: &[u8], from: usize, tag: &[u8]) -> Option<usize> {
    // `windows` panics on a zero width, and an empty tag never matches anyway.
    if tag.is_empty() {
        return None;
    }
    let tail = bytes.get(from..)?;
    tail.windows(tag.len())
        .position(|candidate| candidate == tag)
        .map(|offset| from + offset)
}
475
/// Reports whether the byte at index `i` exists and satisfies `pred`.
///
/// `pred` is not called when `i` is out of range; the result is then `false`.
fn peek_pred<F: Fn(&u8) -> bool>(bytes: &[u8], i: usize, pred: F) -> bool {
    match bytes.get(i) {
        Some(byte) => pred(byte),
        None => false,
    }
}
479
480fn single(out: &mut Vec<Token>, tok: Token, i: &mut usize) {
481 out.push(tok);
482 *i += 1;
483}
484
485fn keyword_or_ident_raw(raw: &str) -> Token {
495 let b = raw.as_bytes();
496 let tok = match b.len() {
497 2 => kw_len2(b),
498 3 => kw_len3(b),
499 4 => kw_len4(b),
500 5 => kw_len5(b),
501 6 => kw_len6(b),
502 7 => kw_len7(b),
503 8 => kw_len8(b),
504 9 => kw_len9(b),
505 10 => kw_len10(b),
506 11 => kw_len11(b),
507 12 => kw_len12(b),
508 _ => None,
509 };
510 match tok {
511 Some(t) => t,
512 None => Token::Ident(raw.to_ascii_lowercase()),
514 }
515}
516
/// Compares `input` against `lower`, folding ASCII case on the `input` side only.
///
/// `lower` is compared as-is, so it must already be lowercase for a match to be possible on
/// alphabetic bytes. Slices of different lengths never match.
#[inline]
fn eq_ci(input: &[u8], lower: &[u8]) -> bool {
    input.len() == lower.len()
        && input
            .iter()
            .zip(lower)
            .all(|(have, want)| have.to_ascii_lowercase() == *want)
}
534
535#[inline]
536fn kw_len2(b: &[u8]) -> Option<Token> {
537 if eq_ci(b, b"as") {
539 return Some(Token::As);
540 }
541 if eq_ci(b, b"by") {
542 return Some(Token::By);
543 }
544 if eq_ci(b, b"in") {
545 return Some(Token::In);
546 }
547 if eq_ci(b, b"is") {
548 return Some(Token::Is);
549 }
550 if eq_ci(b, b"on") {
551 return Some(Token::On);
552 }
553 if eq_ci(b, b"or") {
554 return Some(Token::Or);
555 }
556 if eq_ci(b, b"to") {
557 return Some(Token::To);
558 }
559 None
560}
561
562#[inline]
563fn kw_len3(b: &[u8]) -> Option<Token> {
564 if eq_ci(b, b"for") {
566 return Some(Token::For);
567 }
568 if eq_ci(b, b"all") {
569 return Some(Token::All);
570 }
571 if eq_ci(b, b"and") {
572 return Some(Token::And);
573 }
574 if eq_ci(b, b"asc") {
575 return Some(Token::Asc);
576 }
577 if eq_ci(b, b"not") {
578 return Some(Token::Not);
579 }
580 None
581}
582
583#[inline]
584fn kw_len4(b: &[u8]) -> Option<Token> {
585 if eq_ci(b, b"from") {
587 return Some(Token::From);
588 }
589 if eq_ci(b, b"drop") {
590 return Some(Token::Drop);
591 }
592 if eq_ci(b, b"null") {
593 return Some(Token::Null);
594 }
595 if eq_ci(b, b"true") {
596 return Some(Token::True);
597 }
598 if eq_ci(b, b"into") {
599 return Some(Token::Into);
600 }
601 if eq_ci(b, b"like") {
602 return Some(Token::Like);
603 }
604 if eq_ci(b, b"join") {
605 return Some(Token::Join);
606 }
607 if eq_ci(b, b"left") {
608 return Some(Token::Left);
609 }
610 if eq_ci(b, b"show") {
611 return Some(Token::Show);
612 }
613 if eq_ci(b, b"desc") {
614 return Some(Token::Desc);
615 }
616 None
617}
618
619#[inline]
620fn kw_len5(b: &[u8]) -> Option<Token> {
621 if eq_ci(b, b"false") {
624 return Some(Token::False);
625 }
626 if eq_ci(b, b"where") {
627 return Some(Token::Where);
628 }
629 if eq_ci(b, b"table") {
630 return Some(Token::Table);
631 }
632 if eq_ci(b, b"index") {
633 return Some(Token::Index);
634 }
635 if eq_ci(b, b"begin") {
636 return Some(Token::Begin);
637 }
638 if eq_ci(b, b"order") {
639 return Some(Token::Order);
640 }
641 if eq_ci(b, b"limit") {
642 return Some(Token::Limit);
643 }
644 if eq_ci(b, b"group") {
645 return Some(Token::Group);
646 }
647 if eq_ci(b, b"union") {
648 return Some(Token::Union);
649 }
650 if eq_ci(b, b"inner") {
651 return Some(Token::Inner);
652 }
653 if eq_ci(b, b"cross") {
654 return Some(Token::Cross);
655 }
656 if eq_ci(b, b"outer") {
657 return Some(Token::Outer);
658 }
659 None
660}
661
662#[inline]
663fn kw_len6(b: &[u8]) -> Option<Token> {
664 if eq_ci(b, b"select") {
666 return Some(Token::Select);
667 }
668 if eq_ci(b, b"tables") {
669 return Some(Token::Tables);
670 }
671 if eq_ci(b, b"except") {
672 return Some(Token::Except);
673 }
674 if eq_ci(b, b"create") {
675 return Some(Token::Create);
676 }
677 if eq_ci(b, b"insert") {
678 return Some(Token::Insert);
679 }
680 if eq_ci(b, b"values") {
681 return Some(Token::Values);
682 }
683 if eq_ci(b, b"commit") {
684 return Some(Token::Commit);
685 }
686 if eq_ci(b, b"having") {
687 return Some(Token::Having);
688 }
689 if eq_ci(b, b"offset") {
690 return Some(Token::Offset);
691 }
692 None
693}
694
695#[inline]
696fn kw_len7(b: &[u8]) -> Option<Token> {
697 if eq_ci(b, b"between") {
699 return Some(Token::Between);
700 }
701 if eq_ci(b, b"default") {
702 return Some(Token::Default);
703 }
704 if eq_ci(b, b"release") {
705 return Some(Token::Release);
706 }
707 if eq_ci(b, b"extract") {
708 return Some(Token::Extract);
709 }
710 None
711}
712
713#[inline]
714fn kw_len8(b: &[u8]) -> Option<Token> {
715 if eq_ci(b, b"rollback") {
717 return Some(Token::Rollback);
718 }
719 if eq_ci(b, b"distinct") {
720 return Some(Token::Distinct);
721 }
722 if eq_ci(b, b"interval") {
723 return Some(Token::Interval);
724 }
725 None
726}
727
728#[inline]
729fn kw_len9(b: &[u8]) -> Option<Token> {
730 if eq_ci(b, b"savepoint") {
732 return Some(Token::Savepoint);
733 }
734 None
735}
736
737#[inline]
738fn kw_len10(b: &[u8]) -> Option<Token> {
739 if eq_ci(b, b"connection") {
741 return Some(Token::Connection);
742 }
743 None
744}
745
746#[inline]
747fn kw_len11(b: &[u8]) -> Option<Token> {
748 if eq_ci(b, b"publication") {
750 return Some(Token::Publication);
751 }
752 None
753}
754
755#[inline]
756fn kw_len12(b: &[u8]) -> Option<Token> {
757 if eq_ci(b, b"subscription") {
759 return Some(Token::Subscription);
760 }
761 None
762}
763
764fn lex_quoted(
771 input: &str,
772 start: usize,
773 quote: u8,
774 is_ident: bool,
775) -> Result<(Token, usize), LexError> {
776 let bytes = input.as_bytes();
777 let mut i = start + 1;
778 let mut s = String::new();
779 loop {
780 if i >= bytes.len() {
781 return Err(LexError {
782 kind: if is_ident {
783 LexErrorKind::UnterminatedQuotedIdent
784 } else {
785 LexErrorKind::UnterminatedString
786 },
787 pos: start,
788 });
789 }
790 if bytes[i] == quote {
791 if peek_eq(bytes, i + 1, quote) {
792 s.push(quote as char);
793 i += 2;
794 } else {
795 i += 1;
796 break;
797 }
798 } else {
799 let ch = input[i..].chars().next().expect("non-empty UTF-8 boundary");
800 s.push(ch);
801 i += ch.len_utf8();
802 }
803 }
804 let tok = if is_ident {
805 Token::QuotedIdent(s)
806 } else {
807 Token::String(s)
808 };
809 Ok((tok, i - start))
810}
811
812fn lex_number(s: &str) -> Result<(Token, usize), LexErrorKind> {
813 let bytes = s.as_bytes();
814 let mut i = 0usize;
815 let mut is_float = false;
816
817 while i < bytes.len() && bytes[i].is_ascii_digit() {
818 i += 1;
819 }
820 if i < bytes.len() && bytes[i] == b'.' {
821 is_float = true;
822 i += 1;
823 while i < bytes.len() && bytes[i].is_ascii_digit() {
824 i += 1;
825 }
826 }
827 if i < bytes.len() && (bytes[i] == b'e' || bytes[i] == b'E') {
828 is_float = true;
829 i += 1;
830 if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
831 i += 1;
832 }
833 let exp_start = i;
834 while i < bytes.len() && bytes[i].is_ascii_digit() {
835 i += 1;
836 }
837 if exp_start == i {
838 return Err(LexErrorKind::BadNumber(s[..i].to_string()));
839 }
840 }
841
842 let lit = &s[..i];
843 if is_float {
844 lit.parse::<f64>()
845 .map(|v| (Token::Float(v), i))
846 .map_err(|_| LexErrorKind::BadNumber(lit.to_string()))
847 } else {
848 lit.parse::<i64>()
849 .map(|v| (Token::Integer(v), i))
850 .map_err(|_| LexErrorKind::BadNumber(lit.to_string()))
851 }
852}
853
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Asserts that `sql` lexes to `expected` followed by the trailing `Eof`.
    fn assert_lexes(sql: &str, mut expected: Vec<Token>) {
        expected.push(Token::Eof);
        assert_eq!(tokenize(sql).expect("lex ok"), expected);
    }

    /// Shorthand for an unquoted identifier token.
    fn id(name: &str) -> Token {
        Token::Ident(name.into())
    }

    /// Lexes `sql`, which must fail, and returns the error.
    fn lex_err(sql: &str) -> LexError {
        tokenize(sql).unwrap_err()
    }

    #[test]
    fn empty_yields_only_eof() {
        assert_lexes("", vec![]);
    }

    #[test]
    fn whitespace_only_yields_only_eof() {
        assert_lexes(" \t\n ", vec![]);
    }

    #[test]
    fn keywords_are_case_insensitive() {
        assert_lexes(
            "SELECT select Select",
            vec![Token::Select, Token::Select, Token::Select],
        );
    }

    #[test]
    fn identifiers_lowercase_ascii() {
        assert_lexes(
            "hello WORLD _x x1",
            vec![id("hello"), id("world"), id("_x"), id("x1")],
        );
    }

    #[test]
    fn quoted_identifier_keeps_case_and_handles_embedded_quote() {
        assert_lexes(
            r#""User Name" "a""b""#,
            vec![
                Token::QuotedIdent("User Name".into()),
                Token::QuotedIdent("a\"b".into()),
            ],
        );
    }

    #[test]
    fn integer_and_float_literals() {
        assert_lexes(
            "0 42 1.5 .5 1e10 2.5e-3",
            vec![
                Token::Integer(0),
                Token::Integer(42),
                Token::Float(1.5),
                Token::Float(0.5),
                Token::Float(1e10),
                Token::Float(2.5e-3),
            ],
        );
    }

    #[test]
    fn negative_number_is_minus_then_integer() {
        // The sign is never folded into the literal; that is left to the parser.
        assert_lexes("-42", vec![Token::Minus, Token::Integer(42)]);
    }

    #[test]
    fn string_literal_doubled_quote_escape() {
        assert_lexes(
            "'hello' 'it''s'",
            vec![Token::String("hello".into()), Token::String("it's".into())],
        );
    }

    #[test]
    fn all_comparison_and_arithmetic_operators() {
        assert_lexes(
            "= <> != < <= > >= + - * /",
            vec![
                Token::Eq,
                Token::NotEq,
                Token::NotEq,
                Token::Lt,
                Token::LtEq,
                Token::Gt,
                Token::GtEq,
                Token::Plus,
                Token::Minus,
                Token::Star,
                Token::Slash,
            ],
        );
    }

    #[test]
    fn punctuation() {
        assert_lexes(
            "( ) , ; .",
            vec![
                Token::LParen,
                Token::RParen,
                Token::Comma,
                Token::Semicolon,
                Token::Dot,
            ],
        );
    }

    #[test]
    fn line_comment_skipped() {
        assert_lexes(
            "SELECT -- trailing junk\nFROM",
            vec![Token::Select, Token::From],
        );
    }

    #[test]
    fn block_comment_skipped() {
        assert_lexes(
            "SELECT /* skipped */ 1",
            vec![Token::Select, Token::Integer(1)],
        );
    }

    #[test]
    fn unterminated_string_errors() {
        let failure = lex_err("'oops");
        assert_eq!(failure.kind, LexErrorKind::UnterminatedString);
        assert_eq!(failure.pos, 0);
    }

    #[test]
    fn unterminated_block_comment_errors() {
        assert_eq!(
            lex_err("/* never closed").kind,
            LexErrorKind::UnterminatedBlockComment
        );
    }

    #[test]
    fn unknown_char_errors() {
        assert_eq!(lex_err("@").kind, LexErrorKind::UnknownChar('@'));
    }

    #[test]
    fn dot_in_qualified_column() {
        assert_lexes("t.col", vec![id("t"), Token::Dot, id("col")]);
    }

    #[test]
    fn brackets_are_distinct_tokens() {
        assert_lexes("[ ]", vec![Token::LBracket, Token::RBracket]);
    }

    #[test]
    fn l2_distance_is_three_char_token() {
        assert_lexes("a <-> b", vec![id("a"), Token::L2Distance, id("b")]);
        // Without the closing `>` the operator falls apart into `<` and `-`.
        assert_lexes("a <- b", vec![id("a"), Token::Lt, Token::Minus, id("b")]);
    }

    #[test]
    fn order_by_limit_are_keywords() {
        assert_lexes(
            "ORDER BY LIMIT",
            vec![Token::Order, Token::By, Token::Limit],
        );
    }

    #[test]
    fn inner_product_operator_3char() {
        assert_lexes("a <#> b", vec![id("a"), Token::InnerProduct, id("b")]);
    }

    #[test]
    fn cosine_distance_operator_3char() {
        assert_lexes("a <=> b", vec![id("a"), Token::CosineDistance, id("b")]);
        // The two-byte prefix on its own is still plain `<=`.
        assert_lexes("a <= b", vec![id("a"), Token::LtEq, id("b")]);
    }

    #[test]
    fn double_colon_cast_token() {
        assert_lexes("x::INT", vec![id("x"), Token::DoubleColon, id("int")]);
    }

    #[test]
    fn lone_single_colon_is_unknown_char() {
        assert_eq!(lex_err(":x").kind, LexErrorKind::UnknownChar(':'));
    }
}