1use alloc::string::{String, ToString};
12use alloc::vec::Vec;
13use core::fmt;
14
/// A lexical token produced by [`tokenize`].
///
/// Keyword variants are recognised ASCII-case-insensitively from bare words.
/// Every successful token stream ends with [`Token::Eof`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // --- Keywords ---
    Select,
    From,
    Where,
    As,
    Null,
    True,
    False,
    And,
    Or,
    Not,
    Create,
    Table,
    Insert,
    Into,
    Values,
    Index,
    On,
    Begin,
    Commit,
    Rollback,
    Order,
    By,
    Limit,

    // --- Identifiers and literals ---
    /// Bare (unquoted) word that is not a keyword, stored ASCII-lowercased.
    Ident(String),
    /// Identifier written between double quotes or backticks. The text is kept
    /// as written; a doubled delimiter stands for one literal delimiter.
    QuotedIdent(String),
    /// Number literal with neither a `.` nor an exponent.
    Integer(i64),
    /// Number literal containing a `.` and/or an exponent.
    Float(f64),
    /// Single-quoted string literal; `''` inside it stands for one `'`.
    String(String),

    // --- Arithmetic and comparison operators ---
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `=`
    Eq,
    /// `<>` or `!=`
    NotEq,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>`
    Gt,
    /// `>=`
    GtEq,

    // --- Punctuation ---
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `.`
    Dot,

    // --- Multi-character operators ---
    /// `->`
    JsonGet,
    /// `->>`
    JsonGetText,
    /// `#>`
    JsonGetPath,
    /// `#>>`
    JsonGetPathText,
    /// `@>`
    JsonContains,
    /// `<->`
    L2Distance,
    /// `<#>`
    InnerProduct,
    /// `<=>`
    CosineDistance,
    /// `::`
    DoubleColon,
    /// `||`
    Concat,

    // --- More keywords ---
    Is,
    Between,
    In,
    Like,
    Group,
    Distinct,
    Union,
    All,
    Join,
    Inner,
    Left,
    Cross,
    Outer,
    Default,
    Savepoint,
    Release,
    To,
    Having,
    Show,
    Extract,
    Offset,
    Asc,
    Desc,
    Interval,
    /// `$N` positional placeholder; the lexer only accepts `N` in `1..=65535`.
    Placeholder(u16),

    Drop,
    For,
    Tables,
    Except,
    Publication,
    Subscription,
    Connection,

    /// End-of-input marker, pushed once after the last real token.
    Eof,
}
153
/// The reason the lexer rejected its input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorKind {
    /// A character that does not begin any token.
    UnknownChar(char),
    /// A single-quoted string literal with no closing quote.
    UnterminatedString,
    /// A double-quoted or backtick-quoted identifier with no closing delimiter.
    UnterminatedQuotedIdent,
    /// A `/* ... */` comment with no closing `*/`.
    UnterminatedBlockComment,
    /// A number literal or `$N` placeholder that could not be converted;
    /// carries the offending source text.
    BadNumber(String),
}
162
163#[derive(Debug, Clone, PartialEq, Eq)]
164pub struct LexError {
165 pub kind: LexErrorKind,
166 pub pos: usize,
167}
168
169impl fmt::Display for LexError {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 match &self.kind {
172 LexErrorKind::UnknownChar(c) => write!(f, "unknown char {c:?} at byte {}", self.pos),
173 LexErrorKind::UnterminatedString => {
174 write!(f, "unterminated string literal at byte {}", self.pos)
175 }
176 LexErrorKind::UnterminatedQuotedIdent => {
177 write!(f, "unterminated quoted identifier at byte {}", self.pos)
178 }
179 LexErrorKind::UnterminatedBlockComment => {
180 write!(f, "unterminated /* */ comment at byte {}", self.pos)
181 }
182 LexErrorKind::BadNumber(s) => {
183 write!(f, "invalid number literal {s:?} at byte {}", self.pos)
184 }
185 }
186 }
187}
188
189#[allow(clippy::too_many_lines)] pub fn tokenize(input: &str) -> Result<Vec<Token>, LexError> {
192 let bytes = input.as_bytes();
193 let mut i = 0usize;
194 let mut out = Vec::new();
195
196 while i < bytes.len() {
197 let b = bytes[i];
198 match b {
199 b' ' | b'\t' | b'\n' | b'\r' => {
200 i += 1;
201 }
202 b'-' if peek_eq(bytes, i + 1, b'-') => {
203 i += 2;
204 while i < bytes.len() && bytes[i] != b'\n' {
205 i += 1;
206 }
207 }
208 b'/' if peek_eq(bytes, i + 1, b'*') => {
209 let start = i;
210 i += 2;
211 let mut closed = false;
212 while i + 1 < bytes.len() {
213 if bytes[i] == b'*' && bytes[i + 1] == b'/' {
214 i += 2;
215 closed = true;
216 break;
217 }
218 i += 1;
219 }
220 if !closed {
221 return Err(LexError {
222 kind: LexErrorKind::UnterminatedBlockComment,
223 pos: start,
224 });
225 }
226 }
227 b'\'' => {
228 let (tok, consumed) = lex_quoted(input, i, b'\'', false)?;
229 out.push(tok);
230 i += consumed;
231 }
232 b'"' => {
233 let (tok, consumed) = lex_quoted(input, i, b'"', true)?;
234 out.push(tok);
235 i += consumed;
236 }
237 b'`' => {
241 let (tok, consumed) = lex_quoted(input, i, b'`', true)?;
242 out.push(tok);
243 i += consumed;
244 }
245 b if b.is_ascii_alphabetic() || b == b'_' => {
246 let start = i;
247 i += 1;
248 while i < bytes.len() {
249 let c = bytes[i];
250 if c.is_ascii_alphanumeric() || c == b'_' {
251 i += 1;
252 } else {
253 break;
254 }
255 }
256 let raw = &input[start..i];
257 out.push(keyword_or_ident_raw(raw));
261 }
262 b if b.is_ascii_digit() => {
263 let (tok, consumed) =
264 lex_number(&input[i..]).map_err(|kind| LexError { kind, pos: i })?;
265 out.push(tok);
266 i += consumed;
267 }
268 b'.' if peek_pred(bytes, i + 1, u8::is_ascii_digit) => {
269 let (tok, consumed) =
270 lex_number(&input[i..]).map_err(|kind| LexError { kind, pos: i })?;
271 out.push(tok);
272 i += consumed;
273 }
274 b'+' => single(&mut out, Token::Plus, &mut i),
275 b'-' => {
276 if peek_eq(bytes, i + 1, b'>') && peek_eq(bytes, i + 2, b'>') {
279 out.push(Token::JsonGetText);
280 i += 3;
281 } else if peek_eq(bytes, i + 1, b'>') {
282 out.push(Token::JsonGet);
283 i += 2;
284 } else {
285 single(&mut out, Token::Minus, &mut i);
286 }
287 }
288 b'#' => {
290 if peek_eq(bytes, i + 1, b'>') && peek_eq(bytes, i + 2, b'>') {
291 out.push(Token::JsonGetPathText);
292 i += 3;
293 } else if peek_eq(bytes, i + 1, b'>') {
294 out.push(Token::JsonGetPath);
295 i += 2;
296 } else {
297 return Err(LexError {
298 kind: LexErrorKind::UnknownChar('#'),
299 pos: i,
300 });
301 }
302 }
303 b'@' => {
305 if peek_eq(bytes, i + 1, b'>') {
306 out.push(Token::JsonContains);
307 i += 2;
308 } else {
309 return Err(LexError {
310 kind: LexErrorKind::UnknownChar('@'),
311 pos: i,
312 });
313 }
314 }
315 b'*' => single(&mut out, Token::Star, &mut i),
316 b'/' => single(&mut out, Token::Slash, &mut i),
317 b'(' => single(&mut out, Token::LParen, &mut i),
318 b')' => single(&mut out, Token::RParen, &mut i),
319 b'[' => single(&mut out, Token::LBracket, &mut i),
320 b']' => single(&mut out, Token::RBracket, &mut i),
321 b',' => single(&mut out, Token::Comma, &mut i),
322 b';' => single(&mut out, Token::Semicolon, &mut i),
323 b'.' => single(&mut out, Token::Dot, &mut i),
324 b'=' => single(&mut out, Token::Eq, &mut i),
325 b'<' => {
326 if peek_eq(bytes, i + 1, b'=') && peek_eq(bytes, i + 2, b'>') {
327 out.push(Token::CosineDistance);
328 i += 3;
329 } else if peek_eq(bytes, i + 1, b'#') && peek_eq(bytes, i + 2, b'>') {
330 out.push(Token::InnerProduct);
331 i += 3;
332 } else if peek_eq(bytes, i + 1, b'-') && peek_eq(bytes, i + 2, b'>') {
333 out.push(Token::L2Distance);
334 i += 3;
335 } else if peek_eq(bytes, i + 1, b'=') {
336 out.push(Token::LtEq);
337 i += 2;
338 } else if peek_eq(bytes, i + 1, b'>') {
339 out.push(Token::NotEq);
340 i += 2;
341 } else {
342 out.push(Token::Lt);
343 i += 1;
344 }
345 }
346 b':' if peek_eq(bytes, i + 1, b':') => {
347 out.push(Token::DoubleColon);
348 i += 2;
349 }
350 b'|' if peek_eq(bytes, i + 1, b'|') => {
351 out.push(Token::Concat);
352 i += 2;
353 }
354 b'>' => {
355 if peek_eq(bytes, i + 1, b'=') {
356 out.push(Token::GtEq);
357 i += 2;
358 } else {
359 out.push(Token::Gt);
360 i += 1;
361 }
362 }
363 b'!' if peek_eq(bytes, i + 1, b'=') => {
364 out.push(Token::NotEq);
365 i += 2;
366 }
367 b'$' if i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit() => {
373 let mut j = i + 1;
374 let mut n: u32 = 0;
375 while j < bytes.len() && bytes[j].is_ascii_digit() {
376 n = n.saturating_mul(10).saturating_add(u32::from(bytes[j] - b'0'));
377 j += 1;
378 }
379 if n == 0 || n > u32::from(u16::MAX) {
380 return Err(LexError {
381 kind: LexErrorKind::BadNumber(input[i..j].to_string()),
382 pos: i,
383 });
384 }
385 #[allow(clippy::cast_possible_truncation)]
386 out.push(Token::Placeholder(n as u16));
387 i = j;
388 }
389 _ => {
390 let ch = input[i..].chars().next().unwrap_or('?');
391 return Err(LexError {
392 kind: LexErrorKind::UnknownChar(ch),
393 pos: i,
394 });
395 }
396 }
397 }
398 out.push(Token::Eof);
399 Ok(out)
400}
401
/// Returns `true` when `bytes` has a byte at index `i` and that byte equals
/// `target`. An out-of-range index yields `false`.
fn peek_eq(bytes: &[u8], i: usize, target: u8) -> bool {
    match bytes.get(i) {
        Some(&found) => found == target,
        None => false,
    }
}
405
/// Returns `true` when `bytes` has a byte at index `i` and `pred` accepts it.
/// An out-of-range index yields `false` without calling `pred`.
fn peek_pred<F: Fn(&u8) -> bool>(bytes: &[u8], i: usize, pred: F) -> bool {
    match bytes.get(i) {
        Some(byte) => pred(byte),
        None => false,
    }
}
409
410fn single(out: &mut Vec<Token>, tok: Token, i: &mut usize) {
411 out.push(tok);
412 *i += 1;
413}
414
415fn keyword_or_ident_raw(raw: &str) -> Token {
425 let b = raw.as_bytes();
426 let tok = match b.len() {
427 2 => kw_len2(b),
428 3 => kw_len3(b),
429 4 => kw_len4(b),
430 5 => kw_len5(b),
431 6 => kw_len6(b),
432 7 => kw_len7(b),
433 8 => kw_len8(b),
434 9 => kw_len9(b),
435 10 => kw_len10(b),
436 11 => kw_len11(b),
437 12 => kw_len12(b),
438 _ => None,
439 };
440 match tok {
441 Some(t) => t,
442 None => Token::Ident(raw.to_ascii_lowercase()),
444 }
445}
446
/// Compares `input` against the keyword `lower`, ignoring ASCII case in `input`.
///
/// Returns `true` only when both slices have the same length and every byte of
/// `input`, after ASCII lowercasing, equals the byte of `lower` at the same
/// position. `lower` is compared verbatim, so it must already be lowercase for
/// its letters to match.
#[inline]
fn eq_ci(input: &[u8], lower: &[u8]) -> bool {
    input.len() == lower.len()
        && input
            .iter()
            .zip(lower)
            .all(|(byte, expected)| byte.to_ascii_lowercase() == *expected)
}
464
465#[inline]
466fn kw_len2(b: &[u8]) -> Option<Token> {
467 if eq_ci(b, b"as") {
469 return Some(Token::As);
470 }
471 if eq_ci(b, b"by") {
472 return Some(Token::By);
473 }
474 if eq_ci(b, b"in") {
475 return Some(Token::In);
476 }
477 if eq_ci(b, b"is") {
478 return Some(Token::Is);
479 }
480 if eq_ci(b, b"on") {
481 return Some(Token::On);
482 }
483 if eq_ci(b, b"or") {
484 return Some(Token::Or);
485 }
486 if eq_ci(b, b"to") {
487 return Some(Token::To);
488 }
489 None
490}
491
492#[inline]
493fn kw_len3(b: &[u8]) -> Option<Token> {
494 if eq_ci(b, b"for") {
496 return Some(Token::For);
497 }
498 if eq_ci(b, b"all") {
499 return Some(Token::All);
500 }
501 if eq_ci(b, b"and") {
502 return Some(Token::And);
503 }
504 if eq_ci(b, b"asc") {
505 return Some(Token::Asc);
506 }
507 if eq_ci(b, b"not") {
508 return Some(Token::Not);
509 }
510 None
511}
512
513#[inline]
514fn kw_len4(b: &[u8]) -> Option<Token> {
515 if eq_ci(b, b"from") {
517 return Some(Token::From);
518 }
519 if eq_ci(b, b"drop") {
520 return Some(Token::Drop);
521 }
522 if eq_ci(b, b"null") {
523 return Some(Token::Null);
524 }
525 if eq_ci(b, b"true") {
526 return Some(Token::True);
527 }
528 if eq_ci(b, b"into") {
529 return Some(Token::Into);
530 }
531 if eq_ci(b, b"like") {
532 return Some(Token::Like);
533 }
534 if eq_ci(b, b"join") {
535 return Some(Token::Join);
536 }
537 if eq_ci(b, b"left") {
538 return Some(Token::Left);
539 }
540 if eq_ci(b, b"show") {
541 return Some(Token::Show);
542 }
543 if eq_ci(b, b"desc") {
544 return Some(Token::Desc);
545 }
546 None
547}
548
549#[inline]
550fn kw_len5(b: &[u8]) -> Option<Token> {
551 if eq_ci(b, b"false") {
554 return Some(Token::False);
555 }
556 if eq_ci(b, b"where") {
557 return Some(Token::Where);
558 }
559 if eq_ci(b, b"table") {
560 return Some(Token::Table);
561 }
562 if eq_ci(b, b"index") {
563 return Some(Token::Index);
564 }
565 if eq_ci(b, b"begin") {
566 return Some(Token::Begin);
567 }
568 if eq_ci(b, b"order") {
569 return Some(Token::Order);
570 }
571 if eq_ci(b, b"limit") {
572 return Some(Token::Limit);
573 }
574 if eq_ci(b, b"group") {
575 return Some(Token::Group);
576 }
577 if eq_ci(b, b"union") {
578 return Some(Token::Union);
579 }
580 if eq_ci(b, b"inner") {
581 return Some(Token::Inner);
582 }
583 if eq_ci(b, b"cross") {
584 return Some(Token::Cross);
585 }
586 if eq_ci(b, b"outer") {
587 return Some(Token::Outer);
588 }
589 None
590}
591
592#[inline]
593fn kw_len6(b: &[u8]) -> Option<Token> {
594 if eq_ci(b, b"select") {
596 return Some(Token::Select);
597 }
598 if eq_ci(b, b"tables") {
599 return Some(Token::Tables);
600 }
601 if eq_ci(b, b"except") {
602 return Some(Token::Except);
603 }
604 if eq_ci(b, b"create") {
605 return Some(Token::Create);
606 }
607 if eq_ci(b, b"insert") {
608 return Some(Token::Insert);
609 }
610 if eq_ci(b, b"values") {
611 return Some(Token::Values);
612 }
613 if eq_ci(b, b"commit") {
614 return Some(Token::Commit);
615 }
616 if eq_ci(b, b"having") {
617 return Some(Token::Having);
618 }
619 if eq_ci(b, b"offset") {
620 return Some(Token::Offset);
621 }
622 None
623}
624
625#[inline]
626fn kw_len7(b: &[u8]) -> Option<Token> {
627 if eq_ci(b, b"between") {
629 return Some(Token::Between);
630 }
631 if eq_ci(b, b"default") {
632 return Some(Token::Default);
633 }
634 if eq_ci(b, b"release") {
635 return Some(Token::Release);
636 }
637 if eq_ci(b, b"extract") {
638 return Some(Token::Extract);
639 }
640 None
641}
642
643#[inline]
644fn kw_len8(b: &[u8]) -> Option<Token> {
645 if eq_ci(b, b"rollback") {
647 return Some(Token::Rollback);
648 }
649 if eq_ci(b, b"distinct") {
650 return Some(Token::Distinct);
651 }
652 if eq_ci(b, b"interval") {
653 return Some(Token::Interval);
654 }
655 None
656}
657
658#[inline]
659fn kw_len9(b: &[u8]) -> Option<Token> {
660 if eq_ci(b, b"savepoint") {
662 return Some(Token::Savepoint);
663 }
664 None
665}
666
667#[inline]
668fn kw_len10(b: &[u8]) -> Option<Token> {
669 if eq_ci(b, b"connection") {
671 return Some(Token::Connection);
672 }
673 None
674}
675
676#[inline]
677fn kw_len11(b: &[u8]) -> Option<Token> {
678 if eq_ci(b, b"publication") {
680 return Some(Token::Publication);
681 }
682 None
683}
684
685#[inline]
686fn kw_len12(b: &[u8]) -> Option<Token> {
687 if eq_ci(b, b"subscription") {
689 return Some(Token::Subscription);
690 }
691 None
692}
693
694fn lex_quoted(
701 input: &str,
702 start: usize,
703 quote: u8,
704 is_ident: bool,
705) -> Result<(Token, usize), LexError> {
706 let bytes = input.as_bytes();
707 let mut i = start + 1;
708 let mut s = String::new();
709 loop {
710 if i >= bytes.len() {
711 return Err(LexError {
712 kind: if is_ident {
713 LexErrorKind::UnterminatedQuotedIdent
714 } else {
715 LexErrorKind::UnterminatedString
716 },
717 pos: start,
718 });
719 }
720 if bytes[i] == quote {
721 if peek_eq(bytes, i + 1, quote) {
722 s.push(quote as char);
723 i += 2;
724 } else {
725 i += 1;
726 break;
727 }
728 } else {
729 let ch = input[i..].chars().next().expect("non-empty UTF-8 boundary");
730 s.push(ch);
731 i += ch.len_utf8();
732 }
733 }
734 let tok = if is_ident {
735 Token::QuotedIdent(s)
736 } else {
737 Token::String(s)
738 };
739 Ok((tok, i - start))
740}
741
742fn lex_number(s: &str) -> Result<(Token, usize), LexErrorKind> {
743 let bytes = s.as_bytes();
744 let mut i = 0usize;
745 let mut is_float = false;
746
747 while i < bytes.len() && bytes[i].is_ascii_digit() {
748 i += 1;
749 }
750 if i < bytes.len() && bytes[i] == b'.' {
751 is_float = true;
752 i += 1;
753 while i < bytes.len() && bytes[i].is_ascii_digit() {
754 i += 1;
755 }
756 }
757 if i < bytes.len() && (bytes[i] == b'e' || bytes[i] == b'E') {
758 is_float = true;
759 i += 1;
760 if i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-') {
761 i += 1;
762 }
763 let exp_start = i;
764 while i < bytes.len() && bytes[i].is_ascii_digit() {
765 i += 1;
766 }
767 if exp_start == i {
768 return Err(LexErrorKind::BadNumber(s[..i].to_string()));
769 }
770 }
771
772 let lit = &s[..i];
773 if is_float {
774 lit.parse::<f64>()
775 .map(|v| (Token::Float(v), i))
776 .map_err(|_| LexErrorKind::BadNumber(lit.to_string()))
777 } else {
778 lit.parse::<i64>()
779 .map(|v| (Token::Integer(v), i))
780 .map_err(|_| LexErrorKind::BadNumber(lit.to_string()))
781 }
782}
783
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Tokenizes `s`, panicking if the lexer reports an error.
    fn lex(s: &str) -> Vec<Token> {
        tokenize(s).expect("lex ok")
    }

    /// Tokenizes `s`, which must be rejected, and returns the error.
    fn lex_err(s: &str) -> LexError {
        tokenize(s).unwrap_err()
    }

    /// Shorthand for an unquoted-identifier token.
    fn ident(name: &str) -> Token {
        Token::Ident(name.into())
    }

    /// Expected token stream for `a <op> b`.
    fn binary(op: Token) -> Vec<Token> {
        vec![ident("a"), op, ident("b"), Token::Eof]
    }

    #[test]
    fn empty_yields_only_eof() {
        assert_eq!(lex(""), vec![Token::Eof]);
    }

    #[test]
    fn whitespace_only_yields_only_eof() {
        assert_eq!(lex(" \t\n "), vec![Token::Eof]);
    }

    #[test]
    fn keywords_are_case_insensitive() {
        let expected = vec![Token::Select, Token::Select, Token::Select, Token::Eof];
        assert_eq!(lex("SELECT select Select"), expected);
    }

    #[test]
    fn identifiers_lowercase_ascii() {
        let expected = vec![
            ident("hello"),
            ident("world"),
            ident("_x"),
            ident("x1"),
            Token::Eof,
        ];
        assert_eq!(lex("hello WORLD _x x1"), expected);
    }

    #[test]
    fn quoted_identifier_keeps_case_and_handles_embedded_quote() {
        let expected = vec![
            Token::QuotedIdent("User Name".into()),
            Token::QuotedIdent("a\"b".into()),
            Token::Eof,
        ];
        assert_eq!(lex(r#""User Name" "a""b""#), expected);
    }

    #[test]
    fn integer_and_float_literals() {
        let expected = vec![
            Token::Integer(0),
            Token::Integer(42),
            Token::Float(1.5),
            Token::Float(0.5),
            Token::Float(1e10),
            Token::Float(2.5e-3),
            Token::Eof,
        ];
        assert_eq!(lex("0 42 1.5 .5 1e10 2.5e-3"), expected);
    }

    #[test]
    fn negative_number_is_minus_then_integer() {
        // The sign is its own token; the lexer never folds it into the literal.
        let expected = vec![Token::Minus, Token::Integer(42), Token::Eof];
        assert_eq!(lex("-42"), expected);
    }

    #[test]
    fn string_literal_doubled_quote_escape() {
        let expected = vec![
            Token::String("hello".into()),
            Token::String("it's".into()),
            Token::Eof,
        ];
        assert_eq!(lex("'hello' 'it''s'"), expected);
    }

    #[test]
    fn all_comparison_and_arithmetic_operators() {
        let expected = vec![
            Token::Eq,
            Token::NotEq,
            Token::NotEq,
            Token::Lt,
            Token::LtEq,
            Token::Gt,
            Token::GtEq,
            Token::Plus,
            Token::Minus,
            Token::Star,
            Token::Slash,
            Token::Eof,
        ];
        assert_eq!(lex("= <> != < <= > >= + - * /"), expected);
    }

    #[test]
    fn punctuation() {
        let expected = vec![
            Token::LParen,
            Token::RParen,
            Token::Comma,
            Token::Semicolon,
            Token::Dot,
            Token::Eof,
        ];
        assert_eq!(lex("( ) , ; ."), expected);
    }

    #[test]
    fn line_comment_skipped() {
        let expected = vec![Token::Select, Token::From, Token::Eof];
        assert_eq!(lex("SELECT -- trailing junk\nFROM"), expected);
    }

    #[test]
    fn block_comment_skipped() {
        let expected = vec![Token::Select, Token::Integer(1), Token::Eof];
        assert_eq!(lex("SELECT /* skipped */ 1"), expected);
    }

    #[test]
    fn unterminated_string_errors() {
        let err = lex_err("'oops");
        assert_eq!(err.kind, LexErrorKind::UnterminatedString);
        assert_eq!(err.pos, 0);
    }

    #[test]
    fn unterminated_block_comment_errors() {
        let err = lex_err("/* never closed");
        assert_eq!(err.kind, LexErrorKind::UnterminatedBlockComment);
    }

    #[test]
    fn unknown_char_errors() {
        let err = lex_err("@");
        assert_eq!(err.kind, LexErrorKind::UnknownChar('@'));
    }

    #[test]
    fn dot_in_qualified_column() {
        let expected = vec![ident("t"), Token::Dot, ident("col"), Token::Eof];
        assert_eq!(lex("t.col"), expected);
    }

    #[test]
    fn brackets_are_distinct_tokens() {
        let expected = vec![Token::LBracket, Token::RBracket, Token::Eof];
        assert_eq!(lex("[ ]"), expected);
    }

    #[test]
    fn l2_distance_is_three_char_token() {
        assert_eq!(lex("a <-> b"), binary(Token::L2Distance));

        // Without the trailing `>` the same prefix is just `<` followed by `-`.
        let split = vec![ident("a"), Token::Lt, Token::Minus, ident("b"), Token::Eof];
        assert_eq!(lex("a <- b"), split);
    }

    #[test]
    fn order_by_limit_are_keywords() {
        let expected = vec![Token::Order, Token::By, Token::Limit, Token::Eof];
        assert_eq!(lex("ORDER BY LIMIT"), expected);
    }

    #[test]
    fn inner_product_operator_3char() {
        assert_eq!(lex("a <#> b"), binary(Token::InnerProduct));
    }

    #[test]
    fn cosine_distance_operator_3char() {
        assert_eq!(lex("a <=> b"), binary(Token::CosineDistance));

        // The two-character prefix on its own is still `<=`.
        assert_eq!(lex("a <= b"), binary(Token::LtEq));
    }

    #[test]
    fn double_colon_cast_token() {
        let expected = vec![ident("x"), Token::DoubleColon, ident("int"), Token::Eof];
        assert_eq!(lex("x::INT"), expected);
    }

    #[test]
    fn lone_single_colon_is_unknown_char() {
        let err = lex_err(":x");
        assert_eq!(err.kind, LexErrorKind::UnknownChar(':'));
    }
}