use crate::span::Span;
use logos::Logos;

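/// Expand backslash escape sequences inside string literal contents.
///
/// Handles `\n`, `\t`, `\r`, `\\`, `\"`, `\'`, `\0`, two-digit hex
/// escapes (`\xNN`), and braced Unicode escapes (`\u{...}`). Unknown
/// escapes are passed through verbatim; a trailing backslash is kept.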
fn process_escape_sequences(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('n') => result.push('\n'),
                Some('t') => result.push('\t'),
                Some('r') => result.push('\r'),
                Some('\\') => result.push('\\'),
                Some('"') => result.push('"'),
                Some('\'') => result.push('\''),
                Some('0') => result.push('\0'),
                Some('x') => {
                    // Hex escape, e.g. \x41 -> 'A'. Collect up to two hex
                    // digits; if nothing parses, the escape is dropped.
                    let mut hex = String::new();
                    for _ in 0..2 {
                        if let Some(&c) = chars.peek() {
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            }
                        }
                    }
                    if let Ok(val) = u8::from_str_radix(&hex, 16) {
                        result.push(val as char);
                    }
                }
                Some('u') => {
                    // Braced Unicode escape, e.g. \u{1F600}. A `\u` without
                    // a brace, or an invalid code point, is dropped.
                    if chars.peek() == Some(&'{') {
                        chars.next();
                        let mut hex = String::new();
                        while let Some(&c) = chars.peek() {
                            if c == '}' {
                                chars.next();
                                break;
                            }
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            } else {
                                break;
                            }
                        }
                        if let Ok(val) = u32::from_str_radix(&hex, 16) {
                            if let Some(c) = char::from_u32(val) {
                                result.push(c);
                            }
                        }
                    }
                }
                // Unknown escapes are preserved verbatim.
                Some(other) => {
                    result.push('\\');
                    result.push(other);
                }
                // A trailing backslash is kept as-is.
                None => result.push('\\'),
            }
        } else {
            result.push(c);
        }
    }
    result
}

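/// Callback for `r#"..."#` raw strings. The opening `r#"` has already
/// been matched; scan the remainder for the closing `"#`, bump the
/// lexer past it, and return the content verbatim (no escape
/// processing). Returning `None` reports a lexing error when the
/// closing delimiter never appears.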
fn raw_string_delimited_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"#") {
        let content = &remainder[..end_pos];
        // Consume the content plus the two-byte closing `"#`.
        lex.bump(end_pos + 2);
        Some(content.to_string())
    } else {
        None
    }
}

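/// Callback for `"""..."""` multi-line strings. The opening `"""` has
/// already been matched; scan for the closing `"""`, bump past it, and
/// apply escape processing to the enclosed content.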
fn multiline_string_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"\"\"") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 3);
        Some(process_escape_sequences(content))
    } else {
        None
    }
}

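/// Decode the (possibly escaped) character inside a char literal.
/// Malformed escapes degrade to `'?'` rather than failing the lex.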
fn process_char_escape(s: &str) -> char {
    let mut chars = s.chars();
    match chars.next() {
        Some('\\') => match chars.next() {
            Some('n') => '\n',
            Some('t') => '\t',
            Some('r') => '\r',
            Some('\\') => '\\',
            Some('"') => '"',
            Some('\'') => '\'',
            Some('0') => '\0',
            Some('x') => {
                let hex: String = chars.take(2).collect();
                u8::from_str_radix(&hex, 16)
                    .map(|v| v as char)
                    .unwrap_or('?')
            }
            Some('u') => {
                if chars.next() == Some('{') {
                    let hex: String = chars.take_while(|&c| c != '}').collect();
                    u32::from_str_radix(&hex, 16)
                        .ok()
                        .and_then(char::from_u32)
                        .unwrap_or('?')
                } else {
                    '?'
                }
            }
            Some(c) => c,
            None => '?',
        },
        Some(c) => c,
        None => '?',
    }
}

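/// Every token recognized by the lexer, derived with `logos`.
/// Whitespace is skipped via the `skip` attribute below; the variants
/// cover keywords, Unicode morpheme and operator symbols, punctuation,
/// delimiters, literals, identifiers, and rune annotations.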
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\r\n\f]+")]
pub enum Token {
    #[regex(r"//[^\n]*", |lex| lex.slice().to_string())]
    LineComment(String),

    #[regex(r"//![^\n]*", |lex| lex.slice().to_string())]
    DocComment(String),

    #[token("fn")]
    Fn,
    #[token("async")]
    Async,
    #[token("let")]
    Let,
    #[token("mut")]
    Mut,
    #[token("const")]
    Const,
    #[token("type")]
    Type,
    #[token("struct")]
    Struct,
    #[token("enum")]
    Enum,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("pub")]
    Pub,
    #[token("actor")]
    Actor,
    #[token("saga")]
    Saga,
    #[token("scope")]
    Scope,
    #[token("rune")]
    Rune,

    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("loop")]
    Loop,
    #[token("while")]
    While,
    #[token("for")]
    For,
    #[token("in")]
    In,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("return")]
    Return,
    #[token("yield")]
    Yield,
    #[token("await")]
    Await,

    #[token("self")]
    SelfLower,
    #[token("Self")]
    SelfUpper,
    #[token("super")]
    Super,
    #[token("crate")]
    Crate,
    #[token("where")]
    Where,
    #[token("as")]
    As,
    #[token("dyn")]
    Dyn,
    #[token("move")]
    Move,
    #[token("ref")]
    Ref,
    #[token("static")]
    Static,
    #[token("unsafe")]
    Unsafe,
    #[token("extern")]
    Extern,
    #[token("asm")]
    Asm,
    #[token("volatile")]
    Volatile,
    #[token("naked")]
    Naked,
    #[token("packed")]
    Packed,
    #[token("simd")]
    Simd,
    #[token("atomic")]
    Atomic,
    #[token("derive")]
    Derive,
    #[token("on")]
    On,

    #[token("true")]
    True,
    #[token("false")]
    False,

    #[token("null")]
    Null,

    #[token("τ")]
    #[token("Τ")]
    Tau,
    #[token("φ")]
    #[token("Φ")]
    Phi,
    #[token("σ")]
    #[token("Σ")]
    Sigma,
    #[token("ρ")]
    #[token("Ρ")]
    Rho,
    #[token("λ")]
    #[token("Λ")]
    Lambda,
    #[token("Π")]
    Pi,
    #[token("⌛")]
    Hourglass,
    #[token("δ")]
    #[token("Δ")]
    Delta,
    #[token("ε")]
    Epsilon,
    #[token("ω")]
    #[token("Ω")]
    Omega,
    #[token("α")]
    Alpha,
    #[token("ζ")]
    Zeta,
    #[token("μ")]
    #[token("Μ")]
    Mu,
    #[token("χ")]
    #[token("Χ")]
    Chi,
    #[token("ν")]
    #[token("Ν")]
    Nu,
    #[token("ξ")]
    #[token("Ξ")]
    Xi,

    #[token("∥")]
    #[token("parallel")]
    Parallel,
    #[token("⊛")]
    #[token("gpu")]
    Gpu,

    #[token("∀")]
    ForAll,
    #[token("∃")]
    Exists,
    #[token("∈")]
    ElementOf,
    #[token("∉")]
    NotElementOf,

    #[token("∪")]
    Union,
    #[token("∩")]
    Intersection,
    #[token("∖")]
    SetMinus,
    #[token("⊂")]
    Subset,
    #[token("⊆")]
    SubsetEq,
    #[token("⊃")]
    Superset,
    #[token("⊇")]
    SupersetEq,

    #[token("∧")]
    LogicAnd,
    #[token("∨")]
    LogicOr,
    #[token("¬")]
    LogicNot,
    #[token("⊻")]
    LogicXor,
    #[token("⊤")]
    Top,
    #[token("⊥")]
    Bottom,

    #[token("⋏")]
    BitwiseAndSymbol,
    #[token("⋎")]
    BitwiseOrSymbol,

    #[token("∷")]
    TypeAnnotation,

    #[token("∫")]
    Integral,
    #[token("∂")]
    Partial,
    #[token("√")]
    Sqrt,
    #[token("∛")]
    Cbrt,
    #[token("∇")]
    Nabla,

    #[token("⍋")]
    GradeUp,
    #[token("⍒")]
    GradeDown,
    #[token("⌽")]
    Rotate,
    #[token("↻")]
    CycleArrow,
    #[token("⌺")]
    QuadDiamond,
    #[token("⊞")]
    SquaredPlus,
    #[token("⍳")]
    Iota,

    #[token("∘")]
    Compose,
    #[token("⊗")]
    Tensor,
    #[token("⊕")]
    DirectSum,

    #[token("⋈")]
    Bowtie,
    #[token("⋳")]
    ElementSmallVerticalBar,
    #[token("⊔")]
    SquareCup,
    #[token("⊓")]
    SquareCap,

    #[token("‽")]
    Interrobang,

    #[token("⊖")]
    AffectNegative,
    #[token("⊜")]
    AffectNeutral,
    #[token("⸮")]
    IronyMark,
    #[token("↑")]
    IntensityUp,
    #[token("↓")]
    IntensityDown,
    #[token("⇈")]
    IntensityMax,
    #[token("♔")]
    FormalRegister,
    #[token("♟")]
    InformalRegister,
    #[token("☺")]
    EmotionJoy,
    #[token("☹")]
    EmotionSadness,
    #[token("⚡")]
    EmotionAnger,
    #[token("❄")]
    EmotionFear,
    #[token("✦")]
    EmotionSurprise,
    #[token("♡")]
    EmotionLove,
    #[token("◉")]
    ConfidenceHigh,
    #[token("◎")]
    ConfidenceMedium,
    #[token("○")]
    ConfidenceLow,

    #[token("·ing")]
    AspectProgressive,
    #[token("·ed")]
    AspectPerfective,
    #[token("·able")]
    AspectPotential,
    #[token("·ive")]
    AspectResultative,

    #[token("|")]
    Pipe,
    #[token("·")]
    MiddleDot,
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("<-")]
    LeftArrow,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("**")]
    StarStar,
    #[token("&&")]
    AndAnd,
    #[token("||")]
    OrOr,
    #[token("!")]
    Bang,
    #[token("?")]
    Question,
    #[token("~")]
    Tilde,
    #[token("&")]
    Amp,
    #[token("^")]
    Caret,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("=")]
    Eq,
    #[token("+=")]
    PlusEq,
    #[token("-=")]
    MinusEq,
    #[token("*=")]
    StarEq,
    #[token("/=")]
    SlashEq,
    #[token("..")]
    DotDot,
    #[token("..=")]
    DotDotEq,
    #[token("++")]
    PlusPlus,
    #[token("::")]
    ColonColon,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,
    #[token("#!")]
    HashBang,
    #[token("#")]
    Hash,
    // Priority raised above the identifier regex so a lone `_` lexes as
    // Underscore rather than Ident.
    #[token("_", priority = 3)]
    Underscore,

    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,

    #[token("∅")]
    Empty,
    #[token("◯")]
    Circle,
    #[token("∞")]
    Infinity,

    #[token("⇒")]
    ProtoSend,
    #[token("⇐")]
    ProtoRecv,
    #[token("≋")]
    ProtoStream,
    #[token("⊸")]
    ProtoConnect,
    #[token("⏱")]
    ProtoTimeout,

    #[token("send")]
    Send,
    #[token("recv")]
    Recv,
    #[token("stream")]
    Stream,
    #[token("connect")]
    Connect,
    #[token("close")]
    Close,
    #[token("timeout")]
    Timeout,
    #[token("retry")]
    Retry,
    #[token("header")]
    Header,
    #[token("body")]
    Body,

    #[token("http")]
    Http,
    #[token("https")]
    Https,
    #[token("ws")]
    Ws,
    #[token("wss")]
    Wss,
    #[token("grpc")]
    Grpc,
    #[token("kafka")]
    Kafka,
    #[token("amqp")]
    Amqp,
    #[token("graphql")]
    GraphQL,

    #[regex(r"0b[01_]+", |lex| lex.slice().to_string())]
    BinaryLit(String),

    #[regex(r"0o[0-7_]+", |lex| lex.slice().to_string())]
    OctalLit(String),

    #[regex(r"0x[0-9a-fA-F_]+", |lex| lex.slice().to_string())]
    HexLit(String),

    // Base-20 literal: digits 0-9 plus a-j for 10-19.
    #[regex(r"0v[0-9a-jA-J_]+", |lex| lex.slice().to_string())]
    VigesimalLit(String),

    // Base-60 literal: digits 0-9 plus letters.
    #[regex(r"0s[0-9a-zA-Z_]+", |lex| lex.slice().to_string())]
    SexagesimalLit(String),

    // Base-12 literal: digits 0-9 plus a-b for 10-11.
    #[regex(r"0z[0-9a-bA-B_]+", |lex| lex.slice().to_string())]
    DuodecimalLit(String),

    #[regex(r"[0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9_]+)?", |lex| lex.slice().to_string())]
    FloatLit(String),

    #[regex(r"[0-9][0-9_]*", |lex| lex.slice().to_string())]
    IntLit(String),

    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_escape_sequences(inner)
    })]
    StringLit(String),

    #[token(r#"""""#, multiline_string_callback)]
    MultiLineStringLit(String),

    #[regex(r#"b"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        inner.as_bytes().to_vec()
    })]
    ByteStringLit(Vec<u8>),

    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        process_escape_sequences(inner)
    })]
    InterpolatedStringLit(String),

    #[regex(r#"σ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        // The sigil is multi-byte, so index past it plus the opening quote.
        let start = "σ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringSql(String),

    #[regex(r#"ρ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "ρ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringRoute(String),

    #[regex(r"'([^'\\]|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_char_escape(inner)
    })]
    CharLit(char),

    #[regex(r#"r"[^"]*""#, |lex| {
        let s = lex.slice();
        s[2..s.len()-1].to_string()
    })]
    RawStringLit(String),

    #[token(r##"r#""##, raw_string_delimited_callback)]
    RawStringDelimited(String),

    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    Ident(String),

    #[regex(r"//@\s*rune:\s*[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    RuneAnnotation(String),
}

impl Token {
    /// True for the declaration and control-flow keywords (not the
    /// reserved identifiers such as `self` or `super`).
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            Token::Fn
                | Token::Async
                | Token::Let
                | Token::Mut
                | Token::Const
                | Token::Type
                | Token::Struct
                | Token::Enum
                | Token::Trait
                | Token::Impl
                | Token::Mod
                | Token::Use
                | Token::Pub
                | Token::Actor
                | Token::Saga
                | Token::Scope
                | Token::Rune
                | Token::If
                | Token::Else
                | Token::Match
                | Token::Loop
                | Token::While
                | Token::For
                | Token::In
                | Token::Break
                | Token::Continue
                | Token::Return
                | Token::Yield
                | Token::Await
        )
    }

    pub fn is_morpheme(&self) -> bool {
        matches!(
            self,
            Token::Tau
                | Token::Phi
                | Token::Sigma
                | Token::Rho
                | Token::Lambda
                | Token::Pi
                | Token::Hourglass
                | Token::Delta
                | Token::Epsilon
                | Token::Omega
                | Token::Alpha
                | Token::Zeta
                | Token::Mu
                | Token::Chi
                | Token::Nu
                | Token::Xi
                | Token::Parallel
                | Token::Gpu
                | Token::Integral
                | Token::Partial
                | Token::Sqrt
                | Token::Cbrt
                | Token::Compose
        )
    }

    pub fn is_aspect(&self) -> bool {
        matches!(
            self,
            Token::AspectProgressive
                | Token::AspectPerfective
                | Token::AspectPotential
                | Token::AspectResultative
        )
    }

    pub fn is_data_op(&self) -> bool {
        matches!(
            self,
            Token::Bowtie | Token::ElementSmallVerticalBar | Token::SquareCup | Token::SquareCap
        )
    }

    pub fn is_bitwise_symbol(&self) -> bool {
        matches!(self, Token::BitwiseAndSymbol | Token::BitwiseOrSymbol)
    }

    pub fn is_quantifier(&self) -> bool {
        matches!(
            self,
            Token::ForAll | Token::Exists | Token::ElementOf | Token::NotElementOf
        )
    }

    pub fn is_set_op(&self) -> bool {
        matches!(
            self,
            Token::Union
                | Token::Intersection
                | Token::SetMinus
                | Token::Subset
                | Token::SubsetEq
                | Token::Superset
                | Token::SupersetEq
        )
    }

    pub fn is_logic_op(&self) -> bool {
        matches!(
            self,
            Token::LogicAnd
                | Token::LogicOr
                | Token::LogicNot
                | Token::LogicXor
                | Token::Top
                | Token::Bottom
        )
    }

    pub fn is_evidentiality(&self) -> bool {
        matches!(
            self,
            Token::Bang | Token::Question | Token::Tilde | Token::Interrobang
        )
    }

    pub fn is_affective(&self) -> bool {
        matches!(
            self,
            Token::DirectSum
                | Token::AffectNegative
                | Token::AffectNeutral
                | Token::IronyMark
                | Token::IntensityUp
                | Token::IntensityDown
                | Token::IntensityMax
                | Token::FormalRegister
                | Token::InformalRegister
                | Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
                | Token::ConfidenceHigh
                | Token::ConfidenceMedium
                | Token::ConfidenceLow
        )
    }

    /// Sentiment markers: ⊖ (negative), ⊜ (neutral), and ⊕, which
    /// doubles as `DirectSum` and serves as the positive marker here.
    pub fn is_sentiment(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral
        )
    }

    pub fn is_emotion(&self) -> bool {
        matches!(
            self,
            Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
        )
    }

    pub fn is_intensity(&self) -> bool {
        matches!(
            self,
            Token::IntensityUp | Token::IntensityDown | Token::IntensityMax
        )
    }
}

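/// A thin wrapper around the generated `logos` lexer that yields
/// `(Token, Span)` pairs and buffers one token of lookahead for `peek`.
///
/// A minimal usage sketch (illustrative):
///
/// ```ignore
/// let mut lexer = Lexer::new("let x = 42;");
/// while let Some((token, span)) = lexer.next_token() {
///     println!("{:?} at {:?}", token, span);
/// }
/// ```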
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    peeked: Option<Option<(Token, Span)>>,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source),
            peeked: None,
        }
    }

    /// Return the next token and its span, consuming it.
    pub fn next_token(&mut self) -> Option<(Token, Span)> {
        if let Some(peeked) = self.peeked.take() {
            return peeked;
        }

        match self.inner.next() {
            Some(Ok(token)) => {
                let span = self.inner.span();
                Some((token, Span::new(span.start, span.end)))
            }
            // Input that matches no token is skipped rather than
            // surfaced; lexing resumes at the next recognizable token.
            Some(Err(_)) => self.next_token(),
            None => None,
        }
    }

    /// Return the next token without consuming it.
    pub fn peek(&mut self) -> Option<&(Token, Span)> {
        if self.peeked.is_none() {
            self.peeked = Some(self.next_token());
        }
        self.peeked.as_ref().and_then(|p| p.as_ref())
    }

    /// The span of the most recently lexed token.
    pub fn span(&self) -> Span {
        let span = self.inner.span();
        Span::new(span.start, span.end)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_morphemes() {
        let mut lexer = Lexer::new("τ φ σ ρ λ Σ Π ⌛");
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Phi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Rho, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lambda, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Pi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Hourglass, _))));
    }

    #[test]
    fn test_evidentiality() {
        let mut lexer = Lexer::new("value! uncertain? reported~ paradox‽");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "value"));
        assert!(matches!(lexer.next_token(), Some((Token::Bang, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "uncertain"));
        assert!(matches!(lexer.next_token(), Some((Token::Question, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "reported"));
        assert!(matches!(lexer.next_token(), Some((Token::Tilde, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "paradox"));
        assert!(matches!(lexer.next_token(), Some((Token::Interrobang, _))));
    }

    #[test]
    fn test_pipe_chain() {
        let mut lexer = Lexer::new("data|τ{f}|φ{p}|σ");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "data"));
        assert!(matches!(lexer.next_token(), Some((Token::Pipe, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("42 0b1010 0o52 0x2A 0v22 0s42 3.14");
        assert!(matches!(lexer.next_token(), Some((Token::IntLit(s), _)) if s == "42"));
        assert!(matches!(lexer.next_token(), Some((Token::BinaryLit(s), _)) if s == "0b1010"));
        assert!(matches!(lexer.next_token(), Some((Token::OctalLit(s), _)) if s == "0o52"));
        assert!(matches!(lexer.next_token(), Some((Token::HexLit(s), _)) if s == "0x2A"));
        assert!(matches!(lexer.next_token(), Some((Token::VigesimalLit(s), _)) if s == "0v22"));
        assert!(matches!(lexer.next_token(), Some((Token::SexagesimalLit(s), _)) if s == "0s42"));
        assert!(matches!(lexer.next_token(), Some((Token::FloatLit(s), _)) if s == "3.14"));
    }

    #[test]
    fn test_incorporation() {
        let mut lexer = Lexer::new("file·open·read");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "file"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "open"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "read"));
    }

    #[test]
    fn test_special_symbols() {
        let mut lexer = Lexer::new("∅ ◯ ∞");
        assert!(matches!(lexer.next_token(), Some((Token::Empty, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Circle, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Infinity, _))));
    }

    #[test]
    fn test_quantifiers() {
        let mut lexer = Lexer::new("∀x ∃y x∈S y∉T");
        assert!(matches!(lexer.next_token(), Some((Token::ForAll, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Exists, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::ElementOf, _))));
    }

    #[test]
    fn test_set_operations() {
        let mut lexer = Lexer::new("A∪B A∩B A∖B A⊂B A⊆B");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Union, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "B"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Intersection, _))));
    }

    #[test]
    fn test_logic_operators() {
        let mut lexer = Lexer::new("p∧q p∨q ¬p p⊻q ⊤ ⊥");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicAnd, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "q"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicOr, _))));
    }

    #[test]
    fn test_analysis_operators() {
        let mut lexer = Lexer::new("∫f ∂g √x ∛y f∘g");
        assert!(matches!(lexer.next_token(), Some((Token::Integral, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::Partial, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "g"));
        assert!(matches!(lexer.next_token(), Some((Token::Sqrt, _))));
    }

    #[test]
    fn test_additional_morphemes() {
        let mut lexer = Lexer::new("δ ε ω α ζ");
        assert!(matches!(lexer.next_token(), Some((Token::Delta, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Epsilon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Omega, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Alpha, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Zeta, _))));
    }

    #[test]
    fn test_ffi_keywords() {
        let mut lexer = Lexer::new("extern unsafe");
        assert!(matches!(lexer.next_token(), Some((Token::Extern, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Unsafe, _))));
    }

    #[test]
    fn test_parallel_morphemes() {
        let mut lexer = Lexer::new("∥ parallel ⊛ gpu");
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
    }

    #[test]
    fn test_string_escape_sequences() {
        let mut lexer = Lexer::new(r#""hello\nworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\nworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\tworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\tworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\rworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\rworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\\world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\\world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\"world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\"world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\0world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\0world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_hex_escape() {
        let mut lexer = Lexer::new(r#""hello\x41world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "helloAworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_unicode_escape() {
        let mut lexer = Lexer::new(r#""hello\u{1F600}world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello😀world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""\u{03C4}""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τ"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_char_escape_sequences() {
        let mut lexer = Lexer::new(r"'\n'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\n'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\t'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\t'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\\'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\\'),
            other => panic!("Expected CharLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string() {
        let mut lexer = Lexer::new(r#"r"hello\nworld""#);
        match lexer.next_token() {
            Some((Token::RawStringLit(s), _)) => assert_eq!(s, r"hello\nworld"),
            other => panic!("Expected RawStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string_delimited() {
        let mut lexer = Lexer::new(r##"r#"hello "world""#"##);
        match lexer.next_token() {
            Some((Token::RawStringDelimited(s), _)) => assert_eq!(s, r#"hello "world""#),
            other => panic!("Expected RawStringDelimited, got {:?}", other),
        }
    }

    #[test]
    fn test_byte_string() {
        let mut lexer = Lexer::new(r#"b"hello""#);
        match lexer.next_token() {
            Some((Token::ByteStringLit(bytes), _)) => {
                assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
            }
            other => panic!("Expected ByteStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_interpolated_string() {
        let mut lexer = Lexer::new(r#"f"hello {name}""#);
        match lexer.next_token() {
            Some((Token::InterpolatedStringLit(s), _)) => assert_eq!(s, "hello {name}"),
            other => panic!("Expected InterpolatedStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_sql() {
        let mut lexer = Lexer::new(r#"σ"SELECT * FROM {table}""#);
        match lexer.next_token() {
            Some((Token::SigilStringSql(s), _)) => assert_eq!(s, "SELECT * FROM {table}"),
            other => panic!("Expected SigilStringSql, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_route() {
        let mut lexer = Lexer::new(r#"ρ"/api/v1/{resource}/{id}""#);
        match lexer.next_token() {
            Some((Token::SigilStringRoute(s), _)) => assert_eq!(s, "/api/v1/{resource}/{id}"),
            other => panic!("Expected SigilStringRoute, got {:?}", other),
        }
    }

    #[test]
    fn test_unicode_in_strings() {
        let mut lexer = Lexer::new(r#""τφσρ 你好 🦀""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τφσρ 你好 🦀"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_empty_string() {
        let mut lexer = Lexer::new(r#""""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, ""),
            other => panic!("Expected empty StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_escape_sequence_helper() {
        assert_eq!(process_escape_sequences(r"hello\nworld"), "hello\nworld");
        assert_eq!(process_escape_sequences(r"hello\tworld"), "hello\tworld");
        assert_eq!(process_escape_sequences(r"hello\\world"), "hello\\world");
        assert_eq!(process_escape_sequences(r#"hello\"world"#), "hello\"world");
        assert_eq!(process_escape_sequences(r"hello\x41world"), "helloAworld");
        assert_eq!(
            process_escape_sequences(r"hello\u{1F600}world"),
            "hello😀world"
        );
    }
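
    // Sanity checks for the classification predicates and for `peek`
    // lookahead; every expectation below follows directly from the
    // definitions earlier in this module.
    #[test]
    fn test_token_predicates() {
        assert!(Token::Fn.is_keyword());
        assert!(!Token::Plus.is_keyword());
        assert!(Token::Tau.is_morpheme());
        assert!(Token::AspectProgressive.is_aspect());
        assert!(Token::ForAll.is_quantifier());
        assert!(Token::Union.is_set_op());
        assert!(Token::LogicAnd.is_logic_op());
        assert!(Token::Bang.is_evidentiality());
        assert!(Token::IntensityMax.is_intensity());
    }

    #[test]
    fn test_peek_does_not_consume() {
        let mut lexer = Lexer::new("fn main");
        assert!(matches!(lexer.peek(), Some((Token::Fn, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Fn, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "main"));
        assert!(lexer.next_token().is_none());
    }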
}