use logos::Logos;

use crate::span::Span;
/// Decodes backslash escape sequences (`\n`, `\t`, `\r`, `\\`, `\"`, `\'`,
/// `\0`, `\xNN`, `\u{...}`) in a string literal's contents. Unrecognized
/// escapes are kept verbatim; malformed `\x`/`\u` escapes are dropped.
fn process_escape_sequences(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('n') => result.push('\n'),
                Some('t') => result.push('\t'),
                Some('r') => result.push('\r'),
                Some('\\') => result.push('\\'),
                Some('"') => result.push('"'),
                Some('\'') => result.push('\''),
                Some('0') => result.push('\0'),
                Some('x') => {
                    // Hex escape \xNN: consume up to two hex digits.
                    let mut hex = String::new();
                    for _ in 0..2 {
                        if let Some(&c) = chars.peek() {
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            }
                        }
                    }
                    if let Ok(val) = u8::from_str_radix(&hex, 16) {
                        result.push(val as char);
                    }
                }
                Some('u') => {
                    // Unicode escape \u{...}: consume hex digits until '}'.
                    if chars.peek() == Some(&'{') {
                        chars.next(); // consume '{'
                        let mut hex = String::new();
                        while let Some(&c) = chars.peek() {
                            if c == '}' {
                                chars.next();
                                break;
                            }
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            } else {
                                break;
                            }
                        }
                        if let Ok(val) = u32::from_str_radix(&hex, 16) {
                            if let Some(c) = char::from_u32(val) {
                                result.push(c);
                            }
                        }
                    }
                }
                Some(other) => {
                    // Unknown escape: keep the backslash and the character.
                    result.push('\\');
                    result.push(other);
                }
                None => result.push('\\'),
            }
        } else {
            result.push(c);
        }
    }
    result
}

/// Callback for `r#"..."#` raw strings: scans the remainder for the closing
/// `"#`, bumps the lexer past it, and returns the raw contents. Returns
/// `None` (a lex error) if the string is unterminated.
fn raw_string_delimited_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"#") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 2);
        Some(content.to_string())
    } else {
        None
    }
}

/// Callback for `"""..."""` multiline strings: scans the remainder for the
/// closing `"""`, bumps past it, and processes escape sequences in the body.
fn multiline_string_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"\"\"") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 3);
        Some(process_escape_sequences(content))
    } else {
        None
    }
}

/// Decodes the contents of a character literal, resolving escape sequences.
/// Malformed escapes fall back to `'?'`.
fn process_char_escape(s: &str) -> char {
    let mut chars = s.chars();
    match chars.next() {
        Some('\\') => {
            match chars.next() {
                Some('n') => '\n',
                Some('t') => '\t',
                Some('r') => '\r',
                Some('\\') => '\\',
                Some('"') => '"',
                Some('\'') => '\'',
                Some('0') => '\0',
                Some('x') => {
                    let hex: String = chars.take(2).collect();
                    u8::from_str_radix(&hex, 16).map(|v| v as char).unwrap_or('?')
                }
                Some('u') => {
                    if chars.next() == Some('{') {
                        let hex: String = chars.take_while(|&c| c != '}').collect();
                        u32::from_str_radix(&hex, 16)
                            .ok()
                            .and_then(char::from_u32)
                            .unwrap_or('?')
                    } else {
                        '?'
                    }
                }
                Some(c) => c,
                None => '?',
            }
        }
        Some(c) => c,
        None => '?',
    }
}

#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\r\n\f]+")]
pub enum Token {
    // Comments
    #[regex(r"//[^\n]*", |lex| lex.slice().to_string())]
    LineComment(String),

    #[regex(r"//![^\n]*", |lex| lex.slice().to_string())]
    DocComment(String),

    // Declaration keywords
    #[token("fn")]
    Fn,
    #[token("async")]
    Async,
    #[token("let")]
    Let,
    #[token("mut")]
    Mut,
    #[token("const")]
    Const,
    #[token("type")]
    Type,
    #[token("struct")]
    Struct,
    #[token("enum")]
    Enum,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("pub")]
    Pub,
    #[token("actor")]
    Actor,
    #[token("saga")]
    Saga,
    #[token("scope")]
    Scope,
    #[token("rune")]
    Rune,

    // Control flow
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("loop")]
    Loop,
    #[token("while")]
    While,
    #[token("for")]
    For,
    #[token("in")]
    In,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("return")]
    Return,
    #[token("yield")]
    Yield,
    #[token("await")]
    Await,

    // Other keywords and modifiers
    #[token("self")]
    SelfLower,
    #[token("Self")]
    SelfUpper,
    #[token("super")]
    Super,
    #[token("crate")]
    Crate,
    #[token("where")]
    Where,
    #[token("as")]
    As,
    #[token("dyn")]
    Dyn,
    #[token("move")]
    Move,
    #[token("ref")]
    Ref,
    #[token("static")]
    Static,
    #[token("unsafe")]
    Unsafe,
    #[token("extern")]
    Extern,
    #[token("asm")]
    Asm,
    #[token("volatile")]
    Volatile,
    #[token("naked")]
    Naked,
    #[token("packed")]
    Packed,
    #[token("simd")]
    Simd,
    #[token("atomic")]
    Atomic,
    #[token("derive")]
    Derive,
    #[token("on")]
    On,

    // Boolean literals
    #[token("true")]
    True,
    #[token("false")]
    False,

    #[token("null")]
    Null,

    // Morphemes (Greek letters; upper- and lowercase map to one token)
    #[token("τ")]
    #[token("Τ")]
    Tau,
    #[token("φ")]
    #[token("Φ")]
    Phi,
    #[token("σ")]
    #[token("Σ")]
    Sigma,
    #[token("ρ")]
    #[token("Ρ")]
    Rho,
    #[token("λ")]
    #[token("Λ")]
    Lambda,
    #[token("Π")]
    Pi,
    #[token("⌛")]
    Hourglass,
    #[token("δ")]
    #[token("Δ")]
    Delta,
    #[token("ε")]
    Epsilon,
    #[token("ω")]
    #[token("Ω")]
    Omega,
    #[token("α")]
    Alpha,
    #[token("ζ")]
    Zeta,
    #[token("μ")]
    #[token("Μ")]
    Mu,
    #[token("χ")]
    #[token("Χ")]
    Chi,
    #[token("ν")]
    #[token("Ν")]
    Nu,
    #[token("ξ")]
    #[token("Ξ")]
    Xi,

    // Parallel-execution morphemes (symbol and keyword forms)
    #[token("∥")]
    #[token("parallel")]
    Parallel,
    #[token("⊛")]
    #[token("gpu")]
    Gpu,

    // Quantifiers
    #[token("∀")]
    ForAll,
    #[token("∃")]
    Exists,
    #[token("∈")]
    ElementOf,
    #[token("∉")]
    NotElementOf,

    // Set operations
    #[token("∪")]
    Union,
    #[token("∩")]
    Intersection,
    #[token("∖")]
    SetMinus,
    #[token("⊂")]
    Subset,
    #[token("⊆")]
    SubsetEq,
    #[token("⊃")]
    Superset,
    #[token("⊇")]
    SupersetEq,

    // Logic operators
    #[token("∧")]
    LogicAnd,
    #[token("∨")]
    LogicOr,
    #[token("¬")]
    LogicNot,
    #[token("⊻")]
    LogicXor,
    #[token("⊤")]
    Top,
    #[token("⊥")]
    Bottom,

    // Bitwise symbols
    #[token("⋏")]
    BitwiseAndSymbol,
    #[token("⋎")]
    BitwiseOrSymbol,

    #[token("∷")]
    TypeAnnotation,

    // Analysis and composition operators
    #[token("∫")]
    Integral,
    #[token("∂")]
    Partial,
    #[token("√")]
    Sqrt,
    #[token("∛")]
    Cbrt,
    #[token("∘")]
    Compose,
    #[token("⊗")]
    Tensor,
    #[token("⊕")]
    DirectSum,

    // Data operations
    #[token("⋈")]
    Bowtie,
    #[token("⋳")]
    ElementSmallVerticalBar,
    #[token("⊔")]
    SquareCup,
    #[token("⊓")]
    SquareCap,

    // Evidentiality and affect markers
    #[token("‽")]
    Interrobang,
    #[token("⊖")]
    AffectNegative,
    #[token("⊜")]
    AffectNeutral,
    #[token("⸮")]
    IronyMark,

    // Intensity markers
    #[token("↑")]
    IntensityUp,
    #[token("↓")]
    IntensityDown,
    #[token("⇈")]
    IntensityMax,

    // Register markers
    #[token("♔")]
    FormalRegister,
    #[token("♟")]
    InformalRegister,

    // Emotion markers
    #[token("☺")]
    EmotionJoy,
    #[token("☹")]
    EmotionSadness,
    #[token("⚡")]
    EmotionAnger,
    #[token("❄")]
    EmotionFear,
    #[token("✦")]
    EmotionSurprise,
    #[token("♡")]
    EmotionLove,

    // Confidence markers
    #[token("◉")]
    ConfidenceHigh,
    #[token("◎")]
    ConfidenceMedium,
    #[token("○")]
    ConfidenceLow,

    // Aspect markers
    #[token("·ing")]
    AspectProgressive,
    #[token("·ed")]
    AspectPerfective,
    #[token("·able")]
    AspectPotential,
    #[token("·ive")]
    AspectResultative,

    #[token("|")]
    Pipe,
    #[token("·")]
    MiddleDot,

    // Operators
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("<-")]
    LeftArrow,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("**")]
    StarStar,
    #[token("&&")]
    AndAnd,
    #[token("||")]
    OrOr,

    // Evidentiality markers (also usable as plain operators)
    #[token("!")]
    Bang,
    #[token("?")]
    Question,
    #[token("~")]
    Tilde,

    #[token("&")]
    Amp,
    #[token("^")]
    Caret,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("=")]
    Eq,
    #[token("+=")]
    PlusEq,
    #[token("-=")]
    MinusEq,
    #[token("*=")]
    StarEq,
    #[token("/=")]
    SlashEq,
    #[token("..")]
    DotDot,
    #[token("..=")]
    DotDotEq,
    #[token("++")]
    PlusPlus,

    // Punctuation
    #[token("::")]
    ColonColon,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,
    #[token("#!")]
    HashBang,
    #[token("#")]
    Hash,
    // `priority = 3` lets a lone `_` win over the identifier regex.
    #[token("_", priority = 3)]
    Underscore,

    // Delimiters
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,

    // Special symbols
    #[token("∅")]
    Empty,
    #[token("◯")]
    Circle,
    #[token("∞")]
    Infinity,

    // Protocol symbols
    #[token("⇒")]
    ProtoSend,
    #[token("⇐")]
    ProtoRecv,
    #[token("≋")]
    ProtoStream,
    #[token("⊸")]
    ProtoConnect,
    #[token("⏱")]
    ProtoTimeout,

    // Protocol keywords
    #[token("send")]
    Send,
    #[token("recv")]
    Recv,
    #[token("stream")]
    Stream,
    #[token("connect")]
    Connect,
    #[token("close")]
    Close,
    #[token("timeout")]
    Timeout,
    #[token("retry")]
    Retry,
    #[token("header")]
    Header,
    #[token("body")]
    Body,

    // Protocol schemes
    #[token("http")]
    Http,
    #[token("https")]
    Https,
    #[token("ws")]
    Ws,
    #[token("wss")]
    Wss,
    #[token("grpc")]
    Grpc,
    #[token("kafka")]
    Kafka,
    #[token("amqp")]
    Amqp,
    #[token("graphql")]
    GraphQL,

    // Numeric literals in various bases
    #[regex(r"0b[01_]+", |lex| lex.slice().to_string())]
    BinaryLit(String),

    #[regex(r"0o[0-7_]+", |lex| lex.slice().to_string())]
    OctalLit(String),

    #[regex(r"0x[0-9a-fA-F_]+", |lex| lex.slice().to_string())]
    HexLit(String),

    #[regex(r"0v[0-9a-jA-J_]+", |lex| lex.slice().to_string())]
    VigesimalLit(String),

    #[regex(r"0s[0-9a-zA-Z_]+", |lex| lex.slice().to_string())]
    SexagesimalLit(String),

    #[regex(r"0z[0-9a-bA-B_]+", |lex| lex.slice().to_string())]
    DuodecimalLit(String),

    #[regex(r"[0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9_]+)?", |lex| lex.slice().to_string())]
    FloatLit(String),

    #[regex(r"[0-9][0-9_]*", |lex| lex.slice().to_string())]
    IntLit(String),

    // String literal: escapes are processed into the token value.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_escape_sequences(inner)
    })]
    StringLit(String),

    #[token(r#"""""#, multiline_string_callback)]
    MultiLineStringLit(String),

    #[regex(r#"b"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        inner.as_bytes().to_vec()
    })]
    ByteStringLit(Vec<u8>),

    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        process_escape_sequences(inner)
    })]
    InterpolatedStringLit(String),

    // σ-sigil string (SQL). The sigil is multi-byte, so slice by its UTF-8 length.
    #[regex(r#"σ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "σ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringSql(String),

    // ρ-sigil string (route).
    #[regex(r#"ρ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "ρ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringRoute(String),

    #[regex(r"'([^'\\]|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_char_escape(inner)
    })]
    CharLit(char),

    // Raw string: contents are taken verbatim, no escape processing.
    #[regex(r#"r"[^"]*""#, |lex| {
        let s = lex.slice();
        s[2..s.len()-1].to_string()
    })]
    RawStringLit(String),

    // Delimited raw string r#"..."#; the callback consumes up to the closing `"#`.
    #[token(r##"r#""##, raw_string_delimited_callback)]
    RawStringDelimited(String),

    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    Ident(String),

    #[regex(r"//@\s*rune:\s*[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    RuneAnnotation(String),
}

impl Token {
    /// Returns `true` for reserved-word tokens.
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            Token::Fn | Token::Async | Token::Let | Token::Mut | Token::Const |
            Token::Type | Token::Struct | Token::Enum | Token::Trait | Token::Impl |
            Token::Mod | Token::Use | Token::Pub | Token::Actor | Token::Saga |
            Token::Scope | Token::Rune | Token::If | Token::Else | Token::Match |
            Token::Loop | Token::While | Token::For | Token::In | Token::Break |
            Token::Continue | Token::Return | Token::Yield | Token::Await
        )
    }

    /// Returns `true` for morpheme tokens (Greek letters, the parallel
    /// morphemes, and the analysis operators).
    pub fn is_morpheme(&self) -> bool {
        matches!(
            self,
            Token::Tau | Token::Phi | Token::Sigma | Token::Rho |
            Token::Lambda | Token::Pi | Token::Hourglass |
            Token::Delta | Token::Epsilon | Token::Omega | Token::Alpha | Token::Zeta |
            Token::Mu | Token::Chi | Token::Nu | Token::Xi |
            Token::Parallel | Token::Gpu |
            Token::Integral | Token::Partial | Token::Sqrt | Token::Cbrt |
            Token::Compose
        )
    }

    pub fn is_aspect(&self) -> bool {
        matches!(
            self,
            Token::AspectProgressive | Token::AspectPerfective |
            Token::AspectPotential | Token::AspectResultative
        )
    }

    pub fn is_data_op(&self) -> bool {
        matches!(
            self,
            Token::Bowtie | Token::ElementSmallVerticalBar |
            Token::SquareCup | Token::SquareCap
        )
    }

    pub fn is_bitwise_symbol(&self) -> bool {
        matches!(
            self,
            Token::BitwiseAndSymbol | Token::BitwiseOrSymbol
        )
    }

    pub fn is_quantifier(&self) -> bool {
        matches!(
            self,
            Token::ForAll | Token::Exists | Token::ElementOf | Token::NotElementOf
        )
    }

    pub fn is_set_op(&self) -> bool {
        matches!(
            self,
            Token::Union | Token::Intersection | Token::SetMinus |
            Token::Subset | Token::SubsetEq | Token::Superset | Token::SupersetEq
        )
    }

    pub fn is_logic_op(&self) -> bool {
        matches!(
            self,
            Token::LogicAnd | Token::LogicOr | Token::LogicNot | Token::LogicXor |
            Token::Top | Token::Bottom
        )
    }

    pub fn is_evidentiality(&self) -> bool {
        matches!(
            self,
            Token::Bang | Token::Question | Token::Tilde | Token::Interrobang
        )
    }

    /// Note: `⊕` (DirectSum) also serves as the positive-affect marker
    /// in these predicates.
    pub fn is_affective(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral |
            Token::IronyMark | Token::IntensityUp | Token::IntensityDown |
            Token::IntensityMax | Token::FormalRegister | Token::InformalRegister |
            Token::EmotionJoy | Token::EmotionSadness | Token::EmotionAnger |
            Token::EmotionFear | Token::EmotionSurprise | Token::EmotionLove |
            Token::ConfidenceHigh | Token::ConfidenceMedium | Token::ConfidenceLow
        )
    }

    pub fn is_sentiment(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral
        )
    }

    pub fn is_emotion(&self) -> bool {
        matches!(
            self,
            Token::EmotionJoy | Token::EmotionSadness | Token::EmotionAnger |
            Token::EmotionFear | Token::EmotionSurprise | Token::EmotionLove
        )
    }

    pub fn is_intensity(&self) -> bool {
        matches!(
            self,
            Token::IntensityUp | Token::IntensityDown | Token::IntensityMax
        )
    }
}

/// Streaming lexer over source text: wraps the generated logos lexer,
/// converts its byte ranges into `Span`s, and supports one token of lookahead.
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    peeked: Option<Option<(Token, Span)>>,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source),
            peeked: None,
        }
    }

    /// Returns the next token and its span, or `None` at end of input.
    pub fn next_token(&mut self) -> Option<(Token, Span)> {
        if let Some(peeked) = self.peeked.take() {
            return peeked;
        }

        match self.inner.next() {
            Some(Ok(token)) => {
                let span = self.inner.span();
                Some((token, Span::new(span.start, span.end)))
            }
            Some(Err(_)) => {
                // Unrecognized input: skip it and continue lexing.
                self.next_token()
            }
            None => None,
        }
    }

    /// Returns the next token without consuming it.
    pub fn peek(&mut self) -> Option<&(Token, Span)> {
        if self.peeked.is_none() {
            self.peeked = Some(self.next_token());
        }
        self.peeked.as_ref().and_then(|p| p.as_ref())
    }

    /// The underlying lexer's current span.
    pub fn span(&self) -> Span {
        let span = self.inner.span();
        Span::new(span.start, span.end)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_morphemes() {
        let mut lexer = Lexer::new("τ φ σ ρ λ Σ Π ⌛");
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Phi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Rho, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lambda, _))));
        // Uppercase Σ lexes to the same token as lowercase σ.
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Pi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Hourglass, _))));
    }

    #[test]
    fn test_evidentiality() {
        let mut lexer = Lexer::new("value! uncertain? reported~ paradox‽");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "value"));
        assert!(matches!(lexer.next_token(), Some((Token::Bang, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "uncertain"));
        assert!(matches!(lexer.next_token(), Some((Token::Question, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "reported"));
        assert!(matches!(lexer.next_token(), Some((Token::Tilde, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "paradox"));
        assert!(matches!(lexer.next_token(), Some((Token::Interrobang, _))));
    }

    #[test]
    fn test_pipe_chain() {
        let mut lexer = Lexer::new("data|τ{f}|φ{p}|σ");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "data"));
        assert!(matches!(lexer.next_token(), Some((Token::Pipe, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("42 0b1010 0o52 0x2A 0v22 0s42 3.14");
        assert!(matches!(lexer.next_token(), Some((Token::IntLit(s), _)) if s == "42"));
        assert!(matches!(lexer.next_token(), Some((Token::BinaryLit(s), _)) if s == "0b1010"));
        assert!(matches!(lexer.next_token(), Some((Token::OctalLit(s), _)) if s == "0o52"));
        assert!(matches!(lexer.next_token(), Some((Token::HexLit(s), _)) if s == "0x2A"));
        assert!(matches!(lexer.next_token(), Some((Token::VigesimalLit(s), _)) if s == "0v22"));
        assert!(matches!(lexer.next_token(), Some((Token::SexagesimalLit(s), _)) if s == "0s42"));
        assert!(matches!(lexer.next_token(), Some((Token::FloatLit(s), _)) if s == "3.14"));
    }
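
    // Duodecimal (base-12) literals use the 0z prefix with digits 0-9 plus
    // a/b, per the DuodecimalLit regex; not exercised by test_numbers above.
    #[test]
    fn test_duodecimal_literal() {
        let mut lexer = Lexer::new("0z1ab 0zB_0");
        assert!(matches!(lexer.next_token(), Some((Token::DuodecimalLit(s), _)) if s == "0z1ab"));
        assert!(matches!(lexer.next_token(), Some((Token::DuodecimalLit(s), _)) if s == "0zB_0"));
    }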

    #[test]
    fn test_incorporation() {
        let mut lexer = Lexer::new("file·open·read");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "file"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "open"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "read"));
    }
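
    // A lone `_` lexes as Underscore thanks to its explicit priority = 3,
    // while `_x` still lexes as an ordinary identifier (longest match wins).
    #[test]
    fn test_underscore_vs_ident() {
        let mut lexer = Lexer::new("_ _x");
        assert!(matches!(lexer.next_token(), Some((Token::Underscore, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "_x"));
    }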

    #[test]
    fn test_special_symbols() {
        let mut lexer = Lexer::new("∅ ◯ ∞");
        assert!(matches!(lexer.next_token(), Some((Token::Empty, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Circle, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Infinity, _))));
    }

    #[test]
    fn test_quantifiers() {
        let mut lexer = Lexer::new("∀x ∃y x∈S y∉T");
        assert!(matches!(lexer.next_token(), Some((Token::ForAll, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Exists, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::ElementOf, _))));
    }

    #[test]
    fn test_set_operations() {
        let mut lexer = Lexer::new("A∪B A∩B A∖B A⊂B A⊆B");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Union, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "B"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Intersection, _))));
    }

    #[test]
    fn test_logic_operators() {
        let mut lexer = Lexer::new("p∧q p∨q ¬p p⊻q ⊤ ⊥");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicAnd, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "q"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicOr, _))));
    }

    #[test]
    fn test_analysis_operators() {
        let mut lexer = Lexer::new("∫f ∂g √x ∛y f∘g");
        assert!(matches!(lexer.next_token(), Some((Token::Integral, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::Partial, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "g"));
        assert!(matches!(lexer.next_token(), Some((Token::Sqrt, _))));
    }

    #[test]
    fn test_additional_morphemes() {
        let mut lexer = Lexer::new("δ ε ω α ζ");
        assert!(matches!(lexer.next_token(), Some((Token::Delta, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Epsilon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Omega, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Alpha, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Zeta, _))));
    }

    #[test]
    fn test_ffi_keywords() {
        let mut lexer = Lexer::new("extern unsafe");
        assert!(matches!(lexer.next_token(), Some((Token::Extern, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Unsafe, _))));
    }

    #[test]
    fn test_parallel_morphemes() {
        let mut lexer = Lexer::new("∥ parallel ⊛ gpu");
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
    }
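
    // Lookahead and error recovery for the wrapper Lexer: peek() buffers one
    // token without consuming it, and next_token() skips input that matches
    // no token rule (logos reports such input as an error).
    #[test]
    fn test_peek_does_not_consume() {
        let mut lexer = Lexer::new("fn x");
        assert!(matches!(lexer.peek(), Some((Token::Fn, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Fn, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(lexer.next_token().is_none());
    }

    #[test]
    fn test_error_recovery_skips_unknown_input() {
        let mut lexer = Lexer::new("x § y");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(lexer.next_token().is_none());
    }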

    #[test]
    fn test_string_escape_sequences() {
        let mut lexer = Lexer::new(r#""hello\nworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\nworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\tworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\tworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\rworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\rworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\\world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\\world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\"world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\"world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\0world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\0world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }
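
    // Multiline strings are delimited by triple quotes; the callback scans
    // the remainder for the closing """ and still applies escape processing.
    #[test]
    fn test_multiline_string() {
        let mut lexer = Lexer::new("\"\"\"line one\nline two\"\"\"");
        match lexer.next_token() {
            Some((Token::MultiLineStringLit(s), _)) => assert_eq!(s, "line one\nline two"),
            other => panic!("Expected MultiLineStringLit, got {:?}", other),
        }
    }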

    #[test]
    fn test_string_hex_escape() {
        let mut lexer = Lexer::new(r#""hello\x41world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "helloAworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_unicode_escape() {
        let mut lexer = Lexer::new(r#""hello\u{1F600}world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello😀world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""\u{03C4}""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τ"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_char_escape_sequences() {
        let mut lexer = Lexer::new(r"'\n'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\n'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\t'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\t'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\\'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\\'),
            other => panic!("Expected CharLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string() {
        let mut lexer = Lexer::new(r#"r"hello\nworld""#);
        match lexer.next_token() {
            Some((Token::RawStringLit(s), _)) => assert_eq!(s, r"hello\nworld"),
            other => panic!("Expected RawStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string_delimited() {
        let mut lexer = Lexer::new(r##"r#"hello "world""#"##);
        match lexer.next_token() {
            Some((Token::RawStringDelimited(s), _)) => assert_eq!(s, r#"hello "world""#),
            other => panic!("Expected RawStringDelimited, got {:?}", other),
        }
    }

    #[test]
    fn test_byte_string() {
        let mut lexer = Lexer::new(r#"b"hello""#);
        match lexer.next_token() {
            Some((Token::ByteStringLit(bytes), _)) => {
                assert_eq!(bytes, vec![104, 101, 108, 108, 111]); // "hello"
            }
            other => panic!("Expected ByteStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_interpolated_string() {
        let mut lexer = Lexer::new(r#"f"hello {name}""#);
        match lexer.next_token() {
            Some((Token::InterpolatedStringLit(s), _)) => assert_eq!(s, "hello {name}"),
            other => panic!("Expected InterpolatedStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_sql() {
        let mut lexer = Lexer::new(r#"σ"SELECT * FROM {table}""#);
        match lexer.next_token() {
            Some((Token::SigilStringSql(s), _)) => assert_eq!(s, "SELECT * FROM {table}"),
            other => panic!("Expected SigilStringSql, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_route() {
        let mut lexer = Lexer::new(r#"ρ"/api/v1/{resource}/{id}""#);
        match lexer.next_token() {
            Some((Token::SigilStringRoute(s), _)) => assert_eq!(s, "/api/v1/{resource}/{id}"),
            other => panic!("Expected SigilStringRoute, got {:?}", other),
        }
    }

    #[test]
    fn test_unicode_in_strings() {
        let mut lexer = Lexer::new(r#""τφσρ 你好 🦀""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τφσρ 你好 🦀"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_empty_string() {
        let mut lexer = Lexer::new(r#""""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, ""),
            other => panic!("Expected empty StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_escape_sequence_helper() {
        assert_eq!(process_escape_sequences(r"hello\nworld"), "hello\nworld");
        assert_eq!(process_escape_sequences(r"hello\tworld"), "hello\tworld");
        assert_eq!(process_escape_sequences(r"hello\\world"), "hello\\world");
        assert_eq!(process_escape_sequences(r#"hello\"world"#), "hello\"world");
        assert_eq!(process_escape_sequences(r"hello\x41world"), "helloAworld");
        assert_eq!(process_escape_sequences(r"hello\u{1F600}world"), "hello😀world");
    }
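
    // The helper keeps unrecognized escapes verbatim (the `Some(other)` arm),
    // so `\q` survives as a literal backslash followed by `q`.
    #[test]
    fn test_unknown_escape_preserved() {
        assert_eq!(process_escape_sequences(r"hello\qworld"), "hello\\qworld");
    }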
}