1use crate::span::Span;
6use logos::Logos;
7
/// Decodes backslash escape sequences in a string-literal body.
///
/// Supported escapes: `\n`, `\t`, `\r`, `\\`, `\"`, `\'`, `\0`,
/// `\xNN` (up to two hex digits), `\u{...}` (hex digits up to `}`),
/// and a backslash-newline line continuation that also swallows any
/// spaces/tabs that follow the newline. An unknown escape is kept
/// verbatim (backslash plus the following character); a malformed
/// hex/unicode escape is silently dropped from the output.
fn process_escape_sequences(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut it = s.chars().peekable();

    while let Some(ch) = it.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match it.next() {
            // Line continuation: drop the newline and the next line's indentation.
            Some('\n') => {
                while it.peek().map_or(false, |&c| c == ' ' || c == '\t') {
                    it.next();
                }
            }
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('\'') => out.push('\''),
            Some('0') => out.push('\0'),
            Some('x') => {
                // Consume at most two leading hex digits after `\x`.
                let mut digits = String::new();
                while digits.len() < 2 && it.peek().map_or(false, |c| c.is_ascii_hexdigit()) {
                    digits.push(it.next().unwrap());
                }
                // Empty/invalid digits parse as Err and the escape vanishes,
                // matching the lenient behavior elsewhere in this lexer.
                if let Ok(byte) = u8::from_str_radix(&digits, 16) {
                    out.push(byte as char);
                }
            }
            Some('u') => {
                if it.peek() == Some(&'{') {
                    it.next(); // consume '{'
                    let mut digits = String::new();
                    loop {
                        match it.peek() {
                            Some(&'}') => {
                                it.next();
                                break;
                            }
                            Some(&c) if c.is_ascii_hexdigit() => {
                                digits.push(c);
                                it.next();
                            }
                            // Non-hex or end of input: stop without consuming.
                            _ => break,
                        }
                    }
                    let decoded = u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32);
                    if let Some(c) = decoded {
                        out.push(c);
                    }
                }
            }
            // Unknown escape: keep the backslash and the character as written.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Trailing lone backslash.
            None => out.push('\\'),
        }
    }
    out
}
85
/// Decodes escape sequences in a byte-string literal body.
///
/// Same simple escapes as `process_escape_sequences` minus `\u{...}`:
/// `\n`, `\t`, `\r`, `\\`, `\"`, `\'`, `\0`, `\xNN`, and
/// backslash-newline continuation. Non-ASCII characters (plain or in
/// an unknown escape) are dropped, since a byte string holds raw bytes.
fn process_byte_escape_sequences(s: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(s.len());
    let mut it = s.chars().peekable();

    while let Some(ch) = it.next() {
        if ch != '\\' {
            // Plain character: only ASCII survives into the byte string.
            if ch.is_ascii() {
                out.push(ch as u8);
            }
            continue;
        }
        match it.next() {
            // Line continuation: also swallow indentation on the next line.
            Some('\n') => {
                while it.peek().map_or(false, |&c| c == ' ' || c == '\t') {
                    it.next();
                }
            }
            Some('n') => out.push(b'\n'),
            Some('t') => out.push(b'\t'),
            Some('r') => out.push(b'\r'),
            Some('\\') => out.push(b'\\'),
            Some('"') => out.push(b'"'),
            Some('\'') => out.push(b'\''),
            Some('0') => out.push(0),
            Some('x') => {
                // Consume at most two leading hex digits after `\x`.
                let mut digits = String::new();
                while digits.len() < 2 && it.peek().map_or(false, |c| c.is_ascii_hexdigit()) {
                    digits.push(it.next().unwrap());
                }
                if let Ok(byte) = u8::from_str_radix(&digits, 16) {
                    out.push(byte);
                }
            }
            // Unknown escape: keep the backslash; keep the char only if ASCII.
            Some(other) => {
                out.push(b'\\');
                if other.is_ascii() {
                    out.push(other as u8);
                }
            }
            // Trailing lone backslash.
            None => out.push(b'\\'),
        }
    }
    out
}
141
142fn block_comment_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
147 let remainder = lex.remainder();
148
149 if let Some(end_pos) = remainder.find("*/") {
151 let content = &remainder[..end_pos];
152 lex.bump(end_pos + 2);
154 Some(content.to_string())
155 } else {
156 let len = remainder.len();
158 lex.bump(len);
159 Some(remainder.to_string())
160 }
161}
162
163fn raw_string_delimited_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
164 let remainder = lex.remainder();
165
166 if let Some(end_pos) = remainder.find("\"#") {
168 let content = &remainder[..end_pos];
169 lex.bump(end_pos + 2);
171 Some(content.to_string())
172 } else {
173 None
174 }
175}
176
177fn multiline_string_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
180 let remainder = lex.remainder();
181
182 if let Some(end_pos) = remainder.find("\"\"\"") {
184 let content = &remainder[..end_pos];
185 lex.bump(end_pos + 3);
187 Some(process_escape_sequences(content))
188 } else {
189 None
191 }
192}
193
/// Decodes the body of a char literal into a single `char`.
///
/// A non-backslash first character is returned as-is; a leading
/// backslash introduces the usual escapes (`\n`, `\t`, `\r`, `\\`,
/// `\"`, `\'`, `\0`, `\xNN`, `\u{...}`). Malformed or empty input
/// yields the placeholder `'?'` instead of failing.
fn process_char_escape(s: &str) -> char {
    let mut it = s.chars();
    match it.next() {
        None => '?',
        Some('\\') => match it.next() {
            Some('n') => '\n',
            Some('t') => '\t',
            Some('r') => '\r',
            Some('\\') => '\\',
            Some('"') => '"',
            Some('\'') => '\'',
            Some('0') => '\0',
            Some('x') => {
                // Two hex digits expected after `\x`.
                let digits: String = it.by_ref().take(2).collect();
                match u8::from_str_radix(&digits, 16) {
                    Ok(byte) => byte as char,
                    Err(_) => '?',
                }
            }
            Some('u') => match it.next() {
                // `\u{XXXX}`: hex digits up to the closing brace.
                Some('{') => {
                    let digits: String = it.by_ref().take_while(|&c| c != '}').collect();
                    u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)
                        .unwrap_or('?')
                }
                _ => '?',
            },
            // Unknown escape: the escaped character itself.
            Some(c) => c,
            None => '?',
        },
        Some(c) => c,
    }
}
230
/// Decodes the body of a byte-char literal (`b'...'`) into a `u8`.
///
/// Handles the same simple escapes as `process_char_escape` minus
/// `\u{...}`. Malformed or empty input yields `b'?'` as a placeholder.
/// Note: a non-ASCII character is truncated by the `as u8` cast, same
/// as the original behavior — the `b'...'` regex should prevent that
/// input from reaching here in practice.
fn process_byte_char_escape(s: &str) -> u8 {
    let mut it = s.chars();
    match it.next() {
        None => b'?',
        Some('\\') => match it.next() {
            Some('n') => b'\n',
            Some('t') => b'\t',
            Some('r') => b'\r',
            Some('\\') => b'\\',
            Some('"') => b'"',
            Some('\'') => b'\'',
            Some('0') => b'\0',
            Some('x') => {
                // Two hex digits expected after `\x`.
                let digits: String = it.take(2).collect();
                u8::from_str_radix(&digits, 16).unwrap_or(b'?')
            }
            // Unknown escape: the escaped character, truncated to a byte.
            Some(c) => c as u8,
            None => b'?',
        },
        Some(c) => c as u8,
    }
}
254
255#[derive(Logos, Debug, Clone, PartialEq)]
257#[logos(skip r"[ \t\r\n\f]+")]
258pub enum Token {
259 #[regex(r"//[^\n]*", |lex| lex.slice().to_string())]
261 LineComment(String),
262
263 #[regex(r"//![^\n]*", |lex| lex.slice().to_string())]
264 DocComment(String),
265
266 #[regex(r"~~[^\n]*", |lex| lex.slice().to_string())]
268 TildeComment(String),
269
270 #[token("/*", block_comment_callback)]
272 BlockComment(String),
273
274 #[token("rite")] Fn,
278 #[token("async")]
279 Async,
280 #[token("≔")] Let,
282 #[token("vary")] Mut,
285 #[token("◆")] Const,
287 #[token("linear")]
288 Linear,
289 #[token("type")]
290 Type,
291 #[token("sigil")] Struct,
294 #[token("ᛈ")] Enum,
296 #[token("aspect")] Trait,
299 #[token("⊢")] Impl,
301 #[token("scroll")] Mod,
303 #[token("invoke")] Use,
305 #[token("☉")] Pub,
307 #[token("actor")]
308 Actor,
309 #[token("saga")]
310 Saga,
311 #[token("scope")]
312 Scope,
313 #[token("rune")]
314 Rune,
315 #[token("macro")]
316 Macro,
317 #[token("macro_rules")]
318 MacroRules,
319
320 #[token("⎇")] If,
323 #[token("⎉")] Else,
325 #[token("⌥")] Match,
327 #[token("forever")] Loop,
330 #[token("⟳")] While,
332 #[token("each")] For,
335 #[token("of")] In,
338 #[token("⊲")] Break,
340 #[token("⊳")] Continue,
342 #[token("⤺")] Return,
344 #[token("yield")]
345 Yield,
346 #[token("await")]
347 Await,
348
349 #[token("this")] SelfLower,
353 #[token("This")] SelfUpper,
355 #[token("above")] Super,
358 #[token("tome")] Crate,
360 #[token("∋")] Where,
362 #[token("as")] As,
364 #[token("dyn")]
365 Dyn,
366 #[token("move")]
367 Move,
368 #[token("ref")]
369 Ref,
370 #[token("static")]
371 Static,
372 #[token("unsafe")]
373 Unsafe,
374 #[token("extern")]
375 Extern,
376 #[token("asm")]
377 Asm,
378 #[token("volatile")]
379 Volatile,
380 #[token("naked")]
381 Naked,
382 #[token("packed")]
383 Packed,
384 #[token("simd")]
385 Simd,
386 #[token("atomic")]
387 Atomic,
388 #[token("derive")]
389 Derive,
390 #[token("on")]
391 On,
392
393 #[token("alter")]
395 Alter,
396 #[token("switch")]
397 Switch,
398 #[token("headspace")]
399 Headspace,
400 #[token("cocon")]
401 CoCon,
402 #[token("reality")]
403 Reality,
404 #[token("split")]
405 Split,
406 #[token("trigger")]
407 Trigger,
408 #[token("layer")]
409 Layer,
410 #[token("location")]
411 Location,
412 #[token("states")]
413 States,
414 #[token("anima")]
415 Anima,
416 #[token("to")]
417 To,
418 #[token("from")]
419 From,
420
421 #[token("@!")]
423 AlterSourceFronting,
424 #[token("@~")]
425 AlterSourceCoCon,
426 #[token("@?")]
427 AlterSourceDormant,
428 #[token("@‽")]
429 AlterSourceBlended,
430
431 #[token("yea")] True,
435 #[token("nay")] False,
437
438 #[token("null")]
440 Null,
441
442 #[token("τ")]
444 #[token("Τ")]
445 Tau, #[token("φ")]
448 #[token("Φ")]
449 Phi, #[token("σ")]
452 #[token("Σ")]
453 Sigma, #[token("ρ")]
456 #[token("Ρ")]
457 Rho, #[token("λ")]
460 #[token("Λ")]
461 Lambda, #[token("Π")]
464 Pi, #[token("⌛")]
467 Hourglass, #[token("δ")]
471 #[token("Δ")]
472 Delta, #[token("ε")]
475 Epsilon, #[token("ω")]
478 #[token("Ω")]
479 Omega, #[token("α")]
482 Alpha, #[token("ζ")]
485 Zeta, #[token("μ")]
489 #[token("Μ")]
490 Mu, #[token("χ")]
493 #[token("Χ")]
494 Chi, #[token("ν")]
497 #[token("Ν")]
498 Nu, #[token("ξ")]
501 #[token("Ξ")]
502 Xi, #[token("ψ")]
505 #[token("Ψ")]
506 Psi, #[token("θ")]
509 #[token("Θ")]
510 Theta, #[token("κ")]
513 #[token("Κ")]
514 Kappa, #[token("∥")]
518 #[token("parallel")]
519 Parallel, #[token("⊛")]
522 #[token("gpu")]
523 Gpu, #[token("∀")]
527 ForAll, #[token("∃")]
530 Exists, #[token("∈")]
533 ElementOf, #[token("∉")]
536 NotElementOf, #[token("∪")]
540 Union, #[token("∩")]
543 Intersection, #[token("∖")]
546 SetMinus, #[token("⊂")]
549 Subset, #[token("⊆")]
552 SubsetEq, #[token("⊃")]
555 Superset, #[token("⊇")]
558 SupersetEq, #[token("∧")]
562 LogicAnd, #[token("∨")]
565 LogicOr, #[token("¬")]
568 LogicNot, #[token("⊻")]
571 LogicXor, #[token("⊤")]
574 Top, #[token("⊥")]
577 Bottom, #[token("⋏")]
581 BitwiseAndSymbol, #[token("⋎")]
584 BitwiseOrSymbol, #[token("⊙")]
587 CircledDot, #[token("∷")]
593 TypeAnnotation, #[token("∫")]
597 Integral, #[token("∂")]
600 Partial, #[token("√")]
603 Sqrt, #[token("∛")]
606 Cbrt, #[token("∇")]
609 Nabla, #[token("⍋")]
613 GradeUp, #[token("⍒")]
616 GradeDown, #[token("⌽")]
619 Rotate, #[token("↻")]
622 CycleArrow, #[token("⌺")]
625 QuadDiamond, #[token("⊞")]
628 SquaredPlus, #[token("⍳")]
631 Iota, #[token("∘")]
635 Compose, #[token("⊗")]
638 Tensor, #[token("⊕")]
641 DirectSum, #[token("⋈")]
645 Bowtie, #[token("⋳")]
648 ElementSmallVerticalBar, #[token("⊔")]
651 SquareCup, #[token("⊓")]
654 SquareCap, #[token("‽")]
659 Interrobang, #[token("◊")]
662 Lozenge, #[token("□")]
665 BoxSymbol, #[token("∿")]
670 #[token("legion_field")]
671 LegionField, #[token("⫰")]
674 #[token("interfere")]
675 Interfere, #[token("⟁")]
678 #[token("distribute")]
679 Distribute, #[token("⟀")]
682 #[token("gather")]
683 Gather, #[token("↠")]
686 #[token("broadcast")]
687 Broadcast, #[token("⇢")]
690 #[token("consensus")]
691 Consensus, #[token("⊕=")]
695 DirectSumEq, #[token("∂=")]
698 PartialEq_, #[token("⫰=")]
701 InterfereEq, #[token("⊖")]
706 AffectNegative, #[token("⊜")]
709 AffectNeutral, #[token("⸮")]
715 IronyMark, #[token("↑")]
719 IntensityUp, #[token("↓")]
722 IntensityDown, #[token("⇈")]
725 IntensityMax, #[token("♔")]
729 FormalRegister, #[token("♟")]
732 InformalRegister, #[token("☺")]
736 EmotionJoy, #[token("☹")]
739 EmotionSadness, #[token("⚡")]
742 EmotionAnger, #[token("❄")]
745 EmotionFear, #[token("✦")]
748 EmotionSurprise, #[token("♡")]
751 EmotionLove, #[token("◉")]
755 ConfidenceHigh, #[token("◎")]
758 ConfidenceMedium, #[token("○")]
761 ConfidenceLow, #[token("·ing")]
765 AspectProgressive, #[token("·ed")]
768 AspectPerfective, #[token("·able")]
771 AspectPotential, #[token("·ive")]
774 AspectResultative, #[token("|")]
778 Pipe,
779 #[token("·")] MiddleDot,
781 #[token("→")] Arrow,
783 #[token("=>")]
784 FatArrow,
785 #[token("<-")]
786 LeftArrow,
787 #[token("==")]
788 EqEq,
789 #[token("!=")]
790 NotEq,
791 #[token("<=")]
792 LtEq,
793 #[token(">=")]
794 GtEq,
795 #[token("<")]
796 Lt,
797 #[token(">")]
798 Gt,
799 #[token("+")]
800 Plus,
801 #[token("-")]
802 Minus,
803 #[token("*")]
804 Star,
805 #[token("/")]
806 Slash,
807 #[token("%")]
808 Percent,
809 #[token("**")]
810 StarStar, AndAnd,
814 OrOr,
815 #[token("!")]
816 Bang, #[token("?")]
818 Question, #[token("~")]
820 Tilde, #[token("&")]
822 Amp,
823 #[token("^")]
824 Caret,
825 #[token("<<=")]
826 ShlEq,
827 #[token(">>=")]
828 ShrEq,
829 #[token("<<")]
830 Shl,
831 #[token(">>")]
832 Shr,
833 #[token("=")]
834 Eq,
835 #[token("+=")]
836 PlusEq,
837 #[token("-=")]
838 MinusEq,
839 #[token("*=")]
840 StarEq,
841 #[token("/=")]
842 SlashEq,
843 #[token("%=")]
844 PercentEq,
845 #[token("|=")]
846 PipeEq,
847 #[token("&=")]
848 AmpEq,
849 #[token("^=")]
850 CaretEq,
851 #[token("..")]
852 DotDot,
853 #[token("..=")]
854 DotDotEq,
855 #[token("++")]
856 PlusPlus, ColonColon,
859 #[token(":")]
860 Colon,
861 #[token(";")]
862 Semi,
863 #[token(",")]
864 Comma,
865 #[token(".")]
866 Dot,
867 #[token("@")]
868 At,
869 #[token("#!")]
870 HashBang, #[token("#")]
872 Hash,
873 #[token("_", priority = 3)]
874 Underscore,
875
876 #[token("(")]
878 LParen,
879 #[token(")")]
880 RParen,
881 #[token("{")]
882 LBrace,
883 #[token("}")]
884 RBrace,
885 #[token("[")]
886 LBracket,
887 #[token("]")]
888 RBracket,
889
890 #[token("∅")]
892 Empty, #[token("◯")]
894 Circle, #[token("∞")]
896 Infinity, #[token("⇒")]
900 ProtoSend, #[token("⇐")]
903 ProtoRecv, #[token("≋")]
906 ProtoStream, #[token("⊸")]
909 ProtoConnect, #[token("⏱")]
912 ProtoTimeout, #[token("send")]
918 Send,
919 #[token("recv")]
920 Recv,
921 #[token("stream")]
922 Stream,
923 #[token("connect")]
924 Connect,
925 #[token("close")]
926 Close,
927 #[token("timeout")]
928 Timeout,
929 #[token("retry")]
930 Retry,
931 #[token("header")]
932 Header,
933 #[token("body")]
934 Body,
935
936 #[token("http")]
938 Http,
939 #[token("https")]
940 Https,
941 #[token("ws")]
942 Ws,
943 #[token("wss")]
944 Wss,
945 #[token("grpc")]
946 Grpc,
947 #[token("kafka")]
948 Kafka,
949 #[token("amqp")]
950 Amqp,
951 #[token("graphql")]
952 GraphQL,
953
954 #[regex(r"0b[01_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
957 BinaryLit(String),
958
959 #[regex(r"0o[0-7_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
961 OctalLit(String),
962
963 #[regex(r"0x[0-9a-fA-F_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
965 HexLit(String),
966
967 #[regex(r"0v[0-9a-jA-J_]+", |lex| lex.slice().to_string())]
969 VigesimalLit(String),
970
971 #[regex(r"0s[0-9a-zA-Z_]+", |lex| lex.slice().to_string())]
973 SexagesimalLit(String),
974
975 #[regex(r"0z[0-9a-bA-B_]+", |lex| lex.slice().to_string())]
977 DuodecimalLit(String),
978
979 #[regex(r"([0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9_]+)?|[0-9][0-9_]*[eE][+-]?[0-9_]+)(f16|f32|f64|f128)?", |lex| lex.slice().to_string())]
982 FloatLit(String),
983
984 #[regex(r"[0-9][0-9_]*(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
986 IntLit(String),
987
988 #[regex(r#""([^"\\]|\\(.|\n))*""#, |lex| {
992 let s = lex.slice();
993 let inner = &s[1..s.len()-1];
994 process_escape_sequences(inner)
995 })]
996 StringLit(String),
997
998 #[token(r#"""""#, multiline_string_callback)]
1000 MultiLineStringLit(String),
1001
1002 #[regex(r#"b"([^"\\]|\\.)*""#, |lex| {
1004 let s = lex.slice();
1005 let inner = &s[2..s.len()-1];
1006 process_byte_escape_sequences(inner)
1007 })]
1008 ByteStringLit(Vec<u8>),
1009
1010 #[regex(r#"f"([^"\\]|\\.)*""#, |lex| {
1012 let s = lex.slice();
1013 let inner = &s[2..s.len()-1];
1014 process_escape_sequences(inner)
1015 })]
1016 InterpolatedStringLit(String),
1017
1018 #[regex(r#"σ"([^"\\]|\\.)*""#, |lex| {
1020 let s = lex.slice();
1021 let start = "σ".len() + 1; let inner = &s[start..s.len()-1];
1024 process_escape_sequences(inner)
1025 })]
1026 SigilStringSql(String),
1027
1028 #[regex(r#"ρ"([^"\\]|\\.)*""#, |lex| {
1030 let s = lex.slice();
1031 let start = "ρ".len() + 1; let inner = &s[start..s.len()-1];
1034 process_escape_sequences(inner)
1035 })]
1036 SigilStringRoute(String),
1037
1038 #[regex(r"'([^'\\]|\\x[0-9a-fA-F]{2}|\\u\{[0-9a-fA-F]{1,6}\}|\\.)'", |lex| {
1041 let s = lex.slice();
1042 let inner = &s[1..s.len()-1];
1043 process_char_escape(inner)
1044 })]
1045 CharLit(char),
1046
1047 #[regex(r"b'([^'\\]|\\x[0-9a-fA-F]{2}|\\.)'", |lex| {
1049 let s = lex.slice();
1050 let inner = &s[2..s.len()-1];
1052 process_byte_char_escape(inner)
1053 })]
1054 ByteCharLit(u8),
1055
1056 #[regex(r#"r"([^"\\]|\\.)*""#, |lex| {
1058 let s = lex.slice();
1059 s[2..s.len()-1].to_string()
1060 })]
1061 RawStringLit(String),
1062
1063 #[token(r##"r#""##, raw_string_delimited_callback)]
1065 RawStringDelimited(String),
1066
1067 #[regex(r"'[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice()[1..].to_string())]
1069 Lifetime(String),
1070
1071 #[regex(r"[a-zA-Z_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ][a-zA-Z0-9_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ]*", |lex| lex.slice().to_string())]
1075 Ident(String),
1076
1077 #[regex(r"//@\s*rune:\s*[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
1079 RuneAnnotation(String),
1080}
1081
impl Token {
    /// True for any language keyword, including the plurality keywords
    /// (delegates to `is_plurality_keyword` for those).
    ///
    /// NOTE(review): `Linear`, `Macro`, `MacroRules`, and the
    /// qualifier keywords (`Dyn`, `Move`, `Static`, ...) are not
    /// included here — confirm that is intentional.
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            Token::Fn
                | Token::Async
                | Token::Let
                | Token::Mut
                | Token::Const
                | Token::Type
                | Token::Struct
                | Token::Enum
                | Token::Trait
                | Token::Impl
                | Token::Mod
                | Token::Use
                | Token::Pub
                | Token::Actor
                | Token::Saga
                | Token::Scope
                | Token::Rune
                | Token::If
                | Token::Else
                | Token::Match
                | Token::Loop
                | Token::While
                | Token::For
                | Token::In
                | Token::Break
                | Token::Continue
                | Token::Return
                | Token::Yield
                | Token::Await
        ) || self.is_plurality_keyword()
    }

    /// True for plurality-system keywords (alters, headspace, etc.).
    pub fn is_plurality_keyword(&self) -> bool {
        matches!(
            self,
            Token::Alter
                | Token::Switch
                | Token::Headspace
                | Token::CoCon
                | Token::Reality
                | Token::Split
                | Token::Trigger
                | Token::Layer
                | Token::Location
                | Token::States
                | Token::Anima
                | Token::To
                | Token::From
        )
    }

    /// True for the `@`-prefixed alter-source markers.
    pub fn is_alter_source(&self) -> bool {
        matches!(
            self,
            Token::AlterSourceFronting
                | Token::AlterSourceCoCon
                | Token::AlterSourceDormant
                | Token::AlterSourceBlended
        )
    }

    /// True for pipeline/computation morphemes (Greek letters and the
    /// parallel/GPU/analysis operators).
    ///
    /// NOTE(review): `Psi`, `Theta`, and `Kappa` exist as tokens but
    /// are absent from this list — confirm whether that is deliberate.
    pub fn is_morpheme(&self) -> bool {
        matches!(
            self,
            Token::Tau | Token::Phi | Token::Sigma | Token::Rho |
            Token::Lambda | Token::Pi | Token::Hourglass |
            Token::Delta | Token::Epsilon | Token::Omega | Token::Alpha | Token::Zeta |
            Token::Mu | Token::Chi | Token::Nu | Token::Xi | Token::Parallel | Token::Gpu | Token::Integral | Token::Partial | Token::Sqrt | Token::Cbrt |
            Token::Compose
        )
    }

    /// True for the `·ing`/`·ed`/`·able`/`·ive` aspect suffixes.
    pub fn is_aspect(&self) -> bool {
        matches!(
            self,
            Token::AspectProgressive
                | Token::AspectPerfective
                | Token::AspectPotential
                | Token::AspectResultative
        )
    }

    /// True for relational/data-flow operators (join, filter-element,
    /// lattice join/meet).
    pub fn is_data_op(&self) -> bool {
        matches!(
            self,
            Token::Bowtie | Token::ElementSmallVerticalBar | Token::SquareCup | Token::SquareCap
        )
    }

    /// True for the dedicated bitwise AND/OR symbols (⋏ / ⋎).
    pub fn is_bitwise_symbol(&self) -> bool {
        matches!(self, Token::BitwiseAndSymbol | Token::BitwiseOrSymbol)
    }

    /// True for quantifier and membership symbols (∀ ∃ ∈ ∉).
    pub fn is_quantifier(&self) -> bool {
        matches!(
            self,
            Token::ForAll | Token::Exists | Token::ElementOf | Token::NotElementOf
        )
    }

    /// True for set-theoretic operators (∪ ∩ ∖ ⊂ ⊆ ⊃ ⊇).
    pub fn is_set_op(&self) -> bool {
        matches!(
            self,
            Token::Union
                | Token::Intersection
                | Token::SetMinus
                | Token::Subset
                | Token::SubsetEq
                | Token::Superset
                | Token::SupersetEq
        )
    }

    /// True for propositional-logic operators and constants
    /// (∧ ∨ ¬ ⊻ ⊤ ⊥).
    pub fn is_logic_op(&self) -> bool {
        matches!(
            self,
            Token::LogicAnd
                | Token::LogicOr
                | Token::LogicNot
                | Token::LogicXor
                | Token::Top
                | Token::Bottom
        )
    }

    /// True for evidentiality markers — the postfix `!`, `?`, `~`, `‽`,
    /// and `◊` that annotate how a value is known.
    pub fn is_evidentiality(&self) -> bool {
        matches!(
            self,
            Token::Bang | Token::Question | Token::Tilde | Token::Interrobang | Token::Lozenge
        )
    }

    /// True for legion (distributed-computation) morphemes.
    ///
    /// NOTE(review): `ConfidenceHigh` appearing here alongside the
    /// legion operators looks surprising — confirm it is intended.
    pub fn is_legion_morpheme(&self) -> bool {
        matches!(
            self,
            Token::LegionField | Token::DirectSum | Token::Interfere | Token::ConfidenceHigh | Token::Distribute | Token::Gather | Token::Broadcast | Token::Consensus | Token::Partial
        )
    }

    /// True for the compound legion assignment operators (⊕= ∂= ⫰=).
    pub fn is_legion_assign(&self) -> bool {
        matches!(
            self,
            Token::DirectSumEq | Token::PartialEq_ | Token::InterfereEq
        )
    }

    /// True for any affective annotation: sentiment, irony, intensity,
    /// register, emotion, or confidence marker.
    ///
    /// NOTE(review): `DirectSum` (⊕) doubles as the positive-sentiment
    /// marker here — presumably intentional overloading; verify.
    pub fn is_affective(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral | Token::IronyMark | Token::IntensityUp | Token::IntensityDown | Token::IntensityMax | Token::FormalRegister | Token::InformalRegister | Token::EmotionJoy | Token::EmotionSadness | Token::EmotionAnger | Token::EmotionFear | Token::EmotionSurprise | Token::EmotionLove | Token::ConfidenceHigh | Token::ConfidenceMedium | Token::ConfidenceLow
        )
    }

    /// True for the three sentiment polarity markers
    /// (⊕ positive, ⊖ negative, ⊜ neutral).
    pub fn is_sentiment(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral
        )
    }

    /// True for the six basic-emotion markers.
    pub fn is_emotion(&self) -> bool {
        matches!(
            self,
            Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
        )
    }

    /// True for the intensity markers (↑ ↓ ⇈).
    pub fn is_intensity(&self) -> bool {
        matches!(
            self,
            Token::IntensityUp | Token::IntensityDown | Token::IntensityMax
        )
    }
}
1298
/// Token stream over a source string, wrapping the logos-generated
/// lexer and adding arbitrary lookahead via an internal buffer.
pub struct Lexer<'a> {
    // The underlying logos lexer.
    inner: logos::Lexer<'a, Token>,
    // Lookahead queue filled by `peek_n`; a `None` entry records that
    // end-of-input was reached while peeking.
    buffer: Vec<Option<(Token, Span)>>,
}
1305
1306impl<'a> Lexer<'a> {
1307 pub fn new(source: &'a str) -> Self {
1308 Self {
1309 inner: Token::lexer(source),
1310 buffer: Vec::new(),
1311 }
1312 }
1313
1314 fn read_next(&mut self) -> Option<(Token, Span)> {
1316 match self.inner.next() {
1317 Some(Ok(token)) => {
1318 let span = self.inner.span();
1319 Some((token, Span::new(span.start, span.end)))
1320 }
1321 Some(Err(_)) => {
1322 self.read_next()
1324 }
1325 None => None,
1326 }
1327 }
1328
1329 pub fn next_token(&mut self) -> Option<(Token, Span)> {
1330 if !self.buffer.is_empty() {
1331 return self.buffer.remove(0);
1334 }
1335 self.read_next()
1336 }
1337
1338 pub fn peek(&mut self) -> Option<&(Token, Span)> {
1339 self.peek_n(0)
1340 }
1341
1342 pub fn peek_n(&mut self, n: usize) -> Option<&(Token, Span)> {
1344 while self.buffer.len() <= n {
1346 let token = self.read_next();
1347 self.buffer.push(token);
1348 }
1349 self.buffer.get(n).and_then(|opt| opt.as_ref())
1350 }
1351
1352 pub fn span(&self) -> Span {
1353 let span = self.inner.span();
1354 Span::new(span.start, span.end)
1355 }
1356}
1357
#[cfg(test)]
mod tests {
    use super::*;

    // Greek-letter morphemes, including uppercase aliases (Σ -> Sigma).
    #[test]
    fn test_morphemes() {
        let mut lexer = Lexer::new("τ φ σ ρ λ Σ Π ⌛");
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Phi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Rho, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lambda, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Pi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Hourglass, _))));
    }

    // Postfix evidentiality markers lex as separate tokens after idents.
    #[test]
    fn test_evidentiality() {
        let mut lexer = Lexer::new("value! uncertain? reported~ paradox‽");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "value"));
        assert!(matches!(lexer.next_token(), Some((Token::Bang, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "uncertain"));
        assert!(matches!(lexer.next_token(), Some((Token::Question, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "reported"));
        assert!(matches!(lexer.next_token(), Some((Token::Tilde, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "paradox"));
        assert!(matches!(lexer.next_token(), Some((Token::Interrobang, _))));
    }

    // Pipe-delimited morpheme chains (only the leading tokens asserted).
    #[test]
    fn test_pipe_chain() {
        let mut lexer = Lexer::new("data|τ{f}|φ{p}|σ");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "data"));
        assert!(matches!(lexer.next_token(), Some((Token::Pipe, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    // All numeric literal bases keep their raw slice as payload.
    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("42 0b1010 0o52 0x2A 0v22 0s42 3.14");
        assert!(matches!(lexer.next_token(), Some((Token::IntLit(s), _)) if s == "42"));
        assert!(matches!(lexer.next_token(), Some((Token::BinaryLit(s), _)) if s == "0b1010"));
        assert!(matches!(lexer.next_token(), Some((Token::OctalLit(s), _)) if s == "0o52"));
        assert!(matches!(lexer.next_token(), Some((Token::HexLit(s), _)) if s == "0x2A"));
        assert!(matches!(lexer.next_token(), Some((Token::VigesimalLit(s), _)) if s == "0v22"));
        assert!(matches!(lexer.next_token(), Some((Token::SexagesimalLit(s), _)) if s == "0s42"));
        assert!(matches!(lexer.next_token(), Some((Token::FloatLit(s), _)) if s == "3.14"));
    }

    // Noun-incorporation with the middle dot separator.
    #[test]
    fn test_incorporation() {
        let mut lexer = Lexer::new("file·open·read");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "file"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "open"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "read"));
    }

    #[test]
    fn test_special_symbols() {
        let mut lexer = Lexer::new("∅ ◯ ∞");
        assert!(matches!(lexer.next_token(), Some((Token::Empty, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Circle, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Infinity, _))));
    }

    // Quantifier symbols bind tightly against adjacent identifiers.
    #[test]
    fn test_quantifiers() {
        let mut lexer = Lexer::new("∀x ∃y x∈S y∉T");
        assert!(matches!(lexer.next_token(), Some((Token::ForAll, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Exists, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::ElementOf, _))));
    }

    #[test]
    fn test_set_operations() {
        let mut lexer = Lexer::new("A∪B A∩B A∖B A⊂B A⊆B");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Union, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "B"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Intersection, _))));
    }

    #[test]
    fn test_logic_operators() {
        let mut lexer = Lexer::new("p∧q p∨q ¬p p⊻q ⊤ ⊥");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicAnd, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "q"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicOr, _))));
    }

    #[test]
    fn test_analysis_operators() {
        let mut lexer = Lexer::new("∫f ∂g √x ∛y f∘g");
        assert!(matches!(lexer.next_token(), Some((Token::Integral, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::Partial, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "g"));
        assert!(matches!(lexer.next_token(), Some((Token::Sqrt, _))));
    }

    #[test]
    fn test_additional_morphemes() {
        let mut lexer = Lexer::new("δ ε ω α ζ");
        assert!(matches!(lexer.next_token(), Some((Token::Delta, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Epsilon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Omega, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Alpha, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Zeta, _))));
    }

    #[test]
    fn test_ffi_keywords() {
        let mut lexer = Lexer::new("extern unsafe");
        assert!(matches!(lexer.next_token(), Some((Token::Extern, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Unsafe, _))));
    }

    // Symbolic and spelled forms map to the same token variant.
    #[test]
    fn test_parallel_morphemes() {
        let mut lexer = Lexer::new("∥ parallel ⊛ gpu");
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
    }

    // NOTE(review): this test lexes ASCII `loop` and `break` and expects
    // Token::Loop / Token::Break; the enum as shown only declares
    // `forever` / `⊲` patterns for them — verify ASCII aliases exist.
    #[test]
    fn test_lifetime_labels() {
        let mut lexer = Lexer::new("'outer: loop { break 'outer }");
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::Colon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Loop, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Break, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    // Simple escapes are decoded inside ordinary string literals.
    #[test]
    fn test_string_escape_sequences() {
        let mut lexer = Lexer::new(r#""hello\nworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\nworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\tworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\tworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\rworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\rworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\\world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\\world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\"world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\"world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\0world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\0world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_hex_escape() {
        let mut lexer = Lexer::new(r#""hello\x41world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "helloAworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_unicode_escape() {
        let mut lexer = Lexer::new(r#""hello\u{1F600}world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello😀world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""\u{03C4}""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τ"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_char_escape_sequences() {
        let mut lexer = Lexer::new(r"'\n'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\n'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\t'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\t'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\\'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\\'),
            other => panic!("Expected CharLit, got {:?}", other),
        }
    }

    // Raw strings keep escape sequences verbatim.
    #[test]
    fn test_raw_string() {
        let mut lexer = Lexer::new(r#"r"hello\nworld""#);
        match lexer.next_token() {
            Some((Token::RawStringLit(s), _)) => assert_eq!(s, r"hello\nworld"),
            other => panic!("Expected RawStringLit, got {:?}", other),
        }
    }

    // r#"..."# may contain unescaped double quotes.
    #[test]
    fn test_raw_string_delimited() {
        let mut lexer = Lexer::new(r##"r#"hello "world""#"##);
        match lexer.next_token() {
            Some((Token::RawStringDelimited(s), _)) => assert_eq!(s, r#"hello "world""#),
            other => panic!("Expected RawStringDelimited, got {:?}", other),
        }
    }

    #[test]
    fn test_byte_string() {
        let mut lexer = Lexer::new(r#"b"hello""#);
        match lexer.next_token() {
            Some((Token::ByteStringLit(bytes), _)) => {
                // ASCII bytes of "hello".
                assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
            }
            other => panic!("Expected ByteStringLit, got {:?}", other),
        }
    }

    // f-strings keep the interpolation braces; parsing them is the
    // parser's job, not the lexer's.
    #[test]
    fn test_interpolated_string() {
        let mut lexer = Lexer::new(r#"f"hello {name}""#);
        match lexer.next_token() {
            Some((Token::InterpolatedStringLit(s), _)) => assert_eq!(s, "hello {name}"),
            other => panic!("Expected InterpolatedStringLit, got {:?}", other),
        }
    }

    // σ"..." sigil strings strip the multi-byte sigil correctly.
    #[test]
    fn test_sigil_string_sql() {
        let mut lexer = Lexer::new(r#"σ"SELECT * FROM {table}""#);
        match lexer.next_token() {
            Some((Token::SigilStringSql(s), _)) => assert_eq!(s, "SELECT * FROM {table}"),
            other => panic!("Expected SigilStringSql, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_route() {
        let mut lexer = Lexer::new(r#"ρ"/api/v1/{resource}/{id}""#);
        match lexer.next_token() {
            Some((Token::SigilStringRoute(s), _)) => assert_eq!(s, "/api/v1/{resource}/{id}"),
            other => panic!("Expected SigilStringRoute, got {:?}", other),
        }
    }

    // Multi-byte UTF-8 content passes through string literals intact.
    #[test]
    fn test_unicode_in_strings() {
        let mut lexer = Lexer::new(r#""τφσρ 你好 🦀""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τφσρ 你好 🦀"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_empty_string() {
        let mut lexer = Lexer::new(r#""""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, ""),
            other => panic!("Expected empty StringLit, got {:?}", other),
        }
    }

    // Direct unit tests for the escape-decoding helper.
    #[test]
    fn test_escape_sequence_helper() {
        assert_eq!(process_escape_sequences(r"hello\nworld"), "hello\nworld");
        assert_eq!(process_escape_sequences(r"hello\tworld"), "hello\tworld");
        assert_eq!(process_escape_sequences(r"hello\\world"), "hello\\world");
        assert_eq!(process_escape_sequences(r#"hello\"world"#), "hello\"world");
        assert_eq!(process_escape_sequences(r"hello\x41world"), "helloAworld");
        assert_eq!(
            process_escape_sequences(r"hello\u{1F600}world"),
            "hello😀world"
        );
    }
}