1use crate::span::Span;
6use logos::Logos;
7
/// Decode the escape sequences of a (non-raw) string literal body.
///
/// Supported escapes: `\n`, `\t`, `\r`, `\\`, `\"`, `\'`, `\0`,
/// `\xNN` (up to two hex digits), `\u{...}` (hex code point), and a
/// backslash-newline line continuation that also swallows the leading
/// spaces/tabs of the following line. An unrecognized escape is kept
/// verbatim (backslash plus the following character); a malformed
/// `\x`/`\u` escape is silently dropped, matching the lexer's lenient
/// error handling.
fn process_escape_sequences(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut iter = s.chars().peekable();

    while let Some(ch) = iter.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match iter.next() {
            // Line continuation: drop the newline and any indentation after it.
            Some('\n') => {
                while matches!(iter.peek(), Some(&' ') | Some(&'\t')) {
                    iter.next();
                }
            }
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('\'') => out.push('\''),
            Some('0') => out.push('\0'),
            Some('x') => {
                // Collect at most two leading hex digits; a single digit is tolerated.
                let mut digits = String::new();
                while digits.len() < 2
                    && matches!(iter.peek(), Some(c) if c.is_ascii_hexdigit())
                {
                    digits.push(iter.next().unwrap());
                }
                if let Ok(byte) = u8::from_str_radix(&digits, 16) {
                    out.push(byte as char);
                }
            }
            Some('u') => {
                // Only the `\u{...}` form is recognized; a bare `\u` is dropped.
                if iter.peek() == Some(&'{') {
                    iter.next();
                    let mut digits = String::new();
                    while let Some(&c) = iter.peek() {
                        if c == '}' {
                            iter.next();
                            break;
                        }
                        if !c.is_ascii_hexdigit() {
                            break;
                        }
                        digits.push(iter.next().unwrap());
                    }
                    if let Some(decoded) = u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)
                    {
                        out.push(decoded);
                    }
                }
            }
            // Unknown escape: keep it verbatim.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Trailing lone backslash.
            None => out.push('\\'),
        }
    }
    out
}
85
/// Decode the escape sequences of a byte-string literal body into raw bytes.
///
/// Handles the same simple escapes as `process_escape_sequences` plus
/// `\xNN`, but no `\u{...}` form. Characters outside the ASCII range —
/// whether literal or following an unknown escape — are silently
/// dropped, since a byte string can only hold single bytes.
fn process_byte_escape_sequences(s: &str) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(s.len());
    let mut iter = s.chars().peekable();

    while let Some(ch) = iter.next() {
        if ch != '\\' {
            if ch.is_ascii() {
                bytes.push(ch as u8);
            }
            continue;
        }
        match iter.next() {
            // Line continuation: drop the newline plus following indentation.
            Some('\n') => {
                while matches!(iter.peek(), Some(&' ') | Some(&'\t')) {
                    iter.next();
                }
            }
            Some('n') => bytes.push(b'\n'),
            Some('t') => bytes.push(b'\t'),
            Some('r') => bytes.push(b'\r'),
            Some('\\') => bytes.push(b'\\'),
            Some('"') => bytes.push(b'"'),
            Some('\'') => bytes.push(b'\''),
            Some('0') => bytes.push(0),
            Some('x') => {
                // Up to two leading hex digits; a single digit is tolerated.
                let mut digits = String::new();
                while digits.len() < 2
                    && matches!(iter.peek(), Some(c) if c.is_ascii_hexdigit())
                {
                    digits.push(iter.next().unwrap());
                }
                if let Ok(byte) = u8::from_str_radix(&digits, 16) {
                    bytes.push(byte);
                }
            }
            // Unknown escape: keep the backslash, and the char only if it fits a byte.
            Some(other) => {
                bytes.push(b'\\');
                if other.is_ascii() {
                    bytes.push(other as u8);
                }
            }
            // Trailing lone backslash.
            None => bytes.push(b'\\'),
        }
    }
    bytes
}
141
142fn block_comment_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
147 let remainder = lex.remainder();
148
149 if let Some(end_pos) = remainder.find("*/") {
151 let content = &remainder[..end_pos];
152 lex.bump(end_pos + 2);
154 Some(content.to_string())
155 } else {
156 let len = remainder.len();
158 lex.bump(len);
159 Some(remainder.to_string())
160 }
161}
162
163fn raw_string_delimited_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
164 let remainder = lex.remainder();
165
166 if let Some(end_pos) = remainder.find("\"#") {
168 let content = &remainder[..end_pos];
169 lex.bump(end_pos + 2);
171 Some(content.to_string())
172 } else {
173 None
174 }
175}
176
177fn multiline_string_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
180 let remainder = lex.remainder();
181
182 if let Some(end_pos) = remainder.find("\"\"\"") {
184 let content = &remainder[..end_pos];
185 lex.bump(end_pos + 3);
187 Some(process_escape_sequences(content))
188 } else {
189 None
191 }
192}
193
/// Decode the interior of a char literal (`'...'`) into a single char.
///
/// Falls back to `'?'` for anything malformed (empty input, bad hex,
/// an out-of-range `\u{...}` code point, or a dangling backslash);
/// an unknown escape `\c` yields `c` itself.
fn process_char_escape(s: &str) -> char {
    let mut iter = s.chars();
    let first = match iter.next() {
        Some(c) => c,
        None => return '?',
    };
    if first != '\\' {
        return first;
    }
    match iter.next() {
        Some('n') => '\n',
        Some('t') => '\t',
        Some('r') => '\r',
        Some('\\') => '\\',
        Some('"') => '"',
        Some('\'') => '\'',
        Some('0') => '\0',
        Some('x') => {
            // Exactly the next two chars are treated as the hex byte.
            let digits: String = iter.take(2).collect();
            match u8::from_str_radix(&digits, 16) {
                Ok(v) => v as char,
                Err(_) => '?',
            }
        }
        Some('u') => match iter.next() {
            Some('{') => {
                let digits: String = iter.take_while(|&c| c != '}').collect();
                u32::from_str_radix(&digits, 16)
                    .ok()
                    .and_then(char::from_u32)
                    .unwrap_or('?')
            }
            // `\u` without `{` is malformed.
            _ => '?',
        },
        // Unknown escape: the escaped char itself.
        Some(c) => c,
        None => '?',
    }
}
230
/// Decode the interior of a byte-char literal (`b'...'`) into one byte.
///
/// Falls back to `b'?'` for malformed input; an unknown escape `\c`
/// yields `c` truncated to a byte, and a bare non-ASCII char is
/// likewise truncated (`as u8`), matching the lexer's lenient style.
fn process_byte_char_escape(s: &str) -> u8 {
    let mut iter = s.chars();
    let first = match iter.next() {
        Some(c) => c,
        None => return b'?',
    };
    if first != '\\' {
        return first as u8;
    }
    match iter.next() {
        Some('n') => b'\n',
        Some('t') => b'\t',
        Some('r') => b'\r',
        Some('\\') => b'\\',
        Some('"') => b'"',
        Some('\'') => b'\'',
        Some('0') => b'\0',
        Some('x') => {
            // Exactly the next two chars are treated as the hex byte.
            let digits: String = iter.take(2).collect();
            u8::from_str_radix(&digits, 16).unwrap_or(b'?')
        }
        // Unknown escape: the escaped char, truncated to a byte.
        Some(c) => c as u8,
        None => b'?',
    }
}
254
/// Every lexical token of the language, produced by the `logos`-derived
/// lexer. ASCII whitespace (including form feed) is skipped between
/// tokens. Several keywords carry a second, thematic spelling via a
/// stacked `#[token]` attribute (e.g. `struct`/`sigil`), and many
/// operators accept both a Unicode symbol and an ASCII word form.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\r\n\f]+")]
pub enum Token {
    // ---- Comments (payload is the full matched slice, marker included,
    // except BlockComment, whose callback yields only the interior) ----
    #[regex(r"//[^\n]*", |lex| lex.slice().to_string())]
    LineComment(String),

    #[regex(r"//![^\n]*", |lex| lex.slice().to_string())]
    DocComment(String),

    #[regex(r"~~[^\n]*", |lex| lex.slice().to_string())]
    TildeComment(String),

    #[token("/*", block_comment_callback)]
    BlockComment(String),

    // ---- Declaration keywords ----
    #[token("fn")]
    Fn,
    #[token("async")]
    Async,
    #[token("let")]
    Let,
    #[token("mut")]
    Mut,
    #[token("const")]
    Const,
    #[token("type")]
    Type,
    #[token("struct")]
    #[token("sigil")]
    Struct,
    #[token("enum")]
    Enum,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("mod")]
    #[token("scroll")]
    Mod,
    #[token("use")]
    #[token("invoke")]
    Use,
    #[token("pub")]
    Pub,
    #[token("actor")]
    Actor,
    #[token("saga")]
    Saga,
    #[token("scope")]
    Scope,
    #[token("rune")]
    Rune,
    #[token("macro")]
    Macro,
    #[token("macro_rules")]
    MacroRules,

    // ---- Control-flow keywords ----
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("loop")]
    Loop,
    #[token("while")]
    While,
    #[token("for")]
    For,
    #[token("in")]
    In,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("return")]
    Return,
    #[token("yield")]
    Yield,
    #[token("await")]
    Await,

    // ---- Path / misc keywords ----
    #[token("self")]
    SelfLower,
    #[token("Self")]
    SelfUpper,
    #[token("super")]
    Super,
    #[token("crate")]
    #[token("tome")]
    Crate,
    #[token("where")]
    Where,
    #[token("as")]
    As,
    #[token("dyn")]
    Dyn,
    #[token("move")]
    Move,
    #[token("ref")]
    Ref,
    #[token("static")]
    Static,
    #[token("unsafe")]
    Unsafe,
    #[token("extern")]
    Extern,
    #[token("asm")]
    Asm,
    #[token("volatile")]
    Volatile,
    #[token("naked")]
    Naked,
    #[token("packed")]
    Packed,
    #[token("simd")]
    Simd,
    #[token("atomic")]
    Atomic,
    #[token("derive")]
    Derive,
    #[token("on")]
    On,

    // ---- Plurality keywords (see `Token::is_plurality_keyword`) ----
    #[token("alter")]
    Alter,
    #[token("switch")]
    Switch,
    #[token("headspace")]
    Headspace,
    #[token("cocon")]
    CoCon,
    #[token("reality")]
    Reality,
    #[token("split")]
    Split,
    #[token("trigger")]
    Trigger,
    #[token("layer")]
    Layer,
    #[token("location")]
    Location,
    #[token("states")]
    States,
    #[token("anima")]
    Anima,
    #[token("to")]
    To,
    #[token("from")]
    From,

    // ---- Alter-source sigils (see `Token::is_alter_source`) ----
    #[token("@!")]
    AlterSourceFronting,
    #[token("@~")]
    AlterSourceCoCon,
    #[token("@?")]
    AlterSourceDormant,
    #[token("@‽")]
    AlterSourceBlended,

    // ---- Literal keywords ----
    #[token("true")]
    True,
    #[token("false")]
    False,

    #[token("null")]
    Null,

    // ---- Greek-letter morphemes (lower- and upper-case forms map to
    // the same token unless only one case is registered) ----
    #[token("τ")]
    #[token("Τ")]
    Tau,
    #[token("φ")]
    #[token("Φ")]
    Phi,
    #[token("σ")]
    #[token("Σ")]
    Sigma,
    #[token("ρ")]
    #[token("Ρ")]
    Rho,
    #[token("λ")]
    #[token("Λ")]
    Lambda,
    #[token("Π")]
    Pi,
    #[token("⌛")]
    Hourglass,
    #[token("δ")]
    #[token("Δ")]
    Delta,
    #[token("ε")]
    Epsilon,
    #[token("ω")]
    #[token("Ω")]
    Omega,
    #[token("α")]
    Alpha,
    #[token("ζ")]
    Zeta,
    #[token("μ")]
    #[token("Μ")]
    Mu,
    #[token("χ")]
    #[token("Χ")]
    Chi,
    #[token("ν")]
    #[token("Ν")]
    Nu,
    #[token("ξ")]
    #[token("Ξ")]
    Xi,
    #[token("ψ")]
    #[token("Ψ")]
    Psi,
    #[token("θ")]
    #[token("Θ")]
    Theta,
    #[token("κ")]
    #[token("Κ")]
    Kappa,

    // ---- Parallelism morphemes (symbol or word form) ----
    #[token("∥")]
    #[token("parallel")]
    Parallel,
    #[token("⊛")]
    #[token("gpu")]
    Gpu,

    // ---- Quantifiers and set membership ----
    #[token("∀")]
    ForAll,
    #[token("∃")]
    Exists,
    #[token("∈")]
    ElementOf,
    #[token("∉")]
    NotElementOf,

    // ---- Set operators ----
    #[token("∪")]
    Union,
    #[token("∩")]
    Intersection,
    #[token("∖")]
    SetMinus,
    #[token("⊂")]
    Subset,
    #[token("⊆")]
    SubsetEq,
    #[token("⊃")]
    Superset,
    #[token("⊇")]
    SupersetEq,

    // ---- Logic operators ----
    #[token("∧")]
    LogicAnd,
    #[token("∨")]
    LogicOr,
    #[token("¬")]
    LogicNot,
    #[token("⊻")]
    LogicXor,
    #[token("⊤")]
    Top,
    #[token("⊥")]
    Bottom,

    // ---- Bitwise / miscellaneous symbols ----
    #[token("⋏")]
    BitwiseAndSymbol,
    #[token("⋎")]
    BitwiseOrSymbol,
    #[token("⊙")]
    CircledDot,
    #[token("∷")]
    TypeAnnotation,

    // ---- Analysis / array-language operators ----
    #[token("∫")]
    Integral,
    #[token("∂")]
    Partial,
    #[token("√")]
    Sqrt,
    #[token("∛")]
    Cbrt,
    #[token("∇")]
    Nabla,
    #[token("⍋")]
    GradeUp,
    #[token("⍒")]
    GradeDown,
    #[token("⌽")]
    Rotate,
    #[token("↻")]
    CycleArrow,
    #[token("⌺")]
    QuadDiamond,
    #[token("⊞")]
    SquaredPlus,
    #[token("⍳")]
    Iota,
    #[token("∘")]
    Compose,
    #[token("⊗")]
    Tensor,
    #[token("⊕")]
    DirectSum,

    // ---- Relational / data operators ----
    #[token("⋈")]
    Bowtie,
    #[token("⋳")]
    ElementSmallVerticalBar,
    #[token("⊔")]
    SquareCup,
    #[token("⊓")]
    SquareCap,

    // ---- Evidentiality marks ----
    #[token("‽")]
    Interrobang,
    #[token("◊")]
    Lozenge,

    // ---- Legion (distributed) morphemes ----
    #[token("∿")]
    #[token("legion_field")]
    LegionField,
    #[token("⫰")]
    #[token("interfere")]
    Interfere,
    #[token("⟁")]
    #[token("distribute")]
    Distribute,
    #[token("⟀")]
    #[token("gather")]
    Gather,
    #[token("↠")]
    #[token("broadcast")]
    Broadcast,
    #[token("⇢")]
    #[token("consensus")]
    Consensus,

    // ---- Legion compound assignments ----
    #[token("⊕=")]
    DirectSumEq,
    #[token("∂=")]
    PartialEq_,
    #[token("⫰=")]
    InterfereEq,

    // ---- Affect / sentiment marks (⊕ doubles as positive sentiment) ----
    #[token("⊖")]
    AffectNegative,
    #[token("⊜")]
    AffectNeutral,
    #[token("⸮")]
    IronyMark,
    #[token("↑")]
    IntensityUp,
    #[token("↓")]
    IntensityDown,
    #[token("⇈")]
    IntensityMax,
    #[token("♔")]
    FormalRegister,
    #[token("♟")]
    InformalRegister,
    #[token("☺")]
    EmotionJoy,
    #[token("☹")]
    EmotionSadness,
    #[token("⚡")]
    EmotionAnger,
    #[token("❄")]
    EmotionFear,
    #[token("✦")]
    EmotionSurprise,
    #[token("♡")]
    EmotionLove,
    #[token("◉")]
    ConfidenceHigh,
    #[token("◎")]
    ConfidenceMedium,
    #[token("○")]
    ConfidenceLow,

    // ---- Aspect suffixes (middle-dot compounds; must outrank MiddleDot) ----
    #[token("·ing")]
    AspectProgressive,
    #[token("·ed")]
    AspectPerfective,
    #[token("·able")]
    AspectPotential,
    #[token("·ive")]
    AspectResultative,

    // ---- Operators and punctuation ----
    #[token("|")]
    Pipe,
    #[token("·")]
    MiddleDot,
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("<-")]
    LeftArrow,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("**")]
    StarStar,
    #[token("&&")]
    AndAnd,
    #[token("||")]
    OrOr,
    #[token("!")]
    Bang,
    #[token("?")]
    Question,
    #[token("~")]
    Tilde,
    #[token("&")]
    Amp,
    #[token("^")]
    Caret,
    #[token("<<=")]
    ShlEq,
    #[token(">>=")]
    ShrEq,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("=")]
    Eq,
    #[token("+=")]
    PlusEq,
    #[token("-=")]
    MinusEq,
    #[token("*=")]
    StarEq,
    #[token("/=")]
    SlashEq,
    #[token("%=")]
    PercentEq,
    #[token("|=")]
    PipeEq,
    #[token("&=")]
    AmpEq,
    #[token("^=")]
    CaretEq,
    #[token("..")]
    DotDot,
    #[token("..=")]
    DotDotEq,
    #[token("++")]
    PlusPlus,
    #[token("::")]
    ColonColon,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,
    #[token("#!")]
    HashBang,
    #[token("#")]
    Hash,
    // Raised priority so `_` wins over the identifier regex.
    #[token("_", priority = 3)]
    Underscore,

    // ---- Delimiters ----
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,

    // ---- Special value symbols ----
    #[token("∅")]
    Empty,
    #[token("◯")]
    Circle,
    #[token("∞")]
    Infinity,

    // ---- Protocol symbols ----
    #[token("⇒")]
    ProtoSend,
    #[token("⇐")]
    ProtoRecv,
    #[token("≋")]
    ProtoStream,
    #[token("⊸")]
    ProtoConnect,
    #[token("⏱")]
    ProtoTimeout,

    // ---- Protocol keywords ----
    #[token("send")]
    Send,
    #[token("recv")]
    Recv,
    #[token("stream")]
    Stream,
    #[token("connect")]
    Connect,
    #[token("close")]
    Close,
    #[token("timeout")]
    Timeout,
    #[token("retry")]
    Retry,
    #[token("header")]
    Header,
    #[token("body")]
    Body,

    // ---- Transport scheme keywords ----
    #[token("http")]
    Http,
    #[token("https")]
    Https,
    #[token("ws")]
    Ws,
    #[token("wss")]
    Wss,
    #[token("grpc")]
    Grpc,
    #[token("kafka")]
    Kafka,
    #[token("amqp")]
    Amqp,
    #[token("graphql")]
    GraphQL,

    // ---- Numeric literals (kept as raw slices; suffix and `_`
    // separators are parsed later) ----
    #[regex(r"0b[01_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    BinaryLit(String),

    #[regex(r"0o[0-7_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    OctalLit(String),

    #[regex(r"0x[0-9a-fA-F_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    HexLit(String),

    // Base-20: digits 0-9 plus a-j.
    #[regex(r"0v[0-9a-jA-J_]+", |lex| lex.slice().to_string())]
    VigesimalLit(String),

    // Base-60: digits 0-9 plus a-z/A-Z.
    #[regex(r"0s[0-9a-zA-Z_]+", |lex| lex.slice().to_string())]
    SexagesimalLit(String),

    // Base-12: digits 0-9 plus a/b.
    #[regex(r"0z[0-9a-bA-B_]+", |lex| lex.slice().to_string())]
    DuodecimalLit(String),

    #[regex(r"([0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9_]+)?|[0-9][0-9_]*[eE][+-]?[0-9_]+)(f16|f32|f64|f128)?", |lex| lex.slice().to_string())]
    FloatLit(String),

    #[regex(r"[0-9][0-9_]*(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    IntLit(String),

    // ---- String and char literals (escape sequences decoded here) ----
    #[regex(r#""([^"\\]|\\(.|\n))*""#, |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_escape_sequences(inner)
    })]
    StringLit(String),

    #[token(r#"""""#, multiline_string_callback)]
    MultiLineStringLit(String),

    #[regex(r#"b"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        process_byte_escape_sequences(inner)
    })]
    ByteStringLit(Vec<u8>),

    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        process_escape_sequences(inner)
    })]
    InterpolatedStringLit(String),

    // The σ/ρ sigil prefixes are multi-byte in UTF-8, hence the
    // computed start offset when slicing off the opening quote.
    #[regex(r#"σ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "σ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringSql(String),

    #[regex(r#"ρ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "ρ".len() + 1;
        let inner = &s[start..s.len()-1];
        process_escape_sequences(inner)
    })]
    SigilStringRoute(String),

    #[regex(r"'([^'\\]|\\x[0-9a-fA-F]{2}|\\u\{[0-9a-fA-F]{1,6}\}|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len()-1];
        process_char_escape(inner)
    })]
    CharLit(char),

    #[regex(r"b'([^'\\]|\\x[0-9a-fA-F]{2}|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len()-1];
        process_byte_char_escape(inner)
    })]
    ByteCharLit(u8),

    #[regex(r#"r"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        s[2..s.len()-1].to_string()
    })]
    RawStringLit(String),

    #[token(r##"r#""##, raw_string_delimited_callback)]
    RawStringDelimited(String),

    // Lifetime/label: leading `'` is stripped from the payload.
    #[regex(r"'[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice()[1..].to_string())]
    Lifetime(String),

    // Identifiers may contain Greek letters; bare Greek morpheme tokens
    // above still win via logos' longest-match/priority rules.
    #[regex(r"[a-zA-Z_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ][a-zA-Z0-9_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ]*", |lex| lex.slice().to_string())]
    Ident(String),

    // `//@ rune: name` annotation comments.
    #[regex(r"//@\s*rune:\s*[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    RuneAnnotation(String),
}
1070
impl Token {
    /// True for any language keyword, including the plurality keywords.
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            Token::Fn
                | Token::Async
                | Token::Let
                | Token::Mut
                | Token::Const
                | Token::Type
                | Token::Struct
                | Token::Enum
                | Token::Trait
                | Token::Impl
                | Token::Mod
                | Token::Use
                | Token::Pub
                | Token::Actor
                | Token::Saga
                | Token::Scope
                | Token::Rune
                | Token::If
                | Token::Else
                | Token::Match
                | Token::Loop
                | Token::While
                | Token::For
                | Token::In
                | Token::Break
                | Token::Continue
                | Token::Return
                | Token::Yield
                | Token::Await
        ) || self.is_plurality_keyword()
    }

    /// True for the plurality-domain keywords (`alter`, `switch`, ...).
    pub fn is_plurality_keyword(&self) -> bool {
        matches!(
            self,
            Token::Alter
                | Token::Switch
                | Token::Headspace
                | Token::CoCon
                | Token::Reality
                | Token::Split
                | Token::Trigger
                | Token::Layer
                | Token::Location
                | Token::States
                | Token::Anima
                | Token::To
                | Token::From
        )
    }

    /// True for the `@!`/`@~`/`@?`/`@‽` alter-source sigils.
    pub fn is_alter_source(&self) -> bool {
        matches!(
            self,
            Token::AlterSourceFronting
                | Token::AlterSourceCoCon
                | Token::AlterSourceDormant
                | Token::AlterSourceBlended
        )
    }

    /// True for tokens usable as pipeline morphemes (Greek letters,
    /// parallel/GPU markers, and analysis operators).
    pub fn is_morpheme(&self) -> bool {
        matches!(
            self,
            Token::Tau | Token::Phi | Token::Sigma | Token::Rho |
            Token::Lambda | Token::Pi | Token::Hourglass |
            Token::Delta | Token::Epsilon | Token::Omega | Token::Alpha | Token::Zeta |
            Token::Mu | Token::Chi | Token::Nu | Token::Xi | Token::Parallel | Token::Gpu | Token::Integral | Token::Partial | Token::Sqrt | Token::Cbrt |
            Token::Compose
        )
    }

    /// True for the middle-dot aspect suffixes (`·ing`, `·ed`, ...).
    pub fn is_aspect(&self) -> bool {
        matches!(
            self,
            Token::AspectProgressive
                | Token::AspectPerfective
                | Token::AspectPotential
                | Token::AspectResultative
        )
    }

    /// True for the relational/data operators (join, lattice ops).
    pub fn is_data_op(&self) -> bool {
        matches!(
            self,
            Token::Bowtie | Token::ElementSmallVerticalBar | Token::SquareCup | Token::SquareCap
        )
    }

    /// True for the ⋏/⋎ bitwise operator symbols.
    pub fn is_bitwise_symbol(&self) -> bool {
        matches!(self, Token::BitwiseAndSymbol | Token::BitwiseOrSymbol)
    }

    /// True for quantifier and set-membership symbols (∀ ∃ ∈ ∉).
    pub fn is_quantifier(&self) -> bool {
        matches!(
            self,
            Token::ForAll | Token::Exists | Token::ElementOf | Token::NotElementOf
        )
    }

    /// True for the set operators (∪ ∩ ∖ ⊂ ⊆ ⊃ ⊇).
    pub fn is_set_op(&self) -> bool {
        matches!(
            self,
            Token::Union
                | Token::Intersection
                | Token::SetMinus
                | Token::Subset
                | Token::SubsetEq
                | Token::Superset
                | Token::SupersetEq
        )
    }

    /// True for the logic operators (∧ ∨ ¬ ⊻ ⊤ ⊥).
    pub fn is_logic_op(&self) -> bool {
        matches!(
            self,
            Token::LogicAnd
                | Token::LogicOr
                | Token::LogicNot
                | Token::LogicXor
                | Token::Top
                | Token::Bottom
        )
    }

    /// True for evidentiality marks (`!` `?` `~` `‽` `◊`).
    pub fn is_evidentiality(&self) -> bool {
        matches!(
            self,
            Token::Bang | Token::Question | Token::Tilde | Token::Interrobang | Token::Lozenge
        )
    }

    /// True for legion (distributed-computation) morphemes. Note: ⊕
    /// (`DirectSum`), ∂ (`Partial`) and ◉ (`ConfidenceHigh`) are shared
    /// with other token families.
    pub fn is_legion_morpheme(&self) -> bool {
        matches!(
            self,
            Token::LegionField | Token::DirectSum | Token::Interfere | Token::ConfidenceHigh | Token::Distribute | Token::Gather | Token::Broadcast | Token::Consensus | Token::Partial )
    }

    /// True for the legion compound-assignment operators (⊕= ∂= ⫰=).
    pub fn is_legion_assign(&self) -> bool {
        matches!(
            self,
            Token::DirectSumEq | Token::PartialEq_ | Token::InterfereEq
        )
    }

    /// True for any affect mark: sentiment, irony, intensity, register,
    /// emotion, or confidence. ⊕ (`DirectSum`) doubles as positive sentiment.
    pub fn is_affective(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral | Token::IronyMark | Token::IntensityUp | Token::IntensityDown | Token::IntensityMax | Token::FormalRegister | Token::InformalRegister | Token::EmotionJoy | Token::EmotionSadness | Token::EmotionAnger | Token::EmotionFear | Token::EmotionSurprise | Token::EmotionLove | Token::ConfidenceHigh | Token::ConfidenceMedium | Token::ConfidenceLow )
    }

    /// True for the three sentiment marks (positive ⊕, negative ⊖, neutral ⊜).
    pub fn is_sentiment(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral
        )
    }

    /// True for the six basic-emotion marks.
    pub fn is_emotion(&self) -> bool {
        matches!(
            self,
            Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
        )
    }

    /// True for the intensity marks (↑ ↓ ⇈).
    pub fn is_intensity(&self) -> bool {
        matches!(
            self,
            Token::IntensityUp | Token::IntensityDown | Token::IntensityMax
        )
    }
}
1287
/// Convenience wrapper around the logos-generated lexer that adds
/// arbitrary-depth lookahead (`peek`/`peek_n`) and skips error tokens.
pub struct Lexer<'a> {
    // The underlying logos token stream over the source text.
    inner: logos::Lexer<'a, Token>,
    // Lookahead buffer filled by `peek_n`; a stored `None` marks EOF.
    buffer: Vec<Option<(Token, Span)>>,
}
1294
1295impl<'a> Lexer<'a> {
1296 pub fn new(source: &'a str) -> Self {
1297 Self {
1298 inner: Token::lexer(source),
1299 buffer: Vec::new(),
1300 }
1301 }
1302
1303 fn read_next(&mut self) -> Option<(Token, Span)> {
1305 match self.inner.next() {
1306 Some(Ok(token)) => {
1307 let span = self.inner.span();
1308 Some((token, Span::new(span.start, span.end)))
1309 }
1310 Some(Err(_)) => {
1311 self.read_next()
1313 }
1314 None => None,
1315 }
1316 }
1317
1318 pub fn next_token(&mut self) -> Option<(Token, Span)> {
1319 if !self.buffer.is_empty() {
1320 return self.buffer.remove(0);
1323 }
1324 self.read_next()
1325 }
1326
1327 pub fn peek(&mut self) -> Option<&(Token, Span)> {
1328 self.peek_n(0)
1329 }
1330
1331 pub fn peek_n(&mut self, n: usize) -> Option<&(Token, Span)> {
1333 while self.buffer.len() <= n {
1335 let token = self.read_next();
1336 self.buffer.push(token);
1337 }
1338 self.buffer.get(n).and_then(|opt| opt.as_ref())
1339 }
1340
1341 pub fn span(&self) -> Span {
1342 let span = self.inner.span();
1343 Span::new(span.start, span.end)
1344 }
1345}
1346
#[cfg(test)]
mod tests {
    //! Lexer smoke tests: each test feeds a small snippet through the
    //! public `Lexer` wrapper and checks the token stream it produces.
    use super::*;

    #[test]
    fn test_morphemes() {
        // Note: both σ and Σ lex as `Sigma` (case-folded morphemes).
        let mut lexer = Lexer::new("τ φ σ ρ λ Σ Π ⌛");
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Phi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Rho, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lambda, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Pi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Hourglass, _))));
    }

    #[test]
    fn test_evidentiality() {
        // Evidentiality marks attach after an identifier as separate tokens.
        let mut lexer = Lexer::new("value! uncertain? reported~ paradox‽");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "value"));
        assert!(matches!(lexer.next_token(), Some((Token::Bang, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "uncertain"));
        assert!(matches!(lexer.next_token(), Some((Token::Question, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "reported"));
        assert!(matches!(lexer.next_token(), Some((Token::Tilde, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "paradox"));
        assert!(matches!(lexer.next_token(), Some((Token::Interrobang, _))));
    }

    #[test]
    fn test_pipe_chain() {
        let mut lexer = Lexer::new("data|τ{f}|φ{p}|σ");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "data"));
        assert!(matches!(lexer.next_token(), Some((Token::Pipe, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_numbers() {
        // Covers every numeric base prefix plus a float literal.
        let mut lexer = Lexer::new("42 0b1010 0o52 0x2A 0v22 0s42 3.14");
        assert!(matches!(lexer.next_token(), Some((Token::IntLit(s), _)) if s == "42"));
        assert!(matches!(lexer.next_token(), Some((Token::BinaryLit(s), _)) if s == "0b1010"));
        assert!(matches!(lexer.next_token(), Some((Token::OctalLit(s), _)) if s == "0o52"));
        assert!(matches!(lexer.next_token(), Some((Token::HexLit(s), _)) if s == "0x2A"));
        assert!(matches!(lexer.next_token(), Some((Token::VigesimalLit(s), _)) if s == "0v22"));
        assert!(matches!(lexer.next_token(), Some((Token::SexagesimalLit(s), _)) if s == "0s42"));
        assert!(matches!(lexer.next_token(), Some((Token::FloatLit(s), _)) if s == "3.14"));
    }

    #[test]
    fn test_incorporation() {
        // Noun-incorporation chains use the middle dot between idents.
        let mut lexer = Lexer::new("file·open·read");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "file"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "open"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "read"));
    }

    #[test]
    fn test_special_symbols() {
        let mut lexer = Lexer::new("∅ ◯ ∞");
        assert!(matches!(lexer.next_token(), Some((Token::Empty, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Circle, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Infinity, _))));
    }

    #[test]
    fn test_quantifiers() {
        let mut lexer = Lexer::new("∀x ∃y x∈S y∉T");
        assert!(matches!(lexer.next_token(), Some((Token::ForAll, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Exists, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::ElementOf, _))));
    }

    #[test]
    fn test_set_operations() {
        let mut lexer = Lexer::new("A∪B A∩B A∖B A⊂B A⊆B");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Union, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "B"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Intersection, _))));
    }

    #[test]
    fn test_logic_operators() {
        let mut lexer = Lexer::new("p∧q p∨q ¬p p⊻q ⊤ ⊥");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicAnd, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "q"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicOr, _))));
    }

    #[test]
    fn test_analysis_operators() {
        let mut lexer = Lexer::new("∫f ∂g √x ∛y f∘g");
        assert!(matches!(lexer.next_token(), Some((Token::Integral, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::Partial, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "g"));
        assert!(matches!(lexer.next_token(), Some((Token::Sqrt, _))));
    }

    #[test]
    fn test_additional_morphemes() {
        let mut lexer = Lexer::new("δ ε ω α ζ");
        assert!(matches!(lexer.next_token(), Some((Token::Delta, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Epsilon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Omega, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Alpha, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Zeta, _))));
    }

    #[test]
    fn test_ffi_keywords() {
        let mut lexer = Lexer::new("extern unsafe");
        assert!(matches!(lexer.next_token(), Some((Token::Extern, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Unsafe, _))));
    }

    #[test]
    fn test_parallel_morphemes() {
        // Symbol and word spellings produce the same token.
        let mut lexer = Lexer::new("∥ parallel ⊛ gpu");
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
    }

    #[test]
    fn test_lifetime_labels() {
        let mut lexer = Lexer::new("'outer: loop { break 'outer }");
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::Colon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Loop, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Break, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_string_escape_sequences() {
        // Each simple escape decodes to its control/quote character.
        let mut lexer = Lexer::new(r#""hello\nworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\nworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\tworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\tworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\rworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\rworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\\world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\\world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\"world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\"world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\0world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\0world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_hex_escape() {
        let mut lexer = Lexer::new(r#""hello\x41world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "helloAworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_unicode_escape() {
        let mut lexer = Lexer::new(r#""hello\u{1F600}world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello😀world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""\u{03C4}""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τ"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_char_escape_sequences() {
        let mut lexer = Lexer::new(r"'\n'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\n'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\t'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\t'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\\'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\\'),
            other => panic!("Expected CharLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string() {
        // Raw strings keep escapes verbatim.
        let mut lexer = Lexer::new(r#"r"hello\nworld""#);
        match lexer.next_token() {
            Some((Token::RawStringLit(s), _)) => assert_eq!(s, r"hello\nworld"),
            other => panic!("Expected RawStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string_delimited() {
        // `r#"..."#` form may contain unescaped double quotes.
        let mut lexer = Lexer::new(r##"r#"hello "world""#"##);
        match lexer.next_token() {
            Some((Token::RawStringDelimited(s), _)) => assert_eq!(s, r#"hello "world""#),
            other => panic!("Expected RawStringDelimited, got {:?}", other),
        }
    }

    #[test]
    fn test_byte_string() {
        let mut lexer = Lexer::new(r#"b"hello""#);
        match lexer.next_token() {
            Some((Token::ByteStringLit(bytes), _)) => {
                // "hello" as raw ASCII bytes.
                assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
            }
            other => panic!("Expected ByteStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_interpolated_string() {
        // The `{name}` placeholder is preserved for later parsing.
        let mut lexer = Lexer::new(r#"f"hello {name}""#);
        match lexer.next_token() {
            Some((Token::InterpolatedStringLit(s), _)) => assert_eq!(s, "hello {name}"),
            other => panic!("Expected InterpolatedStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_sql() {
        let mut lexer = Lexer::new(r#"σ"SELECT * FROM {table}""#);
        match lexer.next_token() {
            Some((Token::SigilStringSql(s), _)) => assert_eq!(s, "SELECT * FROM {table}"),
            other => panic!("Expected SigilStringSql, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_route() {
        let mut lexer = Lexer::new(r#"ρ"/api/v1/{resource}/{id}""#);
        match lexer.next_token() {
            Some((Token::SigilStringRoute(s), _)) => assert_eq!(s, "/api/v1/{resource}/{id}"),
            other => panic!("Expected SigilStringRoute, got {:?}", other),
        }
    }

    #[test]
    fn test_unicode_in_strings() {
        // Non-ASCII text passes through string literals untouched.
        let mut lexer = Lexer::new(r#""τφσρ 你好 🦀""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τφσρ 你好 🦀"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_empty_string() {
        let mut lexer = Lexer::new(r#""""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, ""),
            other => panic!("Expected empty StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_escape_sequence_helper() {
        // Direct unit tests for the shared escape decoder.
        assert_eq!(process_escape_sequences(r"hello\nworld"), "hello\nworld");
        assert_eq!(process_escape_sequences(r"hello\tworld"), "hello\tworld");
        assert_eq!(process_escape_sequences(r"hello\\world"), "hello\\world");
        assert_eq!(process_escape_sequences(r#"hello\"world"#), "hello\"world");
        assert_eq!(process_escape_sequences(r"hello\x41world"), "helloAworld");
        assert_eq!(
            process_escape_sequences(r"hello\u{1F600}world"),
            "hello😀world"
        );
    }
}