use crate::span::Span;
use logos::Logos;

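/// Expand backslash escape sequences in a string-literal body.
/// Handles `\n`, `\t`, `\r`, `\\`, `\"`, `\'`, `\0`, `\xNN`, and `\u{...}`;
/// a backslash before a literal newline acts as a line continuation that
/// also swallows leading spaces and tabs on the next line. Unrecognized
/// escapes are kept verbatim.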
fn process_escape_sequences(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('\n') => {
                    // Line continuation: drop the newline and any leading
                    // indentation on the following line.
                    while let Some(&c) = chars.peek() {
                        if c == ' ' || c == '\t' {
                            chars.next();
                        } else {
                            break;
                        }
                    }
                }
                Some('n') => result.push('\n'),
                Some('t') => result.push('\t'),
                Some('r') => result.push('\r'),
                Some('\\') => result.push('\\'),
                Some('"') => result.push('"'),
                Some('\'') => result.push('\''),
                Some('0') => result.push('\0'),
                Some('x') => {
                    let mut hex = String::new();
                    for _ in 0..2 {
                        if let Some(&c) = chars.peek() {
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            }
                        }
                    }
                    if let Ok(val) = u8::from_str_radix(&hex, 16) {
                        result.push(val as char);
                    }
                }
                Some('u') => {
                    if chars.peek() == Some(&'{') {
                        chars.next();
                        let mut hex = String::new();
                        while let Some(&c) = chars.peek() {
                            if c == '}' {
                                chars.next();
                                break;
                            }
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            } else {
                                break;
                            }
                        }
                        if let Ok(val) = u32::from_str_radix(&hex, 16) {
                            if let Some(c) = char::from_u32(val) {
                                result.push(c);
                            }
                        }
                    }
                }
                Some(other) => {
                    result.push('\\');
                    result.push(other);
                }
                None => result.push('\\'),
            }
        } else {
            result.push(c);
        }
    }
    result
}

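/// Expand escape sequences in a byte-string-literal body. Same rules as
/// `process_escape_sequences` except there is no `\u{...}` form, and any
/// non-ASCII character in the input is dropped.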
fn process_byte_escape_sequences(s: &str) -> Vec<u8> {
    let mut result = Vec::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('\n') => {
                    // Line continuation: drop the newline and any leading
                    // indentation on the following line.
                    while let Some(&c) = chars.peek() {
                        if c == ' ' || c == '\t' {
                            chars.next();
                        } else {
                            break;
                        }
                    }
                }
                Some('n') => result.push(b'\n'),
                Some('t') => result.push(b'\t'),
                Some('r') => result.push(b'\r'),
                Some('\\') => result.push(b'\\'),
                Some('"') => result.push(b'"'),
                Some('\'') => result.push(b'\''),
                Some('0') => result.push(0),
                Some('x') => {
                    let mut hex = String::new();
                    for _ in 0..2 {
                        if let Some(&c) = chars.peek() {
                            if c.is_ascii_hexdigit() {
                                hex.push(chars.next().unwrap());
                            }
                        }
                    }
                    if let Ok(val) = u8::from_str_radix(&hex, 16) {
                        result.push(val);
                    }
                }
                Some(other) => {
                    result.push(b'\\');
                    if other.is_ascii() {
                        result.push(other as u8);
                    }
                }
                None => result.push(b'\\'),
            }
        } else if c.is_ascii() {
            result.push(c as u8);
        }
    }
    result
}

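/// Callback for `/*`: scan forward to the closing `*/`, bump the lexer
/// past it, and return the comment body. An unterminated comment swallows
/// the rest of the input instead of failing.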
fn block_comment_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("*/") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 2);
        Some(content.to_string())
    } else {
        let len = remainder.len();
        lex.bump(len);
        Some(remainder.to_string())
    }
}

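/// Callback for `r#"`: capture everything up to the closing `"#`.
/// Returns `None` (a lex error) if the literal is unterminated.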
fn raw_string_delimited_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"#") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 2);
        Some(content.to_string())
    } else {
        None
    }
}

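/// Callback for `"""`: capture everything up to the closing `"""` and
/// expand escape sequences in the body. Returns `None` if unterminated.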
fn multiline_string_callback(lex: &mut logos::Lexer<'_, Token>) -> Option<String> {
    let remainder = lex.remainder();

    if let Some(end_pos) = remainder.find("\"\"\"") {
        let content = &remainder[..end_pos];
        lex.bump(end_pos + 3);
        Some(process_escape_sequences(content))
    } else {
        None
    }
}

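/// Decode the body of a character literal (between the single quotes)
/// into a `char`, falling back to `'?'` on malformed escapes.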
fn process_char_escape(s: &str) -> char {
    let mut chars = s.chars();
    match chars.next() {
        Some('\\') => match chars.next() {
            Some('n') => '\n',
            Some('t') => '\t',
            Some('r') => '\r',
            Some('\\') => '\\',
            Some('"') => '"',
            Some('\'') => '\'',
            Some('0') => '\0',
            Some('x') => {
                let hex: String = chars.take(2).collect();
                u8::from_str_radix(&hex, 16)
                    .map(|v| v as char)
                    .unwrap_or('?')
            }
            Some('u') => {
                if chars.next() == Some('{') {
                    let hex: String = chars.take_while(|&c| c != '}').collect();
                    u32::from_str_radix(&hex, 16)
                        .ok()
                        .and_then(char::from_u32)
                        .unwrap_or('?')
                } else {
                    '?'
                }
            }
            Some(c) => c,
            None => '?',
        },
        Some(c) => c,
        None => '?',
    }
}

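/// Decode the body of a byte-character literal (between the quotes of
/// `b'…'`) into a `u8`, falling back to `b'?'` on malformed escapes.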
fn process_byte_char_escape(s: &str) -> u8 {
    let mut chars = s.chars();
    match chars.next() {
        Some('\\') => match chars.next() {
            Some('n') => b'\n',
            Some('t') => b'\t',
            Some('r') => b'\r',
            Some('\\') => b'\\',
            Some('"') => b'"',
            Some('\'') => b'\'',
            Some('0') => b'\0',
            Some('x') => {
                let hex: String = chars.take(2).collect();
                u8::from_str_radix(&hex, 16).unwrap_or(b'?')
            }
            Some(c) => c as u8,
            None => b'?',
        },
        Some(c) => c as u8,
        None => b'?',
    }
}

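/// The full token set for the language, generated by `logos`.
///
/// Doc comments are split by evidentiality marker (`//!` verified,
/// `//~` reported, `//?` uncertain, `//◊` predicted, `//‽` paradox),
/// each with a doubled-marker inner variant. Rust's keywords still lex,
/// but as `DeprecatedRustKeyword(String)` so downstream code can flag
/// them instead of silently accepting them.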
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\r\n\f]+")]
pub enum Token {
    #[regex(r"//!![^\n]*", priority = 10, callback = |lex| lex.slice()[4..].trim().to_string())]
    DocCommentVerifiedInner(String),
    #[regex(r"//![^!\n][^\n]*", priority = 8, callback = |lex| lex.slice()[3..].trim().to_string())]
    DocCommentVerified(String),

    #[regex(r"//~~[^\n]*", priority = 10, callback = |lex| lex.slice()[4..].trim().to_string())]
    DocCommentReportedInner(String),
    #[regex(r"//~[^~\n][^\n]*", priority = 8, callback = |lex| lex.slice()[3..].trim().to_string())]
    DocCommentReported(String),

    #[regex(r"//\?\?[^\n]*", priority = 10, callback = |lex| lex.slice()[4..].trim().to_string())]
    DocCommentUncertainInner(String),
    #[regex(r"//\?[^\?\n][^\n]*", priority = 8, callback = |lex| lex.slice()[3..].trim().to_string())]
    DocCommentUncertain(String),

    #[regex(r"//◊◊[^\n]*", priority = 10, callback = |lex| {
        lex.slice().trim_start_matches("//◊◊").trim().to_string()
    })]
    DocCommentPredictedInner(String),
    #[regex(r"//◊[^◊\n][^\n]*", priority = 8, callback = |lex| {
        lex.slice().trim_start_matches("//◊").trim().to_string()
    })]
    DocCommentPredicted(String),

    #[regex(r"//‽‽[^\n]*", priority = 10, callback = |lex| {
        lex.slice().trim_start_matches("//‽‽").trim().to_string()
    })]
    DocCommentParadoxInner(String),
    #[regex(r"//‽[^‽\n][^\n]*", priority = 8, callback = |lex| {
        lex.slice().trim_start_matches("//‽").trim().to_string()
    })]
    DocCommentParadox(String),

    #[regex(r"///[^\n]*", priority = 5, callback = |lex| lex.slice()[3..].trim().to_string())]
    DocComment(String),

    #[regex(r"//[^\n]*", priority = 1, callback = |lex| lex.slice().to_string())]
    LineComment(String),

    #[regex(r"~~[^\n]*", |lex| lex.slice().to_string())]
    TildeComment(String),

    #[token("/*", block_comment_callback)]
    BlockComment(String),

    #[token("fn", |lex| lex.slice().to_string())]
    #[token("let", |lex| lex.slice().to_string())]
    #[token("mut", |lex| lex.slice().to_string())]
    #[token("struct", |lex| lex.slice().to_string())]
    #[token("enum", |lex| lex.slice().to_string())]
    #[token("trait", |lex| lex.slice().to_string())]
    #[token("impl", |lex| lex.slice().to_string())]
    #[token("mod", |lex| lex.slice().to_string())]
    #[token("use", |lex| lex.slice().to_string())]
    #[token("pub", |lex| lex.slice().to_string())]
    #[token("if", |lex| lex.slice().to_string())]
    #[token("else", |lex| lex.slice().to_string())]
    #[token("match", |lex| lex.slice().to_string())]
    #[token("while", |lex| lex.slice().to_string())]
    #[token("for", |lex| lex.slice().to_string())]
    #[token("in", |lex| lex.slice().to_string())]
    #[token("break", |lex| lex.slice().to_string())]
    #[token("continue", |lex| lex.slice().to_string())]
    #[token("return", |lex| lex.slice().to_string())]
    DeprecatedRustKeyword(String),

    #[token("&mut")]
    DeprecatedAmpMut,

    #[token("rite")]
    Fn,
    #[token("async")]
    #[token("⌛")]
    Async,
    #[token("≔")]
    Let,
    #[token("Δ")]
    #[token("vary")]
    Mut,
    #[token("const")]
    #[token("◆")]
    Const,
    #[token("linear")]
    Linear,
    #[token("affine")]
    Affine,
    #[token("relevant")]
    Relevant,
    #[token("type")]
    Type,
    #[token("sigil")]
    #[token("Σ")]
    Struct,
    #[token("ᛈ")]
    Enum,
    #[token("Θ")]
    #[token("aspect")]
    Trait,
    #[token("⊢")]
    Impl,
    #[token("scroll")]
    Mod,
    #[token("invoke")]
    Use,
    #[token("☉")]
    Pub,
    #[token("actor")]
    Actor,
    #[token("saga")]
    Saga,
    #[token("scope")]
    Scope,
    #[token("rune")]
    Rune,
    #[token("macro")]
    Macro,
    #[token("macro_rules")]
    MacroRules,

    #[token("⎇")]
    If,
    #[token("⎉")]
    Else,
    #[token("⌥")]
    Match,
    #[token("loop")]
    #[token("forever")]
    Loop,
    #[token("⟳")]
    While,
    #[token("⤺")]
    Return,
    #[token("yield")]
    Yield,
    #[token("await")]
    Await,

    #[token("self")]
    #[token("this")]
    SelfLower,
    #[token("Self")]
    #[token("This")]
    SelfUpper,
    #[token("super")]
    Super,
    #[token("tome")]
    Crate,
    #[token("where")]
    #[token("∋")]
    Where,
    #[token("as")]
    As,
    #[token("dyn")]
    Dyn,
    #[token("move")]
    Move,
    #[token("ref")]
    Ref,
    #[token("static")]
    Static,
    #[token("unsafe")]
    Unsafe,
    #[token("extern")]
    Extern,
    #[token("asm")]
    Asm,
    #[token("volatile")]
    Volatile,
    #[token("naked")]
    Naked,
    #[token("packed")]
    Packed,
    #[token("simd")]
    Simd,
    #[token("atomic")]
    Atomic,
    #[token("derive")]
    Derive,
    #[token("on")]
    On,

    #[token("alter")]
    Alter,
    #[token("switch")]
    Switch,
    #[token("headspace")]
    Headspace,
    #[token("cocon")]
    CoCon,
    #[token("reality")]
    Reality,
    #[token("split")]
    Split,
    #[token("trigger")]
    Trigger,
    #[token("layer")]
    Layer,
    #[token("location")]
    Location,
    #[token("states")]
    States,
    #[token("anima")]
    Anima,
    #[token("to")]
    To,
    #[token("from")]
    From,

    #[token("@!")]
    AlterSourceFronting,
    #[token("@~")]
    AlterSourceCoCon,
    #[token("@?")]
    AlterSourceDormant,
    #[token("@‽")]
    AlterSourceBlended,

    #[token("true")]
    #[token("yay")]
    #[token("yea")]
    True,
    #[token("false")]
    #[token("nay")]
    False,

    #[token("null")]
    Null,

    #[token("τ")]
    #[token("Τ")]
    Tau,
    #[token("φ")]
    #[token("Φ")]
    Phi,
    #[token("σ")]
    Sigma,
    #[token("ρ")]
    #[token("Ρ")]
    Rho,
    #[token("Λ")]
    Lambda,
    #[token("λ")]
    LambdaExpr,
    #[token("Π")]
    Pi,
    #[token("δ")]
    Delta,
    #[token("ε")]
    Epsilon,
    #[token("ω")]
    #[token("Ω")]
    Omega,
    #[token("α")]
    Alpha,
    #[token("ζ")]
    Zeta,
    #[token("μ")]
    #[token("Μ")]
    Mu,
    #[token("χ")]
    #[token("Χ")]
    Chi,
    #[token("ν")]
    #[token("Ν")]
    Nu,
    #[token("ξ")]
    #[token("Ξ")]
    Xi,
    #[token("ψ")]
    #[token("Ψ")]
    Psi,
    #[token("θ")]
    Theta,
    #[token("κ")]
    #[token("Κ")]
    Kappa,

    #[token("∥")]
    #[token("parallel")]
    Parallel,
    #[token("gpu")]
    Gpu,
    #[token("⊛")]
    Convolve,

    #[token("∀")]
    #[token("each")]
    ForAll,
    #[token("∃")]
    Exists,
    #[token("∈")]
    #[token("of")]
    ElementOf,
    #[token("∉")]
    NotElementOf,

    #[token("∪")]
    Union,
    #[token("∩")]
    Intersection,
    #[token("∖")]
    SetMinus,
    #[token("⊂")]
    Subset,
    #[token("⊆")]
    SubsetEq,
    #[token("⊃")]
    Superset,
    #[token("⊇")]
    SupersetEq,

    #[token("∧")]
    LogicAnd,
    #[token("∨")]
    LogicOr,
    #[token("¬")]
    LogicNot,
    #[token("⊻")]
    LogicXor,
    #[token("⊤")]
    Top,
    #[token("⊥")]
    Bottom,

    #[token("⋏")]
    BitwiseAndSymbol,
    #[token("⋎")]
    BitwiseOrSymbol,
    #[token("⊙")]
    CircledDot,

    #[token("∷")]
    TypeAnnotation,

    #[token("∫")]
    Integral,
    #[token("∂")]
    Partial,
    #[token("√")]
    Sqrt,
    #[token("∛")]
    Cbrt,
    #[token("∇")]
    Nabla,

    #[token("⍋")]
    GradeUp,
    #[token("⍒")]
    GradeDown,
    #[token("⌽")]
    Rotate,
    #[token("↻")]
    #[token("⊳")]
    CycleArrow,
    #[token("⌺")]
    QuadDiamond,
    #[token("⊞")]
    SquaredPlus,
    #[token("⍳")]
    Iota,

    #[token("∘")]
    Compose,
    #[token("⊗")]
    #[token("⊲")]
    Tensor,
    #[token("⊕")]
    DirectSum,

    #[token("⋈")]
    Bowtie,
    #[token("⋳")]
    ElementSmallVerticalBar,
    #[token("⊔")]
    SquareCup,
    #[token("⊓")]
    SquareCap,

    #[token("‽")]
    Interrobang,
    #[token("◊")]
    Lozenge,
    #[token("□")]
    BoxSquare,

    #[token("∿")]
    #[token("legion_field")]
    LegionField,
    #[token("⫰")]
    #[token("interfere")]
    Interfere,
    #[token("⟁")]
    #[token("distribute")]
    Distribute,
    #[token("⟀")]
    #[token("gather")]
    Gather,
    #[token("↠")]
    #[token("broadcast")]
    Broadcast,
    #[token("⇢")]
    #[token("consensus")]
    Consensus,

    #[token("⊕=")]
    DirectSumEq,
    #[token("∂=")]
    PartialEq_,
    #[token("⫰=")]
    InterfereEq,

    #[token("⊖")]
    AffectNegative,
    #[token("⊜")]
    AffectNeutral,
    #[token("⸮")]
    IronyMark,
    #[token("↑")]
    IntensityUp,
    #[token("↓")]
    IntensityDown,
    #[token("⇈")]
    IntensityMax,
    #[token("♔")]
    FormalRegister,
    #[token("♟")]
    InformalRegister,
    #[token("☺")]
    EmotionJoy,
    #[token("☹")]
    EmotionSadness,
    #[token("⚡")]
    EmotionAnger,
    #[token("❄")]
    EmotionFear,
    #[token("✦")]
    EmotionSurprise,
    #[token("♡")]
    EmotionLove,
    #[token("◉")]
    ConfidenceHigh,
    #[token("◎")]
    ConfidenceMedium,
    #[token("○")]
    ConfidenceLow,

    #[token("·ing")]
    AspectProgressive,
    #[token("·ed")]
    AspectPerfective,
    #[token("·able")]
    AspectPotential,
    #[token("·ive")]
    AspectResultative,

    #[token("|")]
    Pipe,
    #[token("·")]
    MiddleDot,
    #[token("->")]
    #[token("→")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("<-")]
    LeftArrow,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("**")]
    StarStar,
    #[token("&&")]
    AndAnd,
    #[token("||")]
    OrOr,
    #[token("!")]
    Bang,
    #[token("?")]
    Question,
    #[token("~")]
    Tilde,
    #[token("&")]
    Amp,
    #[token("^")]
    Caret,
    #[token("<<=")]
    ShlEq,
    #[token(">>=")]
    ShrEq,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("=")]
    Eq,
    #[token("+=")]
    PlusEq,
    #[token("-=")]
    MinusEq,
    #[token("*=")]
    StarEq,
    #[token("/=")]
    SlashEq,
    #[token("%=")]
    PercentEq,
    #[token("|=")]
    PipeEq,
    #[token("&=")]
    AmpEq,
    #[token("^=")]
    CaretEq,
    #[token("..")]
    DotDot,
    #[token("..=")]
    DotDotEq,
    #[token("++")]
    PlusPlus,
    #[token("::")]
    DeprecatedColonColon,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,
    #[token("$")]
    Dollar,
    #[token("#!")]
    HashBang,
    #[token("#")]
    Hash,
    #[token("_", priority = 3)]
    Underscore,

    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,

    #[token("∅")]
    Empty,
    #[token("◯")]
    Circle,
    #[token("∞")]
    Infinity,

    #[token("⇒")]
    ProtoSend,
    #[token("⇐")]
    ProtoRecv,
    #[token("≋")]
    ProtoStream,
    #[token("⊸")]
    ProtoConnect,
    #[token("⏱")]
    ProtoTimeout,

    #[token("send")]
    Send,
    #[token("recv")]
    Recv,
    #[token("stream")]
    Stream,
    #[token("connect")]
    Connect,
    #[token("close")]
    Close,
    #[token("timeout")]
    Timeout,
    #[token("retry")]
    Retry,
    #[token("header")]
    Header,
    #[token("body")]
    Body,

    #[token("http")]
    Http,
    #[token("https")]
    Https,
    #[token("ws")]
    Ws,
    #[token("wss")]
    Wss,
    #[token("grpc")]
    Grpc,
    #[token("kafka")]
    Kafka,
    #[token("amqp")]
    Amqp,
    #[token("graphql")]
    GraphQL,

    #[regex(r"0b[01_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    BinaryLit(String),

    #[regex(r"0o[0-7_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    OctalLit(String),

    #[regex(r"0x[0-9a-fA-F_]+(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    HexLit(String),

    #[regex(r"0v[0-9a-jA-J_]+", |lex| lex.slice().to_string())]
    VigesimalLit(String),

    #[regex(r"0s[0-9a-zA-Z_]+", |lex| lex.slice().to_string())]
    SexagesimalLit(String),

    #[regex(r"0z[0-9a-bA-B_]+", |lex| lex.slice().to_string())]
    DuodecimalLit(String),

    #[regex(r"([0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9_]+)?|[0-9][0-9_]*[eE][+-]?[0-9_]+)_?(f16|f32|f64|f128)?", |lex| lex.slice().to_string())]
    FloatLit(String),

    #[regex(r"[0-9][0-9_]*_?(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize)?", |lex| lex.slice().to_string())]
    IntLit(String),

    #[regex(r#""([^"\\]|\\(.|\n))*""#, |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len() - 1];
        process_escape_sequences(inner)
    })]
    StringLit(String),

    #[token(r#"""""#, multiline_string_callback)]
    MultiLineStringLit(String),

    #[regex(r#"b"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len() - 1];
        process_byte_escape_sequences(inner)
    })]
    ByteStringLit(Vec<u8>),

    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len() - 1];
        process_escape_sequences(inner)
    })]
    InterpolatedStringLit(String),

    #[regex(r#"σ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "σ".len() + 1;
        let inner = &s[start..s.len() - 1];
        process_escape_sequences(inner)
    })]
    SigilStringSql(String),

    #[regex(r#"ρ"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        let start = "ρ".len() + 1;
        let inner = &s[start..s.len() - 1];
        process_escape_sequences(inner)
    })]
    SigilStringRoute(String),

    #[regex(r"'([^'\\]|\\x[0-9a-fA-F]{2}|\\u\{[0-9a-fA-F]{1,6}\}|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[1..s.len() - 1];
        process_char_escape(inner)
    })]
    CharLit(char),

    #[regex(r"b'([^'\\]|\\x[0-9a-fA-F]{2}|\\.)'", |lex| {
        let s = lex.slice();
        let inner = &s[2..s.len() - 1];
        process_byte_char_escape(inner)
    })]
    ByteCharLit(u8),

    #[regex(r#"r"([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        s[2..s.len() - 1].to_string()
    })]
    RawStringLit(String),

    #[token(r##"r#""##, raw_string_delimited_callback)]
    RawStringDelimited(String),

    #[regex(r"'[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice()[1..].to_string())]
    Lifetime(String),

    #[regex(r"[a-zA-Z_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ][a-zA-Z0-9_αΑβΒγΓδΔεΕζΖηΗθΘιΙκΚλΛμΜνΝξΞοΟπΠρΡσΣτΤυΥφΦχΧψΨωΩ]*", |lex| lex.slice().to_string())]
    Ident(String),

    #[regex(r"//@\s*rune:\s*[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    RuneAnnotation(String),
}

impl Token {
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            Token::Fn
                | Token::Async
                | Token::Let
                | Token::Mut
                | Token::Const
                | Token::Linear
                | Token::Affine
                | Token::Relevant
                | Token::Type
                | Token::Struct
                | Token::Enum
                | Token::Trait
                | Token::Impl
                | Token::Mod
                | Token::Use
                | Token::Pub
                | Token::Actor
                | Token::Saga
                | Token::Scope
                | Token::Rune
                | Token::If
                | Token::Else
                | Token::Match
                | Token::Loop
                | Token::While
                | Token::ForAll
                | Token::ElementOf
                | Token::Tensor
                | Token::CycleArrow
                | Token::Return
                | Token::Yield
                | Token::Await
        ) || self.is_plurality_keyword()
    }

    pub fn is_plurality_keyword(&self) -> bool {
        matches!(
            self,
            Token::Alter
                | Token::Switch
                | Token::Headspace
                | Token::CoCon
                | Token::Reality
                | Token::Split
                | Token::Trigger
                | Token::Layer
                | Token::Location
                | Token::States
                | Token::Anima
                | Token::To
                | Token::From
        )
    }

    pub fn is_alter_source(&self) -> bool {
        matches!(
            self,
            Token::AlterSourceFronting
                | Token::AlterSourceCoCon
                | Token::AlterSourceDormant
                | Token::AlterSourceBlended
        )
    }

    pub fn is_morpheme(&self) -> bool {
        matches!(
            self,
            Token::Tau
                | Token::Phi
                | Token::Sigma
                | Token::Rho
                | Token::Lambda
                | Token::Pi
                | Token::Async
                | Token::Delta
                | Token::Epsilon
                | Token::Omega
                | Token::Alpha
                | Token::Zeta
                | Token::Mu
                | Token::Chi
                | Token::Nu
                | Token::Xi
                | Token::Parallel
                | Token::Gpu
                | Token::Integral
                | Token::Partial
                | Token::Sqrt
                | Token::Cbrt
                | Token::Compose
        )
    }

    pub fn is_aspect(&self) -> bool {
        matches!(
            self,
            Token::AspectProgressive
                | Token::AspectPerfective
                | Token::AspectPotential
                | Token::AspectResultative
        )
    }

    pub fn is_data_op(&self) -> bool {
        matches!(
            self,
            Token::Bowtie | Token::ElementSmallVerticalBar | Token::SquareCup | Token::SquareCap
        )
    }

    pub fn is_bitwise_symbol(&self) -> bool {
        matches!(self, Token::BitwiseAndSymbol | Token::BitwiseOrSymbol)
    }

    pub fn is_quantifier(&self) -> bool {
        matches!(
            self,
            Token::ForAll | Token::Exists | Token::ElementOf | Token::NotElementOf
        )
    }

    pub fn is_set_op(&self) -> bool {
        matches!(
            self,
            Token::Union
                | Token::Intersection
                | Token::SetMinus
                | Token::Subset
                | Token::SubsetEq
                | Token::Superset
                | Token::SupersetEq
        )
    }

    pub fn is_logic_op(&self) -> bool {
        matches!(
            self,
            Token::LogicAnd
                | Token::LogicOr
                | Token::LogicNot
                | Token::LogicXor
                | Token::Top
                | Token::Bottom
        )
    }

    pub fn is_evidentiality(&self) -> bool {
        matches!(
            self,
            Token::Bang | Token::Question | Token::Tilde | Token::Interrobang | Token::Lozenge
        )
    }

    pub fn is_legion_morpheme(&self) -> bool {
        matches!(
            self,
            Token::LegionField
                | Token::DirectSum
                | Token::Interfere
                | Token::ConfidenceHigh
                | Token::Distribute
                | Token::Gather
                | Token::Broadcast
                | Token::Consensus
                | Token::Partial
        )
    }

    pub fn is_legion_assign(&self) -> bool {
        matches!(
            self,
            Token::DirectSumEq | Token::PartialEq_ | Token::InterfereEq
        )
    }

    pub fn is_affective(&self) -> bool {
        matches!(
            self,
            Token::DirectSum
                | Token::AffectNegative
                | Token::AffectNeutral
                | Token::IronyMark
                | Token::IntensityUp
                | Token::IntensityDown
                | Token::IntensityMax
                | Token::FormalRegister
                | Token::InformalRegister
                | Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
                | Token::ConfidenceHigh
                | Token::ConfidenceMedium
                | Token::ConfidenceLow
        )
    }

    pub fn is_sentiment(&self) -> bool {
        matches!(
            self,
            Token::DirectSum | Token::AffectNegative | Token::AffectNeutral
        )
    }

    pub fn is_emotion(&self) -> bool {
        matches!(
            self,
            Token::EmotionJoy
                | Token::EmotionSadness
                | Token::EmotionAnger
                | Token::EmotionFear
                | Token::EmotionSurprise
                | Token::EmotionLove
        )
    }

    pub fn is_intensity(&self) -> bool {
        matches!(
            self,
            Token::IntensityUp | Token::IntensityDown | Token::IntensityMax
        )
    }

    pub fn is_doc_comment(&self) -> bool {
        matches!(
            self,
            Token::DocCommentVerified(_)
                | Token::DocCommentVerifiedInner(_)
                | Token::DocCommentReported(_)
                | Token::DocCommentReportedInner(_)
                | Token::DocCommentUncertain(_)
                | Token::DocCommentUncertainInner(_)
                | Token::DocCommentPredicted(_)
                | Token::DocCommentPredictedInner(_)
                | Token::DocCommentParadox(_)
                | Token::DocCommentParadoxInner(_)
                | Token::DocComment(_)
        )
    }

    pub fn is_inner_doc_comment(&self) -> bool {
        matches!(
            self,
            Token::DocCommentVerifiedInner(_)
                | Token::DocCommentReportedInner(_)
                | Token::DocCommentUncertainInner(_)
                | Token::DocCommentPredictedInner(_)
                | Token::DocCommentParadoxInner(_)
        )
    }

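    /// The evidentiality marker character for a doc-comment token.
    /// Plain `///` comments default to the reported marker `'~'`.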
    pub fn doc_comment_evidentiality(&self) -> Option<char> {
        match self {
            Token::DocCommentVerified(_) | Token::DocCommentVerifiedInner(_) => Some('!'),
            Token::DocCommentReported(_) | Token::DocCommentReportedInner(_) => Some('~'),
            Token::DocCommentUncertain(_) | Token::DocCommentUncertainInner(_) => Some('?'),
            Token::DocCommentPredicted(_) | Token::DocCommentPredictedInner(_) => Some('◊'),
            Token::DocCommentParadox(_) | Token::DocCommentParadoxInner(_) => Some('‽'),
            Token::DocComment(_) => Some('~'),
            _ => None,
        }
    }

    pub fn doc_comment_content(&self) -> Option<&str> {
        match self {
            Token::DocCommentVerified(s)
            | Token::DocCommentVerifiedInner(s)
            | Token::DocCommentReported(s)
            | Token::DocCommentReportedInner(s)
            | Token::DocCommentUncertain(s)
            | Token::DocCommentUncertainInner(s)
            | Token::DocCommentPredicted(s)
            | Token::DocCommentPredictedInner(s)
            | Token::DocCommentParadox(s)
            | Token::DocCommentParadoxInner(s)
            | Token::DocComment(s) => Some(s.as_str()),
            _ => None,
        }
    }
}

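/// Streaming token source over the `logos`-generated lexer, with
/// unbounded lookahead via an internal buffer. Tokens that fail to lex
/// are skipped silently rather than surfaced to the caller.
///
/// A minimal usage sketch:
///
/// ```ignore
/// let mut lexer = Lexer::new("≔ x = 42");
/// while let Some((token, span)) = lexer.next_token() {
///     println!("{:?} at {:?}", token, span);
/// }
/// ```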
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    buffer: Vec<Option<(Token, Span)>>,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source),
            buffer: Vec::new(),
        }
    }

    fn read_next(&mut self) -> Option<(Token, Span)> {
        match self.inner.next() {
            Some(Ok(token)) => {
                let span = self.inner.span();
                Some((token, Span::new(span.start, span.end)))
            }
            // Skip invalid tokens and continue with the next one.
            Some(Err(_)) => self.read_next(),
            None => None,
        }
    }

    pub fn next_token(&mut self) -> Option<(Token, Span)> {
        if !self.buffer.is_empty() {
            return self.buffer.remove(0);
        }
        self.read_next()
    }

    pub fn peek(&mut self) -> Option<&(Token, Span)> {
        self.peek_n(0)
    }

    pub fn peek_n(&mut self, n: usize) -> Option<&(Token, Span)> {
        while self.buffer.len() <= n {
            let token = self.read_next();
            self.buffer.push(token);
        }
        self.buffer.get(n).and_then(|opt| opt.as_ref())
    }

    pub fn span(&self) -> Span {
        let span = self.inner.span();
        Span::new(span.start, span.end)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_morphemes() {
        let mut lexer = Lexer::new("τ φ σ ρ Λ Π ⌛");
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Phi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Sigma, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Rho, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Lambda, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Pi, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Async, _))));
    }

    #[test]
    fn test_evidentiality() {
        let mut lexer = Lexer::new("value! uncertain? reported~ paradox‽");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "value"));
        assert!(matches!(lexer.next_token(), Some((Token::Bang, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "uncertain"));
        assert!(matches!(lexer.next_token(), Some((Token::Question, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "reported"));
        assert!(matches!(lexer.next_token(), Some((Token::Tilde, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "paradox"));
        assert!(matches!(lexer.next_token(), Some((Token::Interrobang, _))));
    }

    #[test]
    fn test_pipe_chain() {
        let mut lexer = Lexer::new("data|τ{f}|φ{p}|σ");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "data"));
        assert!(matches!(lexer.next_token(), Some((Token::Pipe, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Tau, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("42 0b1010 0o52 0x2A 0v22 0s42 3.14");
        assert!(matches!(lexer.next_token(), Some((Token::IntLit(s), _)) if s == "42"));
        assert!(matches!(lexer.next_token(), Some((Token::BinaryLit(s), _)) if s == "0b1010"));
        assert!(matches!(lexer.next_token(), Some((Token::OctalLit(s), _)) if s == "0o52"));
        assert!(matches!(lexer.next_token(), Some((Token::HexLit(s), _)) if s == "0x2A"));
        assert!(matches!(lexer.next_token(), Some((Token::VigesimalLit(s), _)) if s == "0v22"));
        assert!(matches!(lexer.next_token(), Some((Token::SexagesimalLit(s), _)) if s == "0s42"));
        assert!(matches!(lexer.next_token(), Some((Token::FloatLit(s), _)) if s == "3.14"));
    }

    #[test]
    fn test_incorporation() {
        let mut lexer = Lexer::new("file·open·read");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "file"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "open"));
        assert!(matches!(lexer.next_token(), Some((Token::MiddleDot, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "read"));
    }

    #[test]
    fn test_special_symbols() {
        let mut lexer = Lexer::new("∅ ◯ ∞");
        assert!(matches!(lexer.next_token(), Some((Token::Empty, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Circle, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Infinity, _))));
    }

    #[test]
    fn test_quantifiers() {
        let mut lexer = Lexer::new("∀x ∃y x∈S y∉T");
        assert!(matches!(lexer.next_token(), Some((Token::ForAll, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::Exists, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "y"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "x"));
        assert!(matches!(lexer.next_token(), Some((Token::ElementOf, _))));
    }

    #[test]
    fn test_set_operations() {
        let mut lexer = Lexer::new("A∪B A∩B A∖B A⊂B A⊆B");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Union, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "B"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "A"));
        assert!(matches!(lexer.next_token(), Some((Token::Intersection, _))));
    }

    #[test]
    fn test_logic_operators() {
        let mut lexer = Lexer::new("p∧q p∨q ¬p p⊻q ⊤ ⊥");
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicAnd, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "q"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "p"));
        assert!(matches!(lexer.next_token(), Some((Token::LogicOr, _))));
    }

    #[test]
    fn test_analysis_operators() {
        let mut lexer = Lexer::new("∫f ∂g √x ∛y f∘g");
        assert!(matches!(lexer.next_token(), Some((Token::Integral, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "f"));
        assert!(matches!(lexer.next_token(), Some((Token::Partial, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "g"));
        assert!(matches!(lexer.next_token(), Some((Token::Sqrt, _))));
    }

    #[test]
    fn test_additional_morphemes() {
        let mut lexer = Lexer::new("δ ε ω α ζ");
        assert!(matches!(lexer.next_token(), Some((Token::Delta, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Epsilon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Omega, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Alpha, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Zeta, _))));
    }

    #[test]
    fn test_ffi_keywords() {
        let mut lexer = Lexer::new("extern unsafe");
        assert!(matches!(lexer.next_token(), Some((Token::Extern, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Unsafe, _))));
    }

    #[test]
    fn test_parallel_morphemes() {
        let mut lexer = Lexer::new("∥ parallel ⊛ gpu");
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Parallel, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Convolve, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Gpu, _))));
    }

    #[test]
    fn test_lifetime_labels() {
        let mut lexer = Lexer::new("'outer: loop { break 'outer }");
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::Colon, _))));
        assert!(matches!(lexer.next_token(), Some((Token::Loop, _))));
        assert!(matches!(lexer.next_token(), Some((Token::LBrace, _))));
        assert!(matches!(lexer.next_token(), Some((Token::DeprecatedRustKeyword(s), _)) if s == "break"));
        assert!(matches!(lexer.next_token(), Some((Token::Lifetime(s), _)) if s == "outer"));
        assert!(matches!(lexer.next_token(), Some((Token::RBrace, _))));
    }

    #[test]
    fn test_string_escape_sequences() {
        let mut lexer = Lexer::new(r#""hello\nworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\nworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\tworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\tworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\rworld""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\rworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\\world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\\world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\"world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\"world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""hello\0world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello\0world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_hex_escape() {
        let mut lexer = Lexer::new(r#""hello\x41world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "helloAworld"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_string_unicode_escape() {
        let mut lexer = Lexer::new(r#""hello\u{1F600}world""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "hello😀world"),
            other => panic!("Expected StringLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r#""\u{03C4}""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τ"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_char_escape_sequences() {
        let mut lexer = Lexer::new(r"'\n'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\n'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\t'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\t'),
            other => panic!("Expected CharLit, got {:?}", other),
        }

        let mut lexer = Lexer::new(r"'\\'");
        match lexer.next_token() {
            Some((Token::CharLit(c), _)) => assert_eq!(c, '\\'),
            other => panic!("Expected CharLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string() {
        let mut lexer = Lexer::new(r#"r"hello\nworld""#);
        match lexer.next_token() {
            Some((Token::RawStringLit(s), _)) => assert_eq!(s, r"hello\nworld"),
            other => panic!("Expected RawStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_string_delimited() {
        let mut lexer = Lexer::new(r##"r#"hello "world""#"##);
        match lexer.next_token() {
            Some((Token::RawStringDelimited(s), _)) => assert_eq!(s, r#"hello "world""#),
            other => panic!("Expected RawStringDelimited, got {:?}", other),
        }
    }

    #[test]
    fn test_byte_string() {
        let mut lexer = Lexer::new(r#"b"hello""#);
        match lexer.next_token() {
            Some((Token::ByteStringLit(bytes), _)) => {
                assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
            }
            other => panic!("Expected ByteStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_interpolated_string() {
        let mut lexer = Lexer::new(r#"f"hello {name}""#);
        match lexer.next_token() {
            Some((Token::InterpolatedStringLit(s), _)) => assert_eq!(s, "hello {name}"),
            other => panic!("Expected InterpolatedStringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_sql() {
        let mut lexer = Lexer::new(r#"σ"SELECT * FROM {table}""#);
        match lexer.next_token() {
            Some((Token::SigilStringSql(s), _)) => assert_eq!(s, "SELECT * FROM {table}"),
            other => panic!("Expected SigilStringSql, got {:?}", other),
        }
    }

    #[test]
    fn test_sigil_string_route() {
        let mut lexer = Lexer::new(r#"ρ"/api/v1/{resource}/{id}""#);
        match lexer.next_token() {
            Some((Token::SigilStringRoute(s), _)) => assert_eq!(s, "/api/v1/{resource}/{id}"),
            other => panic!("Expected SigilStringRoute, got {:?}", other),
        }
    }

    #[test]
    fn test_unicode_in_strings() {
        let mut lexer = Lexer::new(r#""τφσρ 你好 🦀""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, "τφσρ 你好 🦀"),
            other => panic!("Expected StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_empty_string() {
        let mut lexer = Lexer::new(r#""""#);
        match lexer.next_token() {
            Some((Token::StringLit(s), _)) => assert_eq!(s, ""),
            other => panic!("Expected empty StringLit, got {:?}", other),
        }
    }

    #[test]
    fn test_escape_sequence_helper() {
        assert_eq!(process_escape_sequences(r"hello\nworld"), "hello\nworld");
        assert_eq!(process_escape_sequences(r"hello\tworld"), "hello\tworld");
        assert_eq!(process_escape_sequences(r"hello\\world"), "hello\\world");
        assert_eq!(process_escape_sequences(r#"hello\"world"#), "hello\"world");
        assert_eq!(process_escape_sequences(r"hello\x41world"), "helloAworld");
        assert_eq!(
            process_escape_sequences(r"hello\u{1F600}world"),
            "hello😀world"
        );
    }
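
    // A small additional check of the lookahead contract: peek_n buffers
    // tokens without consuming them, and next_token drains that buffer
    // before pulling fresh tokens from the inner lexer.
    #[test]
    fn test_peek_does_not_consume() {
        let mut lexer = Lexer::new("a b");
        assert!(matches!(lexer.peek(), Some((Token::Ident(s), _)) if s == "a"));
        assert!(matches!(lexer.peek_n(1), Some((Token::Ident(s), _)) if s == "b"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "a"));
        assert!(matches!(lexer.next_token(), Some((Token::Ident(s), _)) if s == "b"));
        assert!(lexer.next_token().is_none());
    }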
}