use crate::{
    ast::Number,
    error::LexicalError,
    utils::{parse_number_base, parse_number_sci},
};

use logos::Logos;
use std::ops::Range;

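/// Callback for [`NormalToken::SymbolicStringStart`]: split the matched slice at the
/// last `-` to recover the prefix (e.g. `foo` in `foo-s%"`) and the length of the
/// remaining `s%+"` part of the delimiter.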
fn symbolic_string_prefix_and_length<'input>(
    lex: &mut logos::Lexer<'input, NormalToken<'input>>,
) -> SymbolicStringStart<'input> {
    let slice = lex.slice();
    let (prefix, postfix) = slice
        .rsplit_once('-')
        .expect("The logos regexp ensures this succeeds");
    SymbolicStringStart {
        prefix,
        length: postfix.len(),
    }
}

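/// Tokens of the normal lexing mode, i.e. everything outside of string literals.
/// Whitespace is skipped by logos, and the `Error` variant catches sequences that are
/// never valid, such as a lone carriage return.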
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip "((\r\n)+|[ \t\n]+)")]
pub enum NormalToken<'input> {
    #[regex("'m(%)+\"")]
    #[regex("\r[^\n]")]
    Error,

    #[regex("_*[a-zA-Z][_a-zA-Z0-9-']*")]
    Identifier(&'input str),
    #[regex("[0-9]*\\.?[0-9]+([eE][+\\-]?[0-9]+)?", |lex| parse_number_sci(lex.slice()).ok())]
    DecNumLiteral(Number),
    #[regex("0x[A-Fa-f0-9]+", |lex| parse_number_base(16, &lex.slice()[2..]).ok())]
    HexNumLiteral(Number),
    #[regex("0o[0-7]+", |lex| parse_number_base(8, &lex.slice()[2..]).ok())]
    OctNumLiteral(Number),
    #[regex("0b[01]+", |lex| parse_number_base(2, &lex.slice()[2..]).ok())]
    BinNumLiteral(Number),

    #[regex("'_*[a-zA-Z][_a-zA-Z0-9-']*", |lex| lex.slice().split_at(1).1)]
    RawEnumTag(&'input str),
    #[token("'\"")]
    StrEnumTagBegin,

    #[token("Dyn")]
    Dyn,
    #[token("Number")]
    Number,
    #[token("Bool")]
    Bool,
    #[token("String")]
    String,
    #[token("Array")]
    Array,

    #[token("if")]
    If,
    #[token("then")]
    Then,
    #[token("else")]
    Else,
    #[token("forall")]
    Forall,
    #[token("in")]
    In,
    #[token("let")]
    Let,
    #[token("rec")]
    Rec,
    #[token("match")]
    Match,

    #[token("null")]
    Null,
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("or")]
    Or,
    #[token("as")]
    As,
    #[token("include")]
    Include,

    #[token("?")]
    QuestionMark,
    #[token(",")]
    Comma,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("$")]
    Dollar,
    #[token("=")]
    Equals,
    #[token("!=")]
    NotEquals,
    #[token("&")]
    Ampersand,
    #[token(".")]
    Dot,
    #[token("\"")]
    DoubleQuote,

    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Times,
    #[token("/")]
    Div,
    #[token("%")]
    Percent,
    #[token("++")]
    DoublePlus,
    #[token("==")]
    DoubleEq,
    #[token("@")]
    At,
    #[token("&&")]
    DoubleAnd,
    #[token("||")]
    DoublePipe,
    #[token("!")]
    Bang,
    #[token("..")]
    Ellipsis,

    #[token("fun")]
    Fun,
    #[token("import")]
    Import,
    #[token("|")]
    Pipe,
    #[token("|>")]
    RightPipe,
    #[token("->")]
    SimpleArrow,
    #[token("=>")]
    DoubleArrow,
    #[token("_")]
    Underscore,
    #[regex("m(%+)\"", |lex| lex.slice().len())]
    MultiStringStart(usize),
    #[regex("[a-zA-Z][_a-zA-Z0-9-']*-s(%+)\"", symbolic_string_prefix_and_length)]
    SymbolicStringStart(SymbolicStringStart<'input>),

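    // Primitive operations: `%op%` tokens naming built-in operations, mostly used by
    // the standard library and internally generated code.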
    #[token("%typeof%")]
    Typeof,
    #[token("%cast%")]
    Cast,

    #[token("%contract/apply%")]
    ContractApply,
    #[token("%contract/check%")]
    ContractCheck,
    #[token("%contract/array_lazy_apply%")]
    ContractArrayLazyApp,
    #[token("%contract/record_lazy_apply%")]
    ContractRecordLazyApp,
    #[token("%contract/custom%")]
    ContractCustom,
    #[token("%blame%")]
    Blame,
    #[token("%label/flip_polarity%")]
    LabelFlipPol,
    #[token("%label/polarity%")]
    LabelPol,
    #[token("%label/go_dom%")]
    LabelGoDom,
    #[token("%label/go_codom%")]
    LabelGoCodom,
    #[token("%label/go_field%")]
    LabelGoField,
    #[token("%label/go_array%")]
    LabelGoArray,
    #[token("%label/go_dict%")]
    LabelGoDict,
    #[token("%label/insert_type_variable%")]
    LabelInsertTypeVar,
    #[token("%label/lookup_type_variable%")]
    LabelLookupTypeVar,

    #[token("%seal%")]
    Seal,
    #[token("%unseal%")]
    Unseal,
    #[token("%enum/embed%")]
    EnumEmbed,
    #[token("%record/map%")]
    RecordMap,
    #[token("%record/insert%")]
    RecordInsert,
    #[token("%record/insert_with_opts%")]
    RecordInsertWithOpts,
    #[token("%record/remove%")]
    RecordRemove,
    #[token("%record/remove_with_opts%")]
    RecordRemoveWithOpts,
    #[token("%record/empty_with_tail%")]
    RecordEmptyWithTail,
    #[token("%record/seal_tail%")]
    RecordSealTail,
    #[token("%record/unseal_tail%")]
    RecordUnsealTail,
    #[token("%seq%")]
    Seq,
    #[token("%deep_seq%")]
    DeepSeq,
    #[token("%force%")]
    OpForce,
    #[token("%array/length%")]
    ArrayLength,
    #[token("%record/fields%")]
    RecordFields,
    #[token("%record/fields_with_opts%")]
    RecordFieldsWithOpts,
    #[token("%record/values%")]
    RecordValues,

    #[token("%number/arccos%")]
    NumberArcCos,
    #[token("%number/arcsin%")]
    NumberArcSin,
    #[token("%number/arctan%")]
    NumberArcTan,
    #[token("%number/arctan2%")]
    NumberArcTan2,
    #[token("%number/cos%")]
    NumberCos,
    #[token("%number/sin%")]
    NumberSin,
    #[token("%number/tan%")]
    NumberTan,
    #[token("%number/log%")]
    NumberLog,
    #[token("%pow%")]
    Pow,
    #[token("%trace%")]
    Trace,

    #[token("%record/has_field%")]
    RecordHasField,
    #[token("%record/has_field_with_opts%")]
    RecordHasFieldWithOpts,
    #[token("%array/map%")]
    ArrayMap,
    #[token("%array/at%")]
    ArrayAt,
    #[token("%array/generate%")]
    ArrayGen,
    #[token("%rec_force%")]
    OpRecForce,
    #[token("%rec_default%")]
    OpRecDefault,
    #[token("%record/field_is_defined%")]
    RecordFieldIsDefined,
    #[token("%record/field_is_defined_with_opts%")]
    RecordFieldIsDefinedWithOpts,
    #[token("%record/split_pair%")]
    RecordSplitPair,
    #[token("%record/disjoint_merge%")]
    RecordDisjointMerge,
    #[token("%record/merge_contract%")]
    RecordMergeContract,
    #[token("%record/freeze%")]
    RecordFreeze,

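    // Keywords for record field metadata annotations.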
    #[token("default")]
    Default,
    #[token("doc")]
    Doc,
    #[token("optional")]
    Optional,
    #[token("priority")]
    Priority,
    #[token("force")]
    Force,
    #[token("not_exported")]
    NotExported,

    #[token("%hash%")]
    OpHash,
    #[token("%serialize%")]
    Serialize,
    #[token("%deserialize%")]
    Deserialize,
    #[token("%string/split%")]
    StringSplit,
    #[token("%string/trim%")]
    StringTrim,
    #[token("%string/chars%")]
    StringChars,
    #[token("%string/uppercase%")]
    StringUppercase,
    #[token("%string/lowercase%")]
    StringLowercase,
    #[token("%string/contains%")]
    StringContains,
    #[token("%string/compare%")]
    StringCompare,
    #[token("%string/replace%")]
    StringReplace,
    #[token("%string/replace_regex%")]
    StringReplaceRegex,
    #[token("%string/is_match%")]
    StringIsMatch,
    #[token("%string/find%")]
    StringFind,
    #[token("%string/find_all%")]
    StringFindAll,
    #[token("%string/length%")]
    StringLength,
    #[token("%string/substr%")]
    StringSubstr,
    #[token("%string/base64_encode%")]
    StringBase64Encode,
    #[token("%string/base64_decode%")]
    StringBase64Decode,
    #[token("%to_string%")]
    ToString,
    #[token("%number/from_string%")]
    NumberFromString,
    #[token("%enum/from_string%")]
    EnumFromString,
    #[token("%enum/get_arg%")]
    EnumGetArg,
    #[token("%enum/make_variant%")]
    EnumMakeVariant,
    #[token("%enum/is_variant%")]
    EnumIsVariant,
    #[token("%enum/get_tag%")]
    EnumGetTag,

    #[token("%label/with_message%")]
    LabelWithMessage,
    #[token("%label/with_notes%")]
    LabelWithNotes,
    #[token("%label/append_note%")]
    LabelAppendNote,
    #[token("%label/push_diag%")]
    LabelPushDiag,
    #[token("%array/slice%")]
    ArraySlice,
    #[token("%eval_nix%")]
    EvalNix,

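    // Brackets, delimiters, comparison operators and comments.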
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("<")]
    LAngleBracket,
    #[token("<=")]
    LessOrEq,
    #[token(">")]
    RAngleBracket,
    #[token(">=")]
    GreaterOrEq,
    #[token("[|")]
    EnumOpen,
    #[token("|]")]
    EnumClose,
    #[regex("#[^\n]*")]
    LineComment,
}

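/// The reserved words of the language as plain strings, so that other components (the
/// pretty-printer, for instance) can check whether an identifier clashes with a
/// keyword.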
pub const KEYWORDS: &[&str] = &[
    "Dyn",
    "Number",
    "Bool",
    "String",
    "Array",
    "if",
    "then",
    "else",
    "forall",
    "in",
    "let",
    "rec",
    "match",
    "null",
    "true",
    "false",
    "fun",
    "import",
    "merge",
    "default",
    "doc",
    "optional",
    "priority",
    "force",
    "not_exported",
];

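/// Data attached to the opening token of a symbolic string such as `foo-s%"`: the
/// prefix (`foo`) and the length of the trailing `s%+"` part of the delimiter.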
#[derive(Debug, Clone, PartialEq)]
pub struct SymbolicStringStart<'input> {
    pub prefix: &'input str,
    pub length: usize,
}

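/// Tokens produced inside standard double-quoted strings.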
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
pub enum StringToken<'input> {
    #[regex("\r[^\n]")]
    Error,

    #[regex("[^\"%\\\\]+", |lex| normalize_line_endings(lex.slice()))]
    #[token("%", |lex| String::from(lex.slice()))]
    Literal(String),

    #[token("\"")]
    DoubleQuote,
    #[token("%{")]
    Interpolation,
    #[regex("\\\\.", |lex| lex.slice().chars().nth(1))]
    EscapedChar(char),
    #[regex("\\\\x[A-Fa-f0-9][A-Fa-f0-9]", |lex| &lex.slice()[2..4])]
    EscapedAscii(&'input str),
    #[regex("\\\\u\\{[A-Fa-f0-9]{1,6}\\}", |lex| {
        let len = lex.slice().len();
        &lex.slice()[3..(len - 1)]
    })]
    EscapedUnicode(&'input str),
}

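/// Tokens produced inside multiline (and symbolic) strings. Since the closing
/// delimiter and the interpolation sequence depend on the number of `%`s of the
/// opening delimiter, logos only emits *candidate* tokens here; the main lexer checks
/// their length and substitutes `End` or `Interpolation` (which logos never produces
/// directly) when a candidate matches.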
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
pub enum MultiStringToken<'input> {
    #[regex("\r[^\n]")]
    Error,

    #[regex("[^\"%]+", |lex| normalize_line_endings(lex.slice()))]
    #[token("\"", |lex| String::from(lex.slice()))]
    #[regex("%+", |lex| String::from(lex.slice()))]
    Literal(String),

    #[regex("\"%+")]
    CandidateEnd(&'input str),

    #[regex("%+\\{")]
    CandidateInterpolation(&'input str),

    #[regex("\"%+\\{")]
    QuotesCandidateInterpolation(&'input str),

    End,

    Interpolation,
}

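/// A token from any of the lexing modes, as returned by the modal lexer.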
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'input> {
    Normal(NormalToken<'input>),
    Str(StringToken<'input>),
    MultiStr(MultiStringToken<'input>),
}

pub type SpannedToken<'input> = (usize, Token<'input>, usize);
type NormalLexer<'input> = logos::Lexer<'input, NormalToken<'input>>;
type StringLexer<'input> = logos::Lexer<'input, StringToken<'input>>;
type MultiStringLexer<'input> = logos::Lexer<'input, MultiStringToken<'input>>;

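/// The currently active logos lexer together with its mode-specific data. Only one
/// logos lexer exists at a time; switching modes consumes it via logos' `morph`.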
pub enum ModalLexer<'input> {
    Normal {
        mode_data: NormalData,
        logos_lexer: NormalLexer<'input>,
    },
    String {
        logos_lexer: StringLexer<'input>,
    },
    MultiString {
        mode_data: MultiStrData,
        buffer: Option<(MultiStringToken<'input>, Range<usize>)>,
        logos_lexer: MultiStringLexer<'input>,
    },
}

impl<'input> Iterator for ModalLexer<'input> {
    type Item = Result<Token<'input>, ()>;

    fn next(&mut self) -> Option<Self::Item> {
        match self {
            ModalLexer::Normal { logos_lexer, .. } => Some(logos_lexer.next()?.map(Token::Normal)),
            ModalLexer::String { logos_lexer } => Some(logos_lexer.next()?.map(Token::Str)),
            ModalLexer::MultiString { logos_lexer, .. } => {
                Some(logos_lexer.next()?.map(Token::MultiStr))
            }
        }
    }
}

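/// Mode data for multiline (and symbolic) strings: the expected length of candidate
/// interpolation (`%+{`) and closing (`"%+`) sequences, derived from the opening
/// delimiter, together with the span of that opening delimiter for error reporting.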
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct MultiStrData {
    percent_count: usize,
    opening_delimiter: Range<usize>,
}

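/// Mode data for the normal mode: the number of currently unmatched opening braces,
/// used to tell a `}` closing a record or block apart from one closing an
/// interpolated expression.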
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct NormalData {
    brace_count: usize,
}

impl NormalData {
    pub fn new() -> Self {
        Default::default()
    }
}

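/// A suspended lexing mode, pushed on the mode stack when the lexer temporarily
/// switches to another mode (e.g. entering an interpolated expression inside a
/// string).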
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Mode {
    String,
    MultiString(MultiStrData),
    Normal(NormalData),
}

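/// The main modal lexer, dispatching between the normal, string and multiline string
/// logos lexers. The active lexer is kept in an `Option` so that it can be taken out
/// and morphed when switching modes; `modes` is the stack of suspended modes to
/// return to.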
pub struct Lexer<'input> {
    pub lexer: Option<ModalLexer<'input>>,
    pub modes: Vec<Mode>,
}

impl<'input> Lexer<'input> {
    pub fn new(s: &'input str) -> Self {
        Lexer {
            lexer: Some(ModalLexer::Normal {
                mode_data: NormalData { brace_count: 0 },
                logos_lexer: NormalToken::lexer(s),
            }),
            modes: Vec::new(),
        }
    }

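    /// Push the current normal mode onto the mode stack and switch to the string-like
    /// mode built by `morph` from the current logos lexer. Panics if the lexer is not
    /// in normal mode.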
    fn enter_strlike<F>(&mut self, morph: F)
    where
        F: FnOnce(NormalLexer<'input>) -> ModalLexer<'input>,
    {
        match self.lexer.take() {
            Some(ModalLexer::Normal {
                mode_data,
                logos_lexer,
            }) => {
                self.modes.push(Mode::Normal(mode_data));
                self.lexer = Some(morph(logos_lexer));
            }
            _ => panic!("lexer::enter_strlike"),
        }
    }

    fn enter_str(&mut self) {
        self.enter_strlike(|lexer| ModalLexer::String {
            logos_lexer: lexer.morph(),
        });
    }

    fn enter_indstr(&mut self, percent_count: usize, opening_delimiter: Range<usize>) {
        self.enter_strlike(|lexer| ModalLexer::MultiString {
            mode_data: MultiStrData {
                percent_count,
                opening_delimiter,
            },
            buffer: None,
            logos_lexer: lexer.morph(),
        });
    }

    fn enter_normal(&mut self) {
        match self.lexer.take() {
            Some(ModalLexer::String { logos_lexer }) => {
                self.lexer = Some(ModalLexer::Normal {
                    mode_data: NormalData::new(),
                    logos_lexer: logos_lexer.morph(),
                });

                self.modes.push(Mode::String);
            }
            Some(ModalLexer::MultiString {
                mode_data,
                logos_lexer,
                buffer: _,
            }) => {
                self.lexer = Some(ModalLexer::Normal {
                    mode_data: NormalData::new(),
                    logos_lexer: logos_lexer.morph(),
                });

                self.modes.push(Mode::MultiString(mode_data));
            }
            _ => panic!("lexer::enter_normal"),
        }
    }

    fn leave_str(&mut self) {
        match self.lexer.take() {
            Some(ModalLexer::String { logos_lexer }) => {
                let Some(Mode::Normal(mode_data)) = self.modes.pop() else {
                    panic!("lexer::leave_str (popped wrong mode)");
                };

                self.lexer = Some(ModalLexer::Normal {
                    mode_data,
                    logos_lexer: logos_lexer.morph(),
                });
            }
            _ => panic!("lexer::leave_str"),
        }
    }

    fn leave_indstr(&mut self) {
        match self.lexer.take() {
            Some(ModalLexer::MultiString { logos_lexer, .. }) => {
                let Some(Mode::Normal(data)) = self.modes.pop() else {
                    panic!("lexer::leave_indstr (popped wrong mode)");
                };

                self.lexer = Some(ModalLexer::Normal {
                    mode_data: data,
                    logos_lexer: logos_lexer.morph(),
                });
            }
            _ => panic!("lexer::leave_indstr"),
        }
    }

    fn leave_normal(&mut self) {
        match self.lexer.take() {
            Some(ModalLexer::Normal { logos_lexer, .. }) => {
                match self.modes.pop() {
                    Some(Mode::String) => {
                        self.lexer = Some(ModalLexer::String {
                            logos_lexer: logos_lexer.morph(),
                        })
                    }
                    Some(Mode::MultiString(data)) => {
                        self.lexer = Some(ModalLexer::MultiString {
                            mode_data: data,
                            buffer: None,
                            logos_lexer: logos_lexer.morph(),
                        })
                    }
                    mode => panic!("lexer::leave_normal (popped mode {mode:?})"),
                };
            }
            _ => panic!("lexer::leave_normal"),
        }
    }

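    /// Split a too-long candidate interpolation sequence into a literal prefix and an
    /// `Interpolation` token. The literal part is returned immediately, while the
    /// interpolation token (with its adjusted span) is buffered to be emitted on the
    /// next call to `next()`.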
    fn split_candidate_interp(
        &mut self,
        s: &'input str,
        span: Range<usize>,
        percent_count: usize,
    ) -> (Token<'input>, Range<usize>) {
        let split_at = s.len() - percent_count;
        let next_token = MultiStringToken::Interpolation;
        let next_span = Range {
            start: span.start + split_at,
            end: span.end,
        };
        self.bufferize(next_token, next_span);

        let token = Token::MultiStr(MultiStringToken::Literal(s[0..split_at].to_owned()));
        let span = Range {
            start: span.start,
            end: span.start + split_at,
        };

        (token, span)
    }

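    /// Post-process a token produced in normal mode: enter the appropriate string mode
    /// on an opening string delimiter, track brace nesting so that a `}` closing an
    /// interpolated expression pops back to the enclosing string mode, skip line
    /// comments, and turn lexing failures into `LexicalError`s. Other tokens are
    /// passed through unchanged.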
    fn handle_normal_token(
        &mut self,
        span: Range<usize>,
        token: NormalToken<'input>,
    ) -> Option<Result<SpannedToken<'input>, LexicalError>> {
        match token {
            NormalToken::DoubleQuote | NormalToken::StrEnumTagBegin => self.enter_str(),
            NormalToken::MultiStringStart(delim_size)
            | NormalToken::SymbolicStringStart(SymbolicStringStart {
                length: delim_size, ..
            }) => {
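                // `delim_size` counts the whole opening delimiter, including its kind
                // marker (the `m` of `m%"` or the `s` of `-s%"`). Interpolation and
                // closing sequences only consist of the `%`s plus a single `{` or `"`,
                // so the expected length is one less.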
                let size_without_kind_marker = delim_size - 1;
                self.enter_indstr(size_without_kind_marker, span.clone())
            }
            NormalToken::LBrace => {
                self.normal_mode_data_mut().brace_count += 1;
            }
            NormalToken::RBrace => {
                let data = self.normal_mode_data_mut();
                if data.brace_count == 0 {
                    if self.modes.is_empty() {
                        return Some(Err(LexicalError::UnmatchedCloseBrace(span.start)));
                    }

                    self.leave_normal();
                } else {
                    data.brace_count -= 1;
                }
            }
            NormalToken::LineComment => return self.next(),
            NormalToken::Error => {
                return Some(Err(LexicalError::Generic(span)));
            }
            _ => (),
        };

        Some(Ok((span.start, Token::Normal(token), span.end)))
    }

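    /// Post-process a token produced in string mode: leave the mode on a closing `"`,
    /// switch back to normal mode on interpolation, and resolve escape sequences,
    /// reporting invalid ones as errors.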
    fn handle_string_token(
        &mut self,
        span: Range<usize>,
        token: StringToken<'input>,
    ) -> Option<Result<SpannedToken<'input>, LexicalError>> {
        let result = match token {
            StringToken::DoubleQuote => {
                self.leave_str();
                Token::Normal(NormalToken::DoubleQuote)
            }
            tok @ StringToken::Interpolation => {
                self.enter_normal();
                Token::Str(tok)
            }
            StringToken::EscapedChar(c) => {
                if let Some(esc) = escape_char(c) {
                    Token::Str(StringToken::EscapedChar(esc))
                } else {
                    return Some(Err(LexicalError::InvalidEscapeSequence(span.start + 1)));
                }
            }
            StringToken::EscapedAscii(code) => {
                if let Some(esc) = escape_ascii(code) {
                    Token::Str(StringToken::EscapedChar(esc))
                } else {
                    return Some(Err(LexicalError::InvalidAsciiEscapeCode(span.start + 2)));
                }
            }
            StringToken::EscapedUnicode(code) => {
                if let Some(esc) = escape_unicode(code) {
                    Token::Str(StringToken::EscapedChar(esc))
                } else {
                    let start = span.start + 3;
                    let end = start + code.len();
                    return Some(Err(LexicalError::InvalidUnicodeEscapeCode(start..end)));
                }
            }
            StringToken::Error => {
                return Some(Err(LexicalError::Generic(span)));
            }
            token => Token::Str(token),
        };

        Some(Ok((span.start, result, span.end)))
    }

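    /// Post-process a token produced in multiline string mode. Candidate interpolation
    /// and closing sequences are compared against the expected length stored in
    /// [`MultiStrData`]: too short, they are plain literals; exactly right, they
    /// become real `Interpolation`/`End` tokens (switching mode accordingly); too
    /// long, the excess prefix is emitted as a literal and the remainder is buffered
    /// (for interpolation) or reported as a delimiter mismatch (for closing
    /// sequences).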
    fn handle_multistr_token(
        &mut self,
        mut span: Range<usize>,
        token: MultiStringToken<'input>,
    ) -> Option<Result<SpannedToken<'input>, LexicalError>> {
        let data = self.multistring_mode_data();

        let result = match token {
            MultiStringToken::CandidateInterpolation(s) if s.len() >= data.percent_count => {
                if s.len() == data.percent_count {
                    self.enter_normal();
                    Token::MultiStr(MultiStringToken::Interpolation)
                } else {
                    let (token_fst, span_fst) =
                        self.split_candidate_interp(s, span, data.percent_count);
                    span = span_fst;
                    token_fst
                }
            }
            tok @ MultiStringToken::Interpolation => {
                self.enter_normal();
                Token::MultiStr(tok)
            }
            MultiStringToken::QuotesCandidateInterpolation(s) if s.len() > data.percent_count => {
                let (token_fst, span_fst) =
                    self.split_candidate_interp(s, span, data.percent_count);
                span = span_fst;
                token_fst
            }
            MultiStringToken::CandidateInterpolation(s)
            | MultiStringToken::QuotesCandidateInterpolation(s) => {
                Token::MultiStr(MultiStringToken::Literal(s.to_owned()))
            }
            MultiStringToken::CandidateEnd(s) if s.len() > data.percent_count => {
                return Some(Err(LexicalError::StringDelimiterMismatch {
                    opening_delimiter: data.opening_delimiter.clone(),
                    closing_delimiter: span,
                }));
            }
            MultiStringToken::CandidateEnd(s) if s.len() == data.percent_count => {
                self.leave_indstr();
                Token::MultiStr(MultiStringToken::End)
            }
            MultiStringToken::CandidateEnd(s) => {
                Token::MultiStr(MultiStringToken::Literal(s.to_owned()))
            }
            MultiStringToken::Error => {
                return Some(Err(LexicalError::Generic(span)));
            }
            token => Token::MultiStr(token),
        };

        Some(Ok((span.start, result, span.end)))
    }

    fn normal_mode_data_mut(&mut self) -> &mut NormalData {
        match self.lexer {
            Some(ModalLexer::Normal {
                ref mut mode_data, ..
            }) => mode_data,
            _ => panic!("lexer: normal_mode_data_mut() called while not in normal mode"),
        }
    }

    fn multistring_mode_data(&self) -> &MultiStrData {
        match self.lexer {
            Some(ModalLexer::MultiString { ref mode_data, .. }) => mode_data,
            _ => panic!("lexer: multistring_mode_data() called while not in multistring mode"),
        }
    }

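    /// Store a token to be returned by the next call to `next()` instead of pulling a
    /// new one from the logos lexer. Only used in multiline string mode.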
    fn bufferize(&mut self, token: MultiStringToken<'input>, span: Range<usize>) {
        match self.lexer {
            Some(ModalLexer::MultiString { ref mut buffer, .. }) => *buffer = Some((token, span)),
            _ => panic!("lexer: bufferize() called while not in multistring mode"),
        }
    }
}

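// The main lexing loop: pull the next token from the active logos lexer (or from the
// multiline-string buffer) and post-process it according to the current mode.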
impl<'input> Iterator for Lexer<'input> {
    type Item = Result<SpannedToken<'input>, LexicalError>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.lexer.as_mut().unwrap() {
            ModalLexer::Normal { logos_lexer, .. } => {
                let normal_token = logos_lexer.next()?.unwrap_or(NormalToken::Error);
                let span = logos_lexer.span();
                self.handle_normal_token(span, normal_token)
            }
            ModalLexer::String { logos_lexer } => {
                let string_token = logos_lexer.next()?.unwrap_or(StringToken::Error);
                let span = logos_lexer.span();
                self.handle_string_token(span, string_token)
            }
            ModalLexer::MultiString {
                buffer,
                logos_lexer,
                ..
            } => {
                let (multistr_token, span) = buffer.take().or_else(|| {
                    Some((
                        logos_lexer.next()?.unwrap_or(MultiStringToken::Error),
                        logos_lexer.span(),
                    ))
                })?;

                self.handle_multistr_token(span, multistr_token)
            }
        }
    }
}

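/// A wrapper around [`Lexer`] that shifts every produced span by a fixed offset,
/// useful when the lexed string is a slice of a larger source.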
pub struct OffsetLexer<'input> {
    lexer: Lexer<'input>,
    offset: usize,
}

impl<'input> OffsetLexer<'input> {
    pub fn new(s: &'input str, offset: usize) -> Self {
        OffsetLexer {
            lexer: Lexer::new(s),
            offset,
        }
    }
}

impl<'input> Iterator for OffsetLexer<'input> {
    type Item = Result<SpannedToken<'input>, LexicalError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.lexer.next().map(|result| {
            result.map(|(start, tok, end)| (start + self.offset, tok, end + self.offset))
        })
    }
}

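/// Map the character following a backslash to the character it escapes, or `None` if
/// it is not a valid simple escape.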
fn escape_char(chr: char) -> Option<char> {
    match chr {
        '\'' => Some('\''),
        '"' => Some('"'),
        '\\' => Some('\\'),
        '%' => Some('%'),
        'n' => Some('\n'),
        'r' => Some('\r'),
        't' => Some('\t'),
        _ => None,
    }
}

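/// Parse a two-digit hexadecimal ASCII escape code (`\x..`). Returns `None` if the
/// value is above `0x7F` and thus not valid ASCII.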
fn escape_ascii(code: &str) -> Option<char> {
    let code = u8::from_str_radix(code, 16).ok()?;
    if code > 0x7F {
        None
    } else {
        Some(code as char)
    }
}

fn escape_unicode(code: &str) -> Option<char> {
    u32::from_str_radix(code, 16).ok().and_then(char::from_u32)
}

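/// Replace Windows-style line endings (`\r\n`) with `\n`. Lone carriage returns are
/// rejected by the lexers themselves, so none should remain afterwards.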
pub fn normalize_line_endings(s: impl AsRef<str>) -> String {
    let normalized = s.as_ref().replace("\r\n", "\n");
    debug_assert!(
        normalized.find('\r').is_none(),
        "The lexer throws an error when it finds a lone carriage return"
    );
    normalized
}