1use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A lexed token together with its byte span in the input: `(start, token, end)`.
pub type Spanned<'a> = (usize, Token<'a>, usize);
18
/// Result type used throughout the lexer; on success it defaults to a [`Spanned`] token.
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    // Identifiers and literals: the string slices borrow directly from the
    // source text, which is why the lexer and its tokens share `'input`.
    Identifier(&'input str),
    /// `(is_unicode, contents)` — `true` for `unicode"…"` literals.
    StringLiteral(bool, &'input str),
    AddressLiteral(&'input str),
    HexLiteral(&'input str),
    /// `(integer, exponent)` parts of a decimal literal; exponent may be empty.
    Number(&'input str, &'input str),
    /// `(integer, fraction, exponent)` parts of a rational literal.
    RationalNumber(&'input str, &'input str, &'input str),
    HexNumber(&'input str),
    Divide,
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    // Data location keywords.
    Memory,
    Storage,
    Calldata,

    // Visibility keywords.
    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    // Mutability keywords.
    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    /// `uintN` with the width in bits (8..=256 in steps of 8; bare `uint` is 256).
    Uint(u16),
    /// `intN` with the width in bits (bare `int` is 256).
    Int(u16),
    /// `bytesN` with the width in bytes (1..=32).
    Bytes(u8),
    Byte,
    /// Dynamically sized `bytes`.
    DynamicBytes,
    Bool,
    Address,
    String,

    // Punctuation.
    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    Equal,
    Assign,
    /// `:=` — used in Yul assembly assignments.
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    /// `=>` — mapping key/value arrow.
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    // Yul / inline assembly keywords.
    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    /// `->` — Yul function return arrow.
    YulArrow,

    Persistent,
    Temporary,
    Instance,

    /// `@name` annotation; the slice excludes the leading `@`.
    Annotation(&'input str),
}
189
// Renders each token as it would appear in source code (literals are
// re-assembled from their stored parts; keywords and operators print their
// canonical spelling).
impl fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            // Empty exponent: print just the integer part.
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
            Token::Persistent => write!(f, "persistent"),
            Token::Temporary => write!(f, "temporary"),
            Token::Instance => write!(f, "instance"),
        }
    }
}
330
#[derive(Debug)]
pub struct Lexer<'input> {
    // The full source text; token slices borrow from it.
    input: &'input str,
    // Char iterator with byte offsets; PeekNth allows two-character lookahead
    // (e.g. peeking past a '.' to decide whether a rational number follows).
    chars: PeekNth<CharIndices<'input>>,
    // Comments are not emitted as tokens; they are collected here instead.
    comments: &'input mut Vec<Comment>,
    // File number recorded in every Loc this lexer produces.
    file_no: usize,
    // Set after `pragma <identifier>` (see the Iterator impl) so that version
    // strings like `0.5.0` lex as plain numbers; cleared on `;`.
    parse_semver: bool,
    // The two most recently returned tokens, used to detect `pragma <ident>`.
    last_tokens: [Option<Token<'input>>; 2],
    // Recoverable lexical errors; lexing continues after pushing one.
    pub errors: &'input mut Vec<LexicalError>,
}
365
// Errors the lexer can report. Each variant carries the Loc of the offending
// span; messages come from the thiserror attributes.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    // NOTE(review): "Endof" spelling is historical; renaming the variant
    // would break callers that match on it.
    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
394
395pub fn is_keyword(word: &str) -> bool {
397 KEYWORDS.contains_key(word)
398}
399
// Compile-time perfect-hash table mapping every reserved word to its token.
// Includes all fixed-width `intN`/`uintN`/`bytesN` type names; bare `int`
// and `uint` map to the 256-bit variants.
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
    "persistent" => Token::Persistent,
    "temporary" => Token::Temporary,
    "instance" => Token::Instance,
};
568
impl<'input> Lexer<'input> {
    /// Create a lexer over `input`. `file_no` is stamped into every `Loc`
    /// produced; comments and recoverable errors are appended to the
    /// caller-owned vectors rather than returned inline.
    pub fn new(
        input: &'input str,
        file_no: usize,
        comments: &'input mut Vec<Comment>,
        errors: &'input mut Vec<LexicalError>,
    ) -> Self {
        Lexer {
            input,
            chars: peek_nth(input.char_indices()),
            comments,
            file_no,
            parse_semver: false,
            last_tokens: [None, None],
            errors,
        }
    }

    /// Scan a numeric literal whose first character `ch` (at byte offset
    /// `start`) has already been consumed. Handles hex numbers (`0x…`),
    /// integers, rationals (`1.5`, `.5`) and exponents (`1e2`, `1e-2`).
    /// In semver mode only a bare digit run is consumed, so `0.5.0` lexes
    /// as three numbers separated by `.` tokens.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // Hex literal: consume the 'x', then require at least one
                // hex digit before accepting trailing digits/underscores.
                self.chars.next();

                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                // Underscores are permitted as digit separators.
                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        if ch == '.' {
            // Called from the '.' branch of next() with start == dot + 1;
            // back up so `start` points at the dot itself and the integer
            // part slice below comes out empty.
            is_rational = true;
            start -= 1;
        }

        // Consume the integer part (digits and '_' separators).
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        if self.parse_semver {
            // Semver mode: stop here so "0.5.0" becomes Number/./Number/./Number.
            let integer = &self.input[start..=end];
            // Empty slice standing in for "no exponent".
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            // Leading-dot form (`.5`): the integer part is the empty slice
            // `input[start..start]` and the fraction starts after the dot.
            end_before_rational = start;
            rational_start = start + 1;
        }

        // A '.' followed by a digit turns this into a rational; two-character
        // lookahead avoids consuming the dot of e.g. `1.foo` member access.
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    self.chars.next();
                    while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        let old_end = end;
        let mut exp_start = end + 1;

        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            exp_start = *i + 1;
            self.chars.next();
            // Optional sign; exp_start stays at the '-' so the exponent
            // slice below includes it (e.g. "1e-2" yields exp == "-2").
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // No digits after 'e' ⇒ `end` never advanced past exp_start.
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }

    /// Scan a string literal body up to the closing `quote_char`.
    /// `token_start` is where the whole token began (the quote, or the
    /// `unicode` prefix) and `string_start` is just past the opening quote.
    /// Backslash escapes are skipped over but not decoded — the contents are
    /// returned as a raw slice of the input.
    fn string(
        &mut self,
        unicode: bool,
        token_start: usize,
        string_start: usize,
        quote_char: char,
    ) -> Result<'input> {
        let mut end;

        let mut last_was_escape = false;

        loop {
            if let Some((i, ch)) = self.chars.next() {
                end = i;
                if !last_was_escape {
                    if ch == quote_char {
                        break;
                    }
                    last_was_escape = ch == '\\';
                } else {
                    // Character after a backslash is always part of the
                    // literal, even if it is the quote character.
                    last_was_escape = false;
                }
            } else {
                return Err(LexicalError::EndOfFileInString(Loc::File(
                    self.file_no,
                    token_start,
                    self.input.len(),
                )));
            }
        }

        Ok((
            token_start,
            Token::StringLiteral(unicode, &self.input[string_start..end]),
            end + 1,
        ))
    }

    /// Scan and return the next token, or `None` at end of input.
    ///
    /// Comments never come back as tokens — they are pushed onto
    /// `self.comments`. Recoverable errors are pushed onto `self.errors`
    /// and scanning continues with the following character.
    fn next(&mut self) -> Option<Spanned<'input>> {
        'toplevel: loop {
            match self.chars.next() {
                // Identifier or keyword; '_' and '$' are valid starters in
                // addition to Unicode XID_Start.
                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
                    let (id, end) = self.match_identifier(start);

                    // `unicode"…"` / `unicode'…'` string literal.
                    if id == "unicode" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();
                                // start + 8 skips the `unicode"` prefix.
                                let str_res = self.string(true, start, start + 8, quote_char);
                                match str_res {
                                    Err(lex_err) => self.errors.push(lex_err),
                                    Ok(val) => return Some(val),
                                }
                            }
                            _ => (),
                        }
                    }

                    // `hex"…"` literal: only hex digits and '_' allowed inside.
                    if id == "hex" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::HexLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }

                                    if !ch.is_ascii_hexdigit() && ch != '_' {
                                        // Skip to the closing quote so lexing
                                        // can resume after the bad literal.
                                        for (_, ch) in &mut self.chars {
                                            if ch == quote_char {
                                                break;
                                            }
                                        }

                                        self.errors.push(
                                            LexicalError::InvalidCharacterInHexLiteral(
                                                Loc::File(self.file_no, i, i + 1),
                                                ch,
                                            ),
                                        );
                                        continue 'toplevel;
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    // `address"…"` literal; contents validated elsewhere.
                    if id == "address" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::AddressLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    // Keyword if it's in the table, otherwise an identifier.
                    return if let Some(w) = KEYWORDS.get(id) {
                        Some((start, *w, end))
                    } else {
                        Some((start, Token::Identifier(id), end))
                    };
                }
                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
                    let str_res = self.string(false, start, start + 1, quote_char);
                    match str_res {
                        Err(lex_err) => self.errors.push(lex_err),
                        Ok(val) => return Some(val),
                    }
                }
                // '/' starts `/=`, a line comment, a block comment, or plain divide.
                Some((start, '/')) => {
                    match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            return Some((start, Token::DivideAssign, start + 2));
                        }
                        Some((_, '/')) => {
                            // Line comment. `///` is a doc comment unless a
                            // fourth '/' follows (`////` is a plain comment).
                            self.chars.next();

                            let mut newline = false;

                            let doc_comment = match self.chars.next() {
                                Some((_, '/')) => {
                                    !matches!(self.chars.peek(), Some((_, '/')))
                                }
                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
                                    // `//` immediately followed by a newline.
                                    newline = true;
                                    false
                                }
                                _ => false,
                            };

                            // NOTE(review): start + 3 assumes at least three
                            // comment bytes were consumed; overwritten below
                            // except when the comment's third char is EOF.
                            let mut last = start + 3;

                            if !newline {
                                // Consume up to (not including) end of line.
                                loop {
                                    match self.chars.next() {
                                        None => {
                                            last = self.input.len();
                                            break;
                                        }
                                        Some((offset, '\n' | '\r')) => {
                                            last = offset;
                                            break;
                                        }
                                        Some(_) => (),
                                    }
                                }
                            }

                            if doc_comment {
                                self.comments.push(Comment::DocLine(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Line(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            }
                        }
                        Some((_, '*')) => {
                            // Block comment. `/**` opens a doc comment.
                            self.chars.next();

                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));

                            let mut last = start + 3;
                            let mut seen_star = false;

                            loop {
                                if let Some((i, ch)) = self.chars.next() {
                                    if seen_star && ch == '/' {
                                        break;
                                    }
                                    seen_star = ch == '*';
                                    last = i;
                                } else {
                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
                                        self.file_no,
                                        start,
                                        self.input.len(),
                                    )));
                                    return None;
                                }
                            }

                            // `last > start + 2` excludes the empty `/**/`,
                            // which would otherwise register as a doc block.
                            if doc_comment_start && last > start + 2 {
                                self.comments.push(Comment::DocBlock(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Block(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            }
                        }
                        _ => {
                            return Some((start, Token::Divide, start + 1));
                        }
                    }
                }
                Some((start, ch)) if ch.is_ascii_digit() => {
                    let parse_result = self.parse_number(start, ch);
                    match parse_result {
                        Err(lex_err) => {
                            self.errors.push(lex_err.clone());
                            // EOF inside a hex literal: nothing left to lex.
                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
                                return None;
                            }
                        }
                        Ok(parse_result) => return Some(parse_result),
                    }
                }
                // `@name` annotation; a bare '@' is an error.
                Some((start, '@')) => {
                    let (id, end) = self.match_identifier(start);
                    if id.len() == 1 {
                        self.errors.push(LexicalError::UnrecognisedToken(
                            Loc::File(self.file_no, start, start + 1),
                            id.to_owned(),
                        ));
                    } else {
                        // Strip the leading '@' from the annotation name.
                        return Some((start, Token::Annotation(&id[1..]), end));
                    };
                }
                Some((i, ';')) => {
                    // End of statement also ends `pragma … ;` semver mode.
                    self.parse_semver = false;
                    return Some((i, Token::Semicolon, i + 1));
                }
                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
                // `==`, `=>`, or `=`.
                Some((i, '=')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::Equal, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::Arrow, i + 2))
                        }
                        _ => Some((i, Token::Assign, i + 1)),
                    }
                }
                Some((i, '!')) => {
                    return if let Some((_, '=')) = self.chars.peek() {
                        self.chars.next();
                        Some((i, Token::NotEqual, i + 2))
                    } else {
                        Some((i, Token::Not, i + 1))
                    }
                }
                Some((i, '|')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseOrAssign, i + 2))
                        }
                        Some((_, '|')) => {
                            self.chars.next();
                            Some((i, Token::Or, i + 2))
                        }
                        _ => Some((i, Token::BitwiseOr, i + 1)),
                    };
                }
                Some((i, '&')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseAndAssign, i + 2))
                        }
                        Some((_, '&')) => {
                            self.chars.next();
                            Some((i, Token::And, i + 2))
                        }
                        _ => Some((i, Token::BitwiseAnd, i + 1)),
                    };
                }
                Some((i, '^')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseXorAssign, i + 2))
                        }
                        _ => Some((i, Token::BitwiseXor, i + 1)),
                    };
                }
                Some((i, '+')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::AddAssign, i + 2))
                        }
                        Some((_, '+')) => {
                            self.chars.next();
                            Some((i, Token::Increment, i + 2))
                        }
                        _ => Some((i, Token::Add, i + 1)),
                    };
                }
                // `-=`, `--`, `->` (Yul), or `-`.
                Some((i, '-')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::SubtractAssign, i + 2))
                        }
                        Some((_, '-')) => {
                            self.chars.next();
                            Some((i, Token::Decrement, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::YulArrow, i + 2))
                        }
                        _ => Some((i, Token::Subtract, i + 1)),
                    };
                }
                Some((i, '*')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MulAssign, i + 2))
                        }
                        Some((_, '*')) => {
                            self.chars.next();
                            Some((i, Token::Power, i + 2))
                        }
                        _ => Some((i, Token::Mul, i + 1)),
                    };
                }
                Some((i, '%')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ModuloAssign, i + 2))
                        }
                        _ => Some((i, Token::Modulo, i + 1)),
                    };
                }
                // `<<=`, `<<`, `<=`, or `<`.
                Some((i, '<')) => {
                    return match self.chars.peek() {
                        Some((_, '<')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftLeftAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftLeft, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::LessEqual, i + 2))
                        }
                        _ => Some((i, Token::Less, i + 1)),
                    };
                }
                // `>>=`, `>>`, `>=`, or `>`.
                Some((i, '>')) => {
                    return match self.chars.peek() {
                        Some((_, '>')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftRightAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftRight, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MoreEqual, i + 2))
                        }
                        _ => Some((i, Token::More, i + 1)),
                    };
                }
                // '.' followed by a digit is a leading-dot rational (unless
                // in semver mode); otherwise it is member access.
                Some((i, '.')) => {
                    if let Some((_, a)) = self.chars.peek() {
                        if a.is_ascii_digit() && !self.parse_semver {
                            return match self.parse_number(i + 1, '.') {
                                Err(lex_error) => {
                                    self.errors.push(lex_error);
                                    None
                                }
                                Ok(parse_result) => Some(parse_result),
                            };
                        }
                    }
                    return Some((i, Token::Member, i + 1));
                }
                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
                Some((i, ':')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ColonAssign, i + 2))
                        }
                        _ => Some((i, Token::Colon, i + 1)),
                    };
                }
                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
                // Whitespace is skipped silently.
                Some((_, ch)) if ch.is_whitespace() => (),
                // Anything else: consume up to the next whitespace and
                // report the run as one unrecognised token.
                Some((start, _)) => {
                    let mut end;

                    loop {
                        if let Some((i, ch)) = self.chars.next() {
                            end = i;

                            if ch.is_whitespace() {
                                break;
                            }
                        } else {
                            end = self.input.len();
                            break;
                        }
                    }

                    self.errors.push(LexicalError::UnrecognisedToken(
                        Loc::File(self.file_no, start, end),
                        self.input[start..end].to_owned(),
                    ));
                }
                None => return None, // End of file.
            }
        }
    }

    /// Consume identifier-continue characters (Unicode XID_Continue or `$`)
    /// and return the identifier slice starting at `start`, plus the byte
    /// offset one past its final character.
    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
        let end;
        loop {
            if let Some((i, ch)) = self.chars.peek() {
                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
                    end = *i;
                    break;
                }
                self.chars.next();
            } else {
                end = self.input.len();
                break;
            }
        }

        (&self.input[start..end], end)
    }
}
1221
1222impl<'input> Iterator for Lexer<'input> {
1223 type Item = Spanned<'input>;
1224
1225 fn next(&mut self) -> Option<Self::Item> {
1226 if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1230 self.parse_semver = true;
1231 }
1232
1233 let token = self.next();
1234
1235 self.last_tokens = [
1236 self.last_tokens[1],
1237 match token {
1238 Some((_, n, _)) => Some(n),
1239 _ => None,
1240 },
1241 ];
1242
1243 token
1244 }
1245}
1246
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test of the lexer: token spans, error recovery,
    /// comment collection, numeric/rational literals, operators, strings,
    /// and unicode handling. Spans are byte offsets into the input.
    #[test]
    fn test_lexer() {
        let mut comments = Vec::new();
        let mut errors = Vec::new();

        // One input with several lexical errors: the lexer should recover
        // after each error and keep producing the remaining valid tokens.
        let multiple_errors = r#" 9ea -9e € bool hex uint8 hex"g" /** "#;
        let tokens = Lexer::new(multiple_errors, 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(
            tokens,
            vec![
                (3, Token::Identifier("a"), 4),
                (5, Token::Subtract, 6),
                (13, Token::Bool, 17),
                (18, Token::Identifier("hex"), 21),
                (22, Token::Uint(8), 27),
            ]
        );

        assert_eq!(
            errors,
            vec![
                LexicalError::MissingExponent(Loc::File(0, 1, 42)),
                LexicalError::MissingExponent(Loc::File(0, 6, 42)),
                LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
                LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
                LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
            ]
        );

        // Keywords and identifiers.
        let mut errors = Vec::new();
        let tokens = Lexer::new("bool", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Bool, 4)));

        let tokens = Lexer::new("uint8", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Uint(8), 5)));

        // `hex` on its own is an identifier, not the start of a hex literal.
        let tokens = Lexer::new("hex", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Identifier("hex"), 3)));

        // A valid hex string literal; the trailing block comment produces no token.
        let tokens = Lexer::new(
            "hex\"cafe_dead\" /* adad*** */",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::HexLiteral("hex\"cafe_dead\""), 14)));

        // Hex and decimal number literals, underscores allowed as separators.
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 00090 0_0",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::Number("00090", ""), 29),
                (30, Token::Number("0_0", ""), 33)
            )
        );

        // Rational numbers: (integer part, fraction part, exponent part).
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 9.0008 0_0",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("9", "0008", ""), 30),
                (31, Token::Number("0_0", ""), 34)
            )
        );

        // Leading-dot rationals and a positive exponent.
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 .0008 0.9e2",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "2"), 35)
            )
        );

        // A negative exponent is part of the rational; the following `-2`
        // is lexed as a separate subtraction and number.
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 .0008 0.9e-2-2",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "-2"), 36),
                (36, Token::Subtract, 37),
                (37, Token::Number("2", ""), 38)
            )
        );

        let tokens = Lexer::new("1.2_3e2-", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::RationalNumber("1", "2_3", "2"), 7),
                (7, Token::Subtract, 8)
            )
        );

        // Plain (non-unicode) string literal.
        let tokens = Lexer::new("\"foo\"", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(false, "foo"), 5)));

        // A pragma directive tokenises its version expression piecewise.
        let tokens = Lexer::new(
            "pragma solidity >=0.5.0 <0.7.0;",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::MoreEqual, 18),
                (18, Token::Number("0", ""), 19),
                (19, Token::Member, 20),
                (20, Token::Number("5", ""), 21),
                (21, Token::Member, 22),
                (22, Token::Number("0", ""), 23),
                (24, Token::Less, 25),
                (25, Token::Number("0", ""), 26),
                (26, Token::Member, 27),
                (27, Token::Number("7", ""), 28),
                (28, Token::Member, 29),
                (29, Token::Number("0", ""), 30),
                (30, Token::Semicolon, 31),
            )
        );

        // Same pragma with embedded tabs and newlines: offsets shift accordingly.
        let tokens = Lexer::new(
            "pragma solidity \t>=0.5.0 <0.7.0 \n ;",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (17, Token::MoreEqual, 19),
                (19, Token::Number("0", ""), 20),
                (20, Token::Member, 21),
                (21, Token::Number("5", ""), 22),
                (22, Token::Member, 23),
                (23, Token::Number("0", ""), 24),
                (25, Token::Less, 26),
                (26, Token::Number("0", ""), 27),
                (27, Token::Member, 28),
                (28, Token::Number("7", ""), 29),
                (29, Token::Member, 30),
                (30, Token::Number("0", ""), 31),
                (34, Token::Semicolon, 35),
            )
        );

        // Non-ASCII identifiers; spans are byte offsets, so "赤" spans 3 bytes.
        let tokens =
            Lexer::new("pragma solidity 赤;", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::Identifier("赤"), 19),
                (19, Token::Semicolon, 20)
            )
        );

        // Maximal-munch on shift/comparison operators.
        let tokens = Lexer::new(">>= >> >= >", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::ShiftRightAssign, 3),
                (4, Token::ShiftRight, 6),
                (7, Token::MoreEqual, 9),
                (10, Token::More, 11),
            )
        );

        let tokens = Lexer::new("<<= << <= <", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::ShiftLeftAssign, 3),
                (4, Token::ShiftLeft, 6),
                (7, Token::LessEqual, 9),
                (10, Token::Less, 11),
            )
        );

        // Unary minus is a separate Subtract token, never folded into the number.
        let tokens = Lexer::new("-16 -- - -=", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Subtract, 1),
                (1, Token::Number("16", ""), 3),
                (4, Token::Decrement, 6),
                (7, Token::Subtract, 8),
                (9, Token::SubtractAssign, 11),
            )
        );

        let tokens = Lexer::new("-4 ", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((0, Token::Subtract, 1), (1, Token::Number("4", ""), 2),)
        );

        // Invalid hex-literal character: error points at the offending byte.
        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"abcdefg""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec![LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 10, 11),
                'g'
            )]
        );

        // Unrecognised multi-byte character (euro sign is 3 bytes).
        let mut errors = Vec::new();
        let _ = Lexer::new(r#" € "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec!(LexicalError::UnrecognisedToken(
                Loc::File(0, 1, 4),
                "€".to_owned()
            ))
        );

        let mut errors = Vec::new();
        let _ = Lexer::new(r#"€"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec!(LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 3),
                "€".to_owned()
            ))
        );

        let tokens =
            Lexer::new(r#"pragma foo bar"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("foo"), 10),
                (11, Token::Identifier("bar"), 14),
            )
        );

        // Comments produce no tokens; they are pushed into `comments` instead.
        comments.truncate(0);

        let tokens = Lexer::new(r#"/// foo"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
        );

        comments.truncate(0);

        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
            )
        );

        comments.truncate(0);

        // Empty block comment is still a (non-doc) block comment.
        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
        );

        comments.truncate(0);

        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::DocBlock(
                Loc::File(0, 0, 10),
                "/** foo */".to_owned()
            ))
        );

        comments.truncate(0);

        let tokens = Lexer::new(
            "/** jadajadadjada */\n/* bar */",
            0,
            &mut comments,
            &mut errors,
        )
        .count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
            )
        );

        // A run of stars closed properly is not a doc comment edge-case failure.
        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // Unterminated block comment reports end-of-file-in-comment.
        let mut errors = Vec::new();
        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
        assert_eq!(
            errors,
            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // Unicode whitespace (NBSP, LINE SEPARATOR, NEL) separates tokens;
        // all spans remain byte offsets.
        let tokens = Lexer::new(
            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::MoreEqual, 2),
                (5, Token::Member, 6),
                (7, Token::Identifier("très"), 12),
                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
                (65, Token::Identifier("カラス"), 74)
            )
        );

        // `unicode"…"` prefix sets the unicode flag on the string literal...
        let tokens = Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));

        // ...but with a space in between, `unicode` is just an identifier.
        let tokens =
            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            )
        );

        // Exponent handling on integer literals.
        let tokens = Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        let tokens = Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        // `e` with no digits after it: missing-exponent error, number dropped.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        // A dot followed by a non-digit is a Member access, not a rational.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        let tokens = Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );

        // Invalid hex digit right after the opening quote.
        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"g""#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();
        assert_eq!(
            errors,
            vec!(LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 4, 5),
                'g'
            ),)
        );

        // Rationals with an empty integer part.
        let mut errors = Vec::new();
        let tokens =
            Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        let mut errors = Vec::new();
        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));

        // `@name` is an annotation token.
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
        assert!(errors.is_empty());
        assert!(comments.is_empty());

        // A lone `@` (not attached to an identifier) is unrecognised.
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@ my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 1),
                "@".to_string()
            )]
        );
        assert!(comments.is_empty());
    }
}