1use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A lexed token together with its byte offsets in the input:
/// `(start, token, end)` with `start` inclusive and `end` exclusive.
pub type Spanned<'a> = (usize, Token<'a>, usize);

/// Result alias used throughout the lexer; defaults to a [`Spanned`] token
/// or a [`LexicalError`].
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
/// A single lexed token. String payloads borrow from the `'input` source text.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    Identifier(&'input str),
    // (is_unicode, contents): `true` for `unicode"…"` literals. The payload
    // excludes the quotes (and the `unicode` prefix).
    StringLiteral(bool, &'input str),
    AddressLiteral(&'input str),
    HexLiteral(&'input str),
    // (integer, exponent); exponent is the empty string when absent.
    Number(&'input str, &'input str),
    // (integer, fraction, exponent); exponent is the empty string when absent.
    RationalNumber(&'input str, &'input str, &'input str),
    // A `0x…` literal, stored verbatim including the `0x` prefix.
    HexNumber(&'input str),
    Divide,
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    Layout,
    KwAt,

    // Data-location keywords.
    Memory,
    Storage,
    Calldata,
    Transient,

    // Visibility keywords.
    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    // State-mutability keywords.
    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    // Elementary types; the payload is the bit width (`uint256` => `Uint(256)`)
    // or byte width for `Bytes`.
    Uint(u16),
    Int(u16),
    Bytes(u8),
    Byte,
    DynamicBytes,
    Bool,
    Address,
    String,

    // Punctuation.
    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    Equal,
    Assign,
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    // Yul (inline assembly) keywords.
    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    YulArrow,

    Persistent,
    Temporary,
    Instance,

    // An `@name` annotation; the payload excludes the leading `@`.
    Annotation(&'input str),
}
193
impl fmt::Display for Token<'_> {
    /// Renders the token as it would appear in Solidity source (keywords and
    /// operators in their literal spelling, literal tokens with their payload).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            // Empty exponent / fraction parts are omitted from the output.
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
            Token::Persistent => write!(f, "persistent"),
            Token::Temporary => write!(f, "temporary"),
            Token::Instance => write!(f, "instance"),
            Token::Transient => write!(f, "transient"),
            Token::Layout => write!(f, "layout"),
            Token::KwAt => write!(f, "at"),
        }
    }
}
337
/// A hand-written Solidity lexer yielding [`Spanned`] tokens over `input`.
#[derive(Debug)]
pub struct Lexer<'input> {
    // The full source text; token payloads are slices of this.
    input: &'input str,
    // Char iterator with multi-item lookahead (needed to see past a `.`).
    chars: PeekNth<CharIndices<'input>>,
    // Comments are not tokenized; they are collected here as they are found.
    comments: &'input mut Vec<Comment>,
    file_no: usize,
    // Set after `pragma <identifier>` so version strings like `0.5.0`
    // lex as separate integer tokens rather than rationals.
    parse_semver: bool,
    // The last two tokens produced, used to detect the `pragma <id>` prefix.
    last_tokens: [Option<Token<'input>>; 2],
    // Recoverable lexical errors; lexing continues after pushing most of them.
    pub errors: &'input mut Vec<LexicalError>,
}
372
/// Errors the lexer can report; each carries the source location it applies to.
// NOTE(review): `EndofFileInHex` has inconsistent casing (`of` vs `Of`) but is
// public API, so it is kept as-is.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
401
402pub fn is_keyword(word: &str) -> bool {
404 KEYWORDS.contains_key(word)
405}
406
/// Compile-time map from reserved words to their tokens. Longer spellings
/// appear before their prefixes (e.g. `"returns"` before `"return"`,
/// `"uint256"` before `"uint"`), though phf lookup is exact-match anyway.
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    // Bare `int`/`uint` are aliases for the 256-bit variants.
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "layout" => Token::Layout,
    "at" => Token::KwAt,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
    "transient" => Token::Transient,
};
575
impl<'input> Lexer<'input> {
    /// Creates a lexer over `input` for file `file_no`.
    ///
    /// Comments found while lexing are appended to `comments`; recoverable
    /// lexical errors are appended to `errors` (lexing continues past most
    /// errors rather than aborting).
    pub fn new(
        input: &'input str,
        file_no: usize,
        comments: &'input mut Vec<Comment>,
        errors: &'input mut Vec<LexicalError>,
    ) -> Self {
        Lexer {
            input,
            chars: peek_nth(input.char_indices()),
            comments,
            file_no,
            parse_semver: false,
            last_tokens: [None, None],
            errors,
        }
    }

    /// Lexes a numeric literal whose first char `ch` sits at byte offset
    /// `start` (already consumed from `self.chars`).
    ///
    /// Handles `0x…` hex numbers, decimal integers, rationals (`1.2` and the
    /// leading-dot form `.5`, for which `ch` is `'.'` and `start` points one
    /// past the dot), and exponents. While `parse_semver` is set only the
    /// integer part is consumed, so `0.5.0` lexes as three separate numbers.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // Hex number: consume the 'x', then require at least one
                // hex digit before accepting further digits/underscores.
                self.chars.next();

                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        if ch == '.' {
            // Called from the '.' arm of next() with start == dot_pos + 1;
            // move start back onto the dot itself.
            is_rational = true;
            start -= 1;
        }

        // Consume the integer part (digits and '_' separators).
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        if self.parse_semver {
            let integer = &self.input[start..=end];
            // Empty slice: semver components never carry an exponent.
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            // Leading-dot form: integer part is the empty slice start..start.
            end_before_rational = start;
            rational_start = start + 1;
        }

        // A '.' followed by a digit begins the fractional part (two-deep
        // lookahead so `1.foo` still lexes as Number then Member).
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    self.chars.next();
                    while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        let old_end = end;
        let mut exp_start = end + 1;

        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            exp_start = *i + 1;
            self.chars.next();
            // Consume any '-' signs ahead of the exponent digits; they stay
            // inside the exp slice since exp_start precedes them.
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // No digit followed the 'e': end never advanced past exp_start.
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }

    /// Scans a string literal body until an unescaped `quote_char`.
    ///
    /// `token_start` is where the token begins (e.g. at a `unicode` prefix),
    /// `string_start` is where the contents begin (just after the opening
    /// quote). The returned payload excludes both quotes; escape sequences
    /// are left unprocessed in the slice.
    fn string(
        &mut self,
        unicode: bool,
        token_start: usize,
        string_start: usize,
        quote_char: char,
    ) -> Result<'input> {
        let mut end;

        let mut last_was_escape = false;

        loop {
            if let Some((i, ch)) = self.chars.next() {
                end = i;
                if !last_was_escape {
                    if ch == quote_char {
                        break;
                    }
                    last_was_escape = ch == '\\';
                } else {
                    // The char after a backslash is skipped verbatim,
                    // including an escaped quote or a second backslash.
                    last_was_escape = false;
                }
            } else {
                return Err(LexicalError::EndOfFileInString(Loc::File(
                    self.file_no,
                    token_start,
                    self.input.len(),
                )));
            }
        }

        Ok((
            token_start,
            Token::StringLiteral(unicode, &self.input[string_start..end]),
            end + 1,
        ))
    }

    /// Inherent lexing step: produces the next token, or `None` at end of
    /// input or after a fatal error. Whitespace is skipped; comments are
    /// pushed to `self.comments`; recoverable errors are pushed to
    /// `self.errors` and lexing resumes at the top of the loop.
    fn next(&mut self) -> Option<Spanned<'input>> {
        'toplevel: loop {
            match self.chars.next() {
                // Identifier or keyword; also the prefixed literal forms
                // `unicode"…"`, `hex"…"` and `address"…"`.
                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
                    let (id, end) = self.match_identifier(start);

                    if id == "unicode" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();
                                // start + 8 skips `unicode` plus the quote.
                                let str_res = self.string(true, start, start + 8, quote_char);
                                match str_res {
                                    Err(lex_err) => self.errors.push(lex_err),
                                    Ok(val) => return Some(val),
                                }
                            }
                            _ => (),
                        }
                    }

                    if id == "hex" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        // Payload keeps the `hex` prefix and
                                        // both quotes.
                                        return Some((
                                            start,
                                            Token::HexLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }

                                    if !ch.is_ascii_hexdigit() && ch != '_' {
                                        // Skip to the closing quote, report
                                        // the bad char, then keep lexing.
                                        for (_, ch) in &mut self.chars {
                                            if ch == quote_char {
                                                break;
                                            }
                                        }

                                        self.errors.push(
                                            LexicalError::InvalidCharacterInHexLiteral(
                                                Loc::File(self.file_no, i, i + 1),
                                                ch,
                                            ),
                                        );
                                        continue 'toplevel;
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    if id == "address" {
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::AddressLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    return if let Some(w) = KEYWORDS.get(id) {
                        Some((start, *w, end))
                    } else {
                        Some((start, Token::Identifier(id), end))
                    };
                }
                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
                    let str_res = self.string(false, start, start + 1, quote_char);
                    match str_res {
                        Err(lex_err) => self.errors.push(lex_err),
                        Ok(val) => return Some(val),
                    }
                }
                // '/': divide, divide-assign, or a line/block comment.
                Some((start, '/')) => {
                    match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            return Some((start, Token::DivideAssign, start + 2));
                        }
                        Some((_, '/')) => {
                            // Line comment; `///` (but not `////`) is a doc
                            // line comment.
                            self.chars.next();

                            let mut newline = false;

                            let doc_comment = match self.chars.next() {
                                Some((_, '/')) => {
                                    !matches!(self.chars.peek(), Some((_, '/')))
                                }
                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
                                    newline = true;
                                    false
                                }
                                _ => false,
                            };

                            let mut last = start + 3;

                            if !newline {
                                // Consume to end of line (or end of input).
                                loop {
                                    match self.chars.next() {
                                        None => {
                                            last = self.input.len();
                                            break;
                                        }
                                        Some((offset, '\n' | '\r')) => {
                                            last = offset;
                                            break;
                                        }
                                        Some(_) => (),
                                    }
                                }
                            }

                            if doc_comment {
                                self.comments.push(Comment::DocLine(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Line(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            }
                        }
                        Some((_, '*')) => {
                            // Block comment; `/**` with content is a doc
                            // block comment.
                            self.chars.next();

                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));

                            let mut last = start + 3;
                            let mut seen_star = false;

                            loop {
                                if let Some((i, ch)) = self.chars.next() {
                                    if seen_star && ch == '/' {
                                        break;
                                    }
                                    seen_star = ch == '*';
                                    last = i;
                                } else {
                                    // Unterminated block comment is fatal.
                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
                                        self.file_no,
                                        start,
                                        self.input.len(),
                                    )));
                                    return None;
                                }
                            }

                            // `last > start + 2` rules out the empty `/**/`.
                            if doc_comment_start && last > start + 2 {
                                self.comments.push(Comment::DocBlock(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Block(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            }
                        }
                        _ => {
                            return Some((start, Token::Divide, start + 1));
                        }
                    }
                }
                Some((start, ch)) if ch.is_ascii_digit() => {
                    let parse_result = self.parse_number(start, ch);
                    match parse_result {
                        Err(lex_err) => {
                            self.errors.push(lex_err.clone());
                            // EOF inside a hex number cannot be recovered.
                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
                                return None;
                            }
                        }
                        Ok(parse_result) => return Some(parse_result),
                    }
                }
                // '@name' annotation; a bare '@' is unrecognised.
                Some((start, '@')) => {
                    let (id, end) = self.match_identifier(start);
                    if id.len() == 1 {
                        self.errors.push(LexicalError::UnrecognisedToken(
                            Loc::File(self.file_no, start, start + 1),
                            id.to_owned(),
                        ));
                    } else {
                        return Some((start, Token::Annotation(&id[1..]), end));
                    };
                }
                Some((i, ';')) => {
                    // A ';' ends any `pragma` directive, so leave semver mode.
                    self.parse_semver = false;
                    return Some((i, Token::Semicolon, i + 1));
                }
                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
                Some((i, '=')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::Equal, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::Arrow, i + 2))
                        }
                        _ => Some((i, Token::Assign, i + 1)),
                    }
                }
                Some((i, '!')) => {
                    return if let Some((_, '=')) = self.chars.peek() {
                        self.chars.next();
                        Some((i, Token::NotEqual, i + 2))
                    } else {
                        Some((i, Token::Not, i + 1))
                    }
                }
                Some((i, '|')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseOrAssign, i + 2))
                        }
                        Some((_, '|')) => {
                            self.chars.next();
                            Some((i, Token::Or, i + 2))
                        }
                        _ => Some((i, Token::BitwiseOr, i + 1)),
                    };
                }
                Some((i, '&')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseAndAssign, i + 2))
                        }
                        Some((_, '&')) => {
                            self.chars.next();
                            Some((i, Token::And, i + 2))
                        }
                        _ => Some((i, Token::BitwiseAnd, i + 1)),
                    };
                }
                Some((i, '^')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseXorAssign, i + 2))
                        }
                        _ => Some((i, Token::BitwiseXor, i + 1)),
                    };
                }
                Some((i, '+')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::AddAssign, i + 2))
                        }
                        Some((_, '+')) => {
                            self.chars.next();
                            Some((i, Token::Increment, i + 2))
                        }
                        _ => Some((i, Token::Add, i + 1)),
                    };
                }
                Some((i, '-')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::SubtractAssign, i + 2))
                        }
                        Some((_, '-')) => {
                            self.chars.next();
                            Some((i, Token::Decrement, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::YulArrow, i + 2))
                        }
                        _ => Some((i, Token::Subtract, i + 1)),
                    };
                }
                Some((i, '*')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MulAssign, i + 2))
                        }
                        Some((_, '*')) => {
                            self.chars.next();
                            Some((i, Token::Power, i + 2))
                        }
                        _ => Some((i, Token::Mul, i + 1)),
                    };
                }
                Some((i, '%')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ModuloAssign, i + 2))
                        }
                        _ => Some((i, Token::Modulo, i + 1)),
                    };
                }
                Some((i, '<')) => {
                    return match self.chars.peek() {
                        Some((_, '<')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftLeftAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftLeft, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::LessEqual, i + 2))
                        }
                        _ => Some((i, Token::Less, i + 1)),
                    };
                }
                Some((i, '>')) => {
                    return match self.chars.peek() {
                        Some((_, '>')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftRightAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftRight, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MoreEqual, i + 2))
                        }
                        _ => Some((i, Token::More, i + 1)),
                    };
                }
                Some((i, '.')) => {
                    // `.5` is a rational literal unless in semver mode;
                    // otherwise '.' is member access.
                    if let Some((_, a)) = self.chars.peek() {
                        if a.is_ascii_digit() && !self.parse_semver {
                            return match self.parse_number(i + 1, '.') {
                                Err(lex_error) => {
                                    self.errors.push(lex_error);
                                    None
                                }
                                Ok(parse_result) => Some(parse_result),
                            };
                        }
                    }
                    return Some((i, Token::Member, i + 1));
                }
                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
                Some((i, ':')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ColonAssign, i + 2))
                        }
                        _ => Some((i, Token::Colon, i + 1)),
                    };
                }
                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
                Some((_, ch)) if ch.is_whitespace() => (),
                Some((start, _)) => {
                    // Unknown character: swallow up to the next whitespace
                    // and report the run as one unrecognised token.
                    let mut end;

                    loop {
                        if let Some((i, ch)) = self.chars.next() {
                            end = i;

                            if ch.is_whitespace() {
                                break;
                            }
                        } else {
                            end = self.input.len();
                            break;
                        }
                    }

                    self.errors.push(LexicalError::UnrecognisedToken(
                        Loc::File(self.file_no, start, end),
                        self.input[start..end].to_owned(),
                    ));
                }
                None => return None,
            }
        }
    }

    /// Consumes the continuation of an identifier starting at `start` (whose
    /// first char was already consumed) and returns the slice plus its
    /// exclusive end offset. Accepts XID-continue chars and '$'.
    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
        let end;
        loop {
            if let Some((i, ch)) = self.chars.peek() {
                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
                    end = *i;
                    break;
                }
                self.chars.next();
            } else {
                end = self.input.len();
                break;
            }
        }

        (&self.input[start..end], end)
    }
}
1228
1229impl<'input> Iterator for Lexer<'input> {
1230 type Item = Spanned<'input>;
1231
1232 fn next(&mut self) -> Option<Self::Item> {
1233 if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1237 self.parse_semver = true;
1238 }
1239
1240 let token = self.next();
1241
1242 self.last_tokens = [
1243 self.last_tokens[1],
1244 match token {
1245 Some((_, n, _)) => Some(n),
1246 _ => None,
1247 },
1248 ];
1249
1250 token
1251 }
1252}
1253
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the lexer end to end: each case feeds a source string
    /// through `Lexer` and checks the produced `(start, Token, end)` byte
    /// spans, the comments pushed into `comments`, and the recoverable
    /// `LexicalError`s pushed into `errors`.
    #[test]
    fn test_lexer() {
        let mut comments = Vec::new();
        let mut errors = Vec::new();

        // Error recovery: all five lexical errors in one input are reported
        // and the lexer still yields the valid tokens in between.
        let multiple_errors = r#" 9ea -9e € bool hex uint8 hex"g" /** "#;
        let tokens = Lexer::new(multiple_errors, 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(
            tokens,
            vec![
                (3, Token::Identifier("a"), 4),
                (5, Token::Subtract, 6),
                (13, Token::Bool, 17),
                (18, Token::Identifier("hex"), 21),
                (22, Token::Uint(8), 27),
            ]
        );

        assert_eq!(
            errors,
            vec![
                LexicalError::MissingExponent(Loc::File(0, 1, 42)),
                LexicalError::MissingExponent(Loc::File(0, 6, 42)),
                LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
                LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
                LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
            ]
        );

        // Keywords, sized elementary types, and `hex` alone as an identifier.
        let mut errors = Vec::new();
        let tokens = Lexer::new("bool", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Bool, 4)));

        let tokens = Lexer::new("uint8", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Uint(8), 5)));

        let tokens = Lexer::new("hex", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::Identifier("hex"), 3)));

        // Hex literals keep their quotes and underscores in the token text.
        let tokens = Lexer::new(
            "hex\"cafe_dead\" /* adad*** */",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::HexLiteral("hex\"cafe_dead\""), 14)));

        // Hex numbers and decimal numbers with underscores and leading zeros.
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 00090 0_0",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::Number("00090", ""), 29),
                (30, Token::Number("0_0", ""), 33)
            )
        );

        // Rational numbers split into (integer part, fraction, exponent).
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 9.0008 0_0",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("9", "0008", ""), 30),
                (31, Token::Number("0_0", ""), 34)
            )
        );

        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 .0008 0.9e2",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "2"), 35)
            )
        );

        // A signed exponent belongs to the rational literal; the following
        // `-` is a separate subtraction token.
        let tokens = Lexer::new(
            "// foo bar\n0x00fead0_12 .0008 0.9e-2-2",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "-2"), 36),
                (36, Token::Subtract, 37),
                (37, Token::Number("2", ""), 38)
            )
        );

        let tokens = Lexer::new("1.2_3e2-", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::RationalNumber("1", "2_3", "2"), 7),
                (7, Token::Subtract, 8)
            )
        );

        let tokens = Lexer::new("\"foo\"", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(false, "foo"), 5)));

        // `pragma` bodies are tokenized like ordinary source text.
        let tokens = Lexer::new(
            "pragma solidity >=0.5.0 <0.7.0;",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::MoreEqual, 18),
                (18, Token::Number("0", ""), 19),
                (19, Token::Member, 20),
                (20, Token::Number("5", ""), 21),
                (21, Token::Member, 22),
                (22, Token::Number("0", ""), 23),
                (24, Token::Less, 25),
                (25, Token::Number("0", ""), 26),
                (26, Token::Member, 27),
                (27, Token::Number("7", ""), 28),
                (28, Token::Member, 29),
                (29, Token::Number("0", ""), 30),
                (30, Token::Semicolon, 31),
            )
        );

        // Tabs and newlines shift the reported byte offsets accordingly.
        let tokens = Lexer::new(
            "pragma solidity \t>=0.5.0 <0.7.0 \n ;",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (17, Token::MoreEqual, 19),
                (19, Token::Number("0", ""), 20),
                (20, Token::Member, 21),
                (21, Token::Number("5", ""), 22),
                (22, Token::Member, 23),
                (23, Token::Number("0", ""), 24),
                (25, Token::Less, 26),
                (26, Token::Number("0", ""), 27),
                (27, Token::Member, 28),
                (28, Token::Number("7", ""), 29),
                (29, Token::Member, 30),
                (30, Token::Number("0", ""), 31),
                (34, Token::Semicolon, 35),
            )
        );

        // Non-ASCII identifiers; spans are byte ranges, not char counts.
        let tokens =
            Lexer::new("pragma solidity 赤;", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::Identifier("赤"), 19),
                (19, Token::Semicolon, 20)
            )
        );

        // Maximal munch for shift / comparison operators.
        let tokens = Lexer::new(">>= >> >= >", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::ShiftRightAssign, 3),
                (4, Token::ShiftRight, 6),
                (7, Token::MoreEqual, 9),
                (10, Token::More, 11),
            )
        );

        let tokens = Lexer::new("<<= << <= <", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::ShiftLeftAssign, 3),
                (4, Token::ShiftLeft, 6),
                (7, Token::LessEqual, 9),
                (10, Token::Less, 11),
            )
        );

        // `-` is never folded into a numeric literal by the lexer.
        let tokens = Lexer::new("-16 -- - -=", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Subtract, 1),
                (1, Token::Number("16", ""), 3),
                (4, Token::Decrement, 6),
                (7, Token::Subtract, 8),
                (9, Token::SubtractAssign, 11),
            )
        );

        let tokens = Lexer::new("-4 ", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((0, Token::Subtract, 1), (1, Token::Number("4", ""), 2),)
        );

        // Invalid characters inside hex literals are reported with the
        // offending character and its exact location.
        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"abcdefg""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec![LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 10, 11),
                'g'
            )]
        );

        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"g""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec![LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 4, 5),
                'g'
            )]
        );

        // Unrecognised multi-byte characters: the reported span covers all
        // of the character's bytes ('€' is 3 bytes in UTF-8).
        let mut errors = Vec::new();
        let _ = Lexer::new(r#" € "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec!(LexicalError::UnrecognisedToken(
                Loc::File(0, 1, 4),
                "€".to_owned()
            ))
        );

        let mut errors = Vec::new();
        let _ = Lexer::new(r#"€"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            errors,
            vec!(LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 3),
                "€".to_owned()
            ))
        );

        let tokens =
            Lexer::new(r#"pragma foo bar"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Pragma, 6),
                (7, Token::Identifier("foo"), 10),
                (11, Token::Identifier("bar"), 14),
            )
        );

        // Comments never yield tokens; they are pushed into `comments`
        // (DocLine `///`, Line `//`, Block `/* */`, DocBlock `/** */`).
        comments.truncate(0);

        let tokens = Lexer::new(r#"/// foo"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
        );

        comments.truncate(0);

        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
            )
        );

        comments.truncate(0);

        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
        );

        comments.truncate(0);

        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::DocBlock(
                Loc::File(0, 0, 10),
                "/** foo */".to_owned()
            ))
        );

        comments.truncate(0);

        let tokens = Lexer::new(
            "/** jadajadadjada */\n/* bar */",
            0,
            &mut comments,
            &mut errors,
        )
        .count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
            )
        );

        // Degenerate comments: a star-filled block yields nothing, an
        // unterminated `/**` is an error, a line of slashes is a comment.
        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        let mut errors = Vec::new();
        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
        assert_eq!(
            errors,
            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // Unicode whitespace (U+00A0 NBSP, U+2028 LS, U+0085 NEL) separates
        // tokens; spans remain byte-based.
        let tokens = Lexer::new(
            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::MoreEqual, 2),
                (5, Token::Member, 6),
                (7, Token::Identifier("très"), 12),
                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
                (65, Token::Identifier("カラス"), 74)
            )
        );

        // `unicode"…"` is a unicode string literal only when the keyword is
        // attached to the opening quote; detached, it is an identifier.
        let tokens = Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));

        let tokens =
            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            )
        );

        // Exponents: valid, with leading zeros, and the missing-exponent error.
        let tokens = Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        let tokens = Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        // A `.` after a number that is not followed by a digit is a member
        // access, not the start of a rational literal.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        let tokens = Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );

        // Rationals may omit the integer part.
        let mut errors = Vec::new();
        let tokens = Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));

        // Annotations: `@ident` is one token; a detached `@` is an error and
        // the following identifier is lexed on its own.
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
        assert!(errors.is_empty());
        assert!(comments.is_empty());

        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@ my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 1),
                "@".to_string()
            )]
        );
        assert!(comments.is_empty());
    }
}