1use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A token with its byte span in the input: `(start, token, end)`, `end` exclusive.
pub type Spanned<'a> = (usize, Token<'a>, usize);

/// Lexer result type; defaults to a spanned token or a `LexicalError`.
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
/// A single lexed token. String-carrying variants borrow raw slices of the
/// original source text; numeric literal payloads are kept as source digits
/// (underscores included), not decoded values.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    Identifier(&'input str),
    // (is_unicode_prefixed, contents without the surrounding quotes)
    StringLiteral(bool, &'input str),
    // full source slice including the `address`/`hex` prefix and quotes
    AddressLiteral(&'input str),
    HexLiteral(&'input str),
    // (integer digits, exponent digits; exponent may be empty)
    Number(&'input str, &'input str),
    // (integer part, fractional part, exponent; integer/exponent may be empty)
    RationalNumber(&'input str, &'input str, &'input str),
    // full `0x...` source slice
    HexNumber(&'input str),
    Divide,
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    Layout,
    KwAt,

    // data location keywords
    Memory,
    Storage,
    Calldata,
    Transient,

    // visibility keywords
    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    // state mutability keywords
    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    // sized elementary types; the payload is the bit width (uint/int)
    // or byte width (bytes)
    Uint(u16),
    Int(u16),
    Bytes(u8),
    Byte,
    DynamicBytes,
    Bool,
    Address,
    String,

    // punctuation
    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    // operators, grouped with their compound-assignment forms
    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    Equal,
    Assign,
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    // Yul / inline-assembly keywords
    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    YulArrow,

    Persistent,
    Temporary,
    Instance,

    // `@name`; payload is the name without the leading `@`
    Annotation(&'input str),
}
193
// Renders each token in its canonical source form (e.g. `Arrow` as `=>`,
// `Uint(8)` as `uint8`); literal variants are reproduced from their stored
// source slices.
impl fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            // empty exponent: print the integer alone, no trailing `e`
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
            Token::Persistent => write!(f, "persistent"),
            Token::Temporary => write!(f, "temporary"),
            Token::Instance => write!(f, "instance"),
            Token::Transient => write!(f, "transient"),
            Token::Layout => write!(f, "layout"),
            Token::KwAt => write!(f, "at"),
        }
    }
}
337
/// Hand-written lexer yielding `Spanned` tokens over a borrowed source string.
#[derive(Debug)]
pub struct Lexer<'input> {
    // the full source text; token payloads borrow slices of it
    input: &'input str,
    // character stream with byte offsets; supports multi-character lookahead
    chars: PeekNth<CharIndices<'input>>,
    // comments are collected here instead of being emitted as tokens
    comments: &'input mut Vec<Comment>,
    // index of the file being lexed, used to build `Loc::File` spans
    file_no: usize,
    // true while lexing a `pragma <identifier> ...` version expression;
    // disables rational/exponent parsing so `0.5.0` lexes as numbers and `.`
    parse_semver: bool,
    // the two most recently produced tokens, used to detect `pragma <id>`
    last_tokens: [Option<Token<'input>>; 2],
    // recoverable lexical errors are accumulated here; lexing continues
    pub errors: &'input mut Vec<LexicalError>,
}
372
/// Errors produced during lexing. Each variant carries the `Loc` of the
/// offending span; the user-facing message comes from the `thiserror`
/// `#[error]` attributes.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    // NOTE(review): "Endof" casing is inconsistent with the other variants,
    // but renaming would break the public API, so it is kept as-is.
    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
401
402pub fn is_keyword(word: &str) -> bool {
404 KEYWORDS.contains_key(word)
405}
406
/// Compile-time perfect-hash map from keyword spelling to its token.
/// Sized elementary types (`bytesN`, `intN`, `uintN`) are enumerated
/// explicitly; bare `int`/`uint` map to the 256-bit forms.
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "layout" => Token::Layout,
    "at" => Token::KwAt,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
    "persistent" => Token::Persistent,
    "temporary" => Token::Temporary,
    "instance" => Token::Instance,
    "transient" => Token::Transient,
};
578
impl<'input> Lexer<'input> {
    /// Create a lexer over `input` for file index `file_no`.
    ///
    /// Comments and recoverable lexical errors are appended to the caller's
    /// `comments` and `errors` vectors rather than interrupting tokenization.
    pub fn new(
        input: &'input str,
        file_no: usize,
        comments: &'input mut Vec<Comment>,
        errors: &'input mut Vec<LexicalError>,
    ) -> Self {
        Lexer {
            input,
            chars: peek_nth(input.char_indices()),
            comments,
            file_no,
            parse_semver: false,
            last_tokens: [None, None],
            errors,
        }
    }

    /// Lex a numeric literal. `start` is the offset of the first character of
    /// the literal and `ch` that character (already consumed by the caller).
    ///
    /// Produces `HexNumber` for `0x...`, `RationalNumber` when a fractional
    /// part is present, and `Number` otherwise; exponents (`e`/`E`, with an
    /// optional `-`) are kept as source slices. In semver mode only a plain
    /// integer is consumed, so `0.5.0` lexes as numbers separated by `.`.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // hexadecimal number: `0x` must be followed by at least one
                // hex digit
                self.chars.next();

                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                // consume remaining hex digits and `_` separators
                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        if ch == '.' {
            // called with `.` for a literal that begins with the decimal
            // point (e.g. `.5`); the caller passed `start` one past the dot,
            // so step back to cover it
            is_rational = true;
            start -= 1;
        }

        // consume the integer part (digits and `_` separators)
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        if self.parse_semver {
            // inside `pragma` version expressions: no rationals or exponents
            let integer = &self.input[start..=end];
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        // offsets of the fractional part; adjusted below if a `.` follows
        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            // leading-dot form: the integer part is the empty slice
            end_before_rational = start;
            rational_start = start + 1;
        }

        // a `.` followed by a digit starts the fractional part (two-character
        // lookahead so `1.foo` still lexes as number, member, identifier)
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    self.chars.next(); while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        let old_end = end;
        let mut exp_start = end + 1;

        // optional exponent: `e`/`E`, optional `-` sign(s), then digits
        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            exp_start = *i + 1;
            self.chars.next();
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // `end` did not advance past the `e`: no exponent digits present
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }

    /// Lex a string literal. `token_start` is the start of the whole token
    /// (including any `unicode` prefix), `string_start` the offset of the
    /// first content character, and `quote_char` the delimiter to match.
    ///
    /// A backslash prevents the following character from terminating the
    /// literal; escape sequences are not otherwise decoded here.
    fn string(
        &mut self,
        unicode: bool,
        token_start: usize,
        string_start: usize,
        quote_char: char,
    ) -> Result<'input> {
        let mut end;

        let mut last_was_escape = false;

        loop {
            if let Some((i, ch)) = self.chars.next() {
                end = i;
                if !last_was_escape {
                    if ch == quote_char {
                        break;
                    }
                    last_was_escape = ch == '\\';
                } else {
                    last_was_escape = false;
                }
            } else {
                return Err(LexicalError::EndOfFileInString(Loc::File(
                    self.file_no,
                    token_start,
                    self.input.len(),
                )));
            }
        }

        Ok((
            token_start,
            Token::StringLiteral(unicode, &self.input[string_start..end]),
            end + 1,
        ))
    }

    /// Produce the next token, skipping whitespace and collecting comments
    /// into `self.comments`. Recoverable errors are pushed onto `self.errors`
    /// and lexing continues; `None` means end of input (or an unrecoverable
    /// end-of-file condition).
    fn next(&mut self) -> Option<Spanned<'input>> {
        'toplevel: loop {
            match self.chars.next() {
                // identifier start: `_`, `$`, or a Unicode XID start char
                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
                    let (id, end) = self.match_identifier(start);

                    if id == "unicode" {
                        // `unicode"..."` / `unicode'...'` string literal
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();
                                // content starts after `unicode` (7 bytes)
                                // plus the opening quote
                                let str_res = self.string(true, start, start + 8, quote_char);
                                match str_res {
                                    Err(lex_err) => self.errors.push(lex_err),
                                    Ok(val) => return Some(val),
                                }
                            }
                            _ => (),
                        }
                    }

                    if id == "hex" {
                        // `hex"..."` literal: only hex digits and `_` allowed
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::HexLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }

                                    if !ch.is_ascii_hexdigit() && ch != '_' {
                                        // skip to the closing quote, record
                                        // the error, then resume lexing
                                        for (_, ch) in &mut self.chars {
                                            if ch == quote_char {
                                                break;
                                            }
                                        }

                                        self.errors.push(
                                            LexicalError::InvalidCharacterInHexLiteral(
                                                Loc::File(self.file_no, i, i + 1),
                                                ch,
                                            ),
                                        );
                                        continue 'toplevel;
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    if id == "address" {
                        // `address"..."` literal: consumed verbatim up to the
                        // closing quote
                        match self.chars.peek() {
                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
                                let quote_char = *quote_char;

                                self.chars.next();

                                for (i, ch) in &mut self.chars {
                                    if ch == quote_char {
                                        return Some((
                                            start,
                                            Token::AddressLiteral(&self.input[start..=i]),
                                            i + 1,
                                        ));
                                    }
                                }

                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
                                    self.file_no,
                                    start,
                                    self.input.len(),
                                )));
                                return None;
                            }
                            _ => (),
                        }
                    }

                    // plain identifier or keyword
                    return if let Some(w) = KEYWORDS.get(id) {
                        Some((start, *w, end))
                    } else {
                        Some((start, Token::Identifier(id), end))
                    };
                }
                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
                    let str_res = self.string(false, start, start + 1, quote_char);
                    match str_res {
                        Err(lex_err) => self.errors.push(lex_err),
                        Ok(val) => return Some(val),
                    }
                }
                // `/` starts `/=`, `//` line comment, `/* */` block comment,
                // or plain division
                Some((start, '/')) => {
                    match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            return Some((start, Token::DivideAssign, start + 2));
                        }
                        Some((_, '/')) => {
                            self.chars.next();

                            let mut newline = false;

                            // `///` is a doc comment unless it is `////`
                            let doc_comment = match self.chars.next() {
                                Some((_, '/')) => {
                                    !matches!(self.chars.peek(), Some((_, '/')))
                                }
                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
                                    newline = true;
                                    false
                                }
                                _ => false,
                            };

                            let mut last = start + 3;

                            if !newline {
                                // consume to end of line (or end of input)
                                loop {
                                    match self.chars.next() {
                                        None => {
                                            last = self.input.len();
                                            break;
                                        }
                                        Some((offset, '\n' | '\r')) => {
                                            last = offset;
                                            break;
                                        }
                                        Some(_) => (),
                                    }
                                }
                            }

                            if doc_comment {
                                self.comments.push(Comment::DocLine(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Line(
                                    Loc::File(self.file_no, start, last),
                                    self.input[start..last].to_owned(),
                                ));
                            }
                        }
                        Some((_, '*')) => {
                            self.chars.next();

                            // `/**` opens a doc block comment
                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));

                            let mut last = start + 3;
                            let mut seen_star = false;

                            // scan for the closing `*/`
                            loop {
                                if let Some((i, ch)) = self.chars.next() {
                                    if seen_star && ch == '/' {
                                        break;
                                    }
                                    seen_star = ch == '*';
                                    last = i;
                                } else {
                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
                                        self.file_no,
                                        start,
                                        self.input.len(),
                                    )));
                                    return None;
                                }
                            }

                            // `last > start + 2` excludes the degenerate
                            // `/**/` from being a doc block
                            if doc_comment_start && last > start + 2 {
                                self.comments.push(Comment::DocBlock(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            } else {
                                self.comments.push(Comment::Block(
                                    Loc::File(self.file_no, start, last + 2),
                                    self.input[start..last + 2].to_owned(),
                                ));
                            }
                        }
                        _ => {
                            return Some((start, Token::Divide, start + 1));
                        }
                    }
                }
                Some((start, ch)) if ch.is_ascii_digit() => {
                    let parse_result = self.parse_number(start, ch);
                    match parse_result {
                        Err(lex_err) => {
                            self.errors.push(lex_err.clone());
                            // end of file inside a hex number cannot be
                            // recovered from
                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
                                return None;
                            }
                        }
                        Ok(parse_result) => return Some(parse_result),
                    }
                }
                // `@name` annotation; a bare `@` is an error
                Some((start, '@')) => {
                    let (id, end) = self.match_identifier(start);
                    if id.len() == 1 {
                        self.errors.push(LexicalError::UnrecognisedToken(
                            Loc::File(self.file_no, start, start + 1),
                            id.to_owned(),
                        ));
                    } else {
                        return Some((start, Token::Annotation(&id[1..]), end));
                    };
                }
                Some((i, ';')) => {
                    // a semicolon ends any `pragma` version expression
                    self.parse_semver = false;
                    return Some((i, Token::Semicolon, i + 1));
                }
                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
                // the remaining arms apply maximal munch: peek to prefer the
                // longest operator (`==` over `=`, `<<=` over `<<`, ...)
                Some((i, '=')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::Equal, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::Arrow, i + 2))
                        }
                        _ => Some((i, Token::Assign, i + 1)),
                    }
                }
                Some((i, '!')) => {
                    return if let Some((_, '=')) = self.chars.peek() {
                        self.chars.next();
                        Some((i, Token::NotEqual, i + 2))
                    } else {
                        Some((i, Token::Not, i + 1))
                    }
                }
                Some((i, '|')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseOrAssign, i + 2))
                        }
                        Some((_, '|')) => {
                            self.chars.next();
                            Some((i, Token::Or, i + 2))
                        }
                        _ => Some((i, Token::BitwiseOr, i + 1)),
                    };
                }
                Some((i, '&')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseAndAssign, i + 2))
                        }
                        Some((_, '&')) => {
                            self.chars.next();
                            Some((i, Token::And, i + 2))
                        }
                        _ => Some((i, Token::BitwiseAnd, i + 1)),
                    };
                }
                Some((i, '^')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::BitwiseXorAssign, i + 2))
                        }
                        _ => Some((i, Token::BitwiseXor, i + 1)),
                    };
                }
                Some((i, '+')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::AddAssign, i + 2))
                        }
                        Some((_, '+')) => {
                            self.chars.next();
                            Some((i, Token::Increment, i + 2))
                        }
                        _ => Some((i, Token::Add, i + 1)),
                    };
                }
                Some((i, '-')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::SubtractAssign, i + 2))
                        }
                        Some((_, '-')) => {
                            self.chars.next();
                            Some((i, Token::Decrement, i + 2))
                        }
                        Some((_, '>')) => {
                            self.chars.next();
                            Some((i, Token::YulArrow, i + 2))
                        }
                        _ => Some((i, Token::Subtract, i + 1)),
                    };
                }
                Some((i, '*')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MulAssign, i + 2))
                        }
                        Some((_, '*')) => {
                            self.chars.next();
                            Some((i, Token::Power, i + 2))
                        }
                        _ => Some((i, Token::Mul, i + 1)),
                    };
                }
                Some((i, '%')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ModuloAssign, i + 2))
                        }
                        _ => Some((i, Token::Modulo, i + 1)),
                    };
                }
                Some((i, '<')) => {
                    return match self.chars.peek() {
                        Some((_, '<')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftLeftAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftLeft, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::LessEqual, i + 2))
                        }
                        _ => Some((i, Token::Less, i + 1)),
                    };
                }
                Some((i, '>')) => {
                    return match self.chars.peek() {
                        Some((_, '>')) => {
                            self.chars.next();
                            if let Some((_, '=')) = self.chars.peek() {
                                self.chars.next();
                                Some((i, Token::ShiftRightAssign, i + 3))
                            } else {
                                Some((i, Token::ShiftRight, i + 2))
                            }
                        }
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::MoreEqual, i + 2))
                        }
                        _ => Some((i, Token::More, i + 1)),
                    };
                }
                Some((i, '.')) => {
                    // `.5` is a rational literal outside semver mode;
                    // otherwise `.` is member access
                    if let Some((_, a)) = self.chars.peek() {
                        if a.is_ascii_digit() && !self.parse_semver {
                            return match self.parse_number(i + 1, '.') {
                                Err(lex_error) => {
                                    self.errors.push(lex_error);
                                    None
                                }
                                Ok(parse_result) => Some(parse_result),
                            };
                        }
                    }
                    return Some((i, Token::Member, i + 1));
                }
                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
                Some((i, ':')) => {
                    return match self.chars.peek() {
                        Some((_, '=')) => {
                            self.chars.next();
                            Some((i, Token::ColonAssign, i + 2))
                        }
                        _ => Some((i, Token::Colon, i + 1)),
                    };
                }
                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
                // whitespace is skipped; loop to the next character
                Some((_, ch)) if ch.is_whitespace() => (),
                // anything else: consume up to the next whitespace and report
                // the run as one unrecognised token
                Some((start, _)) => {
                    let mut end;

                    loop {
                        if let Some((i, ch)) = self.chars.next() {
                            end = i;

                            if ch.is_whitespace() {
                                break;
                            }
                        } else {
                            end = self.input.len();
                            break;
                        }
                    }

                    self.errors.push(LexicalError::UnrecognisedToken(
                        Loc::File(self.file_no, start, end),
                        self.input[start..end].to_owned(),
                    ));
                }
                None => return None, }
        }
    }

    /// Consume the rest of an identifier whose first character is at `start`;
    /// returns the identifier slice and the offset one past its end.
    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
        let end;
        loop {
            if let Some((i, ch)) = self.chars.peek() {
                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
                    end = *i;
                    break;
                }
                self.chars.next();
            } else {
                end = self.input.len();
                break;
            }
        }

        (&self.input[start..end], end)
    }
}
1231
1232impl<'input> Iterator for Lexer<'input> {
1233 type Item = Spanned<'input>;
1234
1235 fn next(&mut self) -> Option<Self::Item> {
1236 if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1240 self.parse_semver = true;
1241 }
1242
1243 let token = self.next();
1244
1245 self.last_tokens = [
1246 self.last_tokens[1],
1247 match token {
1248 Some((_, n, _)) => Some(n),
1249 _ => None,
1250 },
1251 ];
1252
1253 token
1254 }
1255}
1256
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `source` as file 0 and return everything the lexer produced:
    /// the token stream, collected comments, and collected lexical errors.
    fn lex(source: &str) -> (Vec<Spanned<'_>>, Vec<Comment>, Vec<LexicalError>) {
        let mut comments = Vec::new();
        let mut errors = Vec::new();
        let tokens = Lexer::new(source, 0, &mut comments, &mut errors).collect();
        (tokens, comments, errors)
    }

    /// The lexer recovers after each error and keeps producing tokens;
    /// every bad construct is reported with its own error.
    #[test]
    fn error_recovery() {
        let (tokens, _, errors) = lex(r#" 9ea -9e € bool hex uint8 hex"g" /** "#);
        assert_eq!(
            tokens,
            vec![
                (3, Token::Identifier("a"), 4),
                (5, Token::Subtract, 6),
                (13, Token::Bool, 17),
                (18, Token::Identifier("hex"), 21),
                (22, Token::Uint(8), 27),
            ]
        );
        assert_eq!(
            errors,
            vec![
                LexicalError::MissingExponent(Loc::File(0, 1, 42)),
                LexicalError::MissingExponent(Loc::File(0, 6, 42)),
                LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
                LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
                LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
            ]
        );
    }

    #[test]
    fn keywords_and_elementary_types() {
        let (tokens, _, _) = lex("bool");
        assert_eq!(tokens, vec![(0, Token::Bool, 4)]);

        let (tokens, _, _) = lex("uint8");
        assert_eq!(tokens, vec![(0, Token::Uint(8), 5)]);

        // `hex` on its own (not followed by a quote) is a plain identifier.
        let (tokens, _, _) = lex("hex");
        assert_eq!(tokens, vec![(0, Token::Identifier("hex"), 3)]);
    }

    #[test]
    fn hex_literals() {
        // Underscores are allowed; the literal token includes the `hex"…"` wrapper.
        let (tokens, _, _) = lex("hex\"cafe_dead\" /* adad*** */");
        assert_eq!(tokens, vec![(0, Token::HexLiteral("hex\"cafe_dead\""), 14)]);

        // Non-hex digit inside the literal is reported at its exact offset.
        let (_, _, errors) = lex(r#"hex"abcdefg""#);
        assert_eq!(
            errors,
            vec![LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 10, 11),
                'g'
            )]
        );

        let (_, _, errors) = lex(r#"hex"g""#);
        assert_eq!(
            errors,
            vec![LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 4, 5),
                'g'
            )]
        );
    }

    #[test]
    fn numbers_and_rationals() {
        let (tokens, _, _) = lex("// foo bar\n0x00fead0_12 00090 0_0");
        assert_eq!(
            tokens,
            vec![
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::Number("00090", ""), 29),
                (30, Token::Number("0_0", ""), 33),
            ]
        );

        let (tokens, _, _) = lex("// foo bar\n0x00fead0_12 9.0008 0_0");
        assert_eq!(
            tokens,
            vec![
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("9", "0008", ""), 30),
                (31, Token::Number("0_0", ""), 34),
            ]
        );

        // A rational may omit the integer part and may carry an exponent.
        let (tokens, _, _) = lex("// foo bar\n0x00fead0_12 .0008 0.9e2");
        assert_eq!(
            tokens,
            vec![
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "2"), 35),
            ]
        );

        // A leading '-' in the exponent belongs to the rational; a trailing
        // '-' after it is a separate Subtract token.
        let (tokens, _, _) = lex("// foo bar\n0x00fead0_12 .0008 0.9e-2-2");
        assert_eq!(
            tokens,
            vec![
                (11, Token::HexNumber("0x00fead0_12"), 23),
                (24, Token::RationalNumber("", "0008", ""), 29),
                (30, Token::RationalNumber("0", "9", "-2"), 36),
                (36, Token::Subtract, 37),
                (37, Token::Number("2", ""), 38),
            ]
        );

        let (tokens, _, _) = lex("1.2_3e2-");
        assert_eq!(
            tokens,
            vec![
                (0, Token::RationalNumber("1", "2_3", "2"), 7),
                (7, Token::Subtract, 8),
            ]
        );

        let (tokens, _, _) = lex(" 1e0 ");
        assert_eq!(tokens, vec![(1, Token::Number("1", "0"), 4)]);

        // Unary minus is lexed as a separate token, not part of the number.
        let (tokens, _, _) = lex(" -9e0123");
        assert_eq!(
            tokens,
            vec![(1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8)]
        );

        let (tokens, _, _) = lex(".9");
        assert_eq!(tokens, vec![(0, Token::RationalNumber("", "9", ""), 2)]);

        let (tokens, _, _) = lex(".9e10");
        assert_eq!(tokens, vec![(0, Token::RationalNumber("", "9", "10"), 5)]);
    }

    #[test]
    fn missing_exponent_errors() {
        // 'e' with no digits after it is a missing exponent.
        let (tokens, _, errors) = lex(" -9e");
        assert_eq!(tokens, vec![(1, Token::Subtract, 2)]);
        assert_eq!(
            errors,
            vec![LexicalError::MissingExponent(Loc::File(0, 2, 4))]
        );

        // An identifier character after 'e' also yields the error; lexing
        // resumes at the identifier.
        let (tokens, _, errors) = lex("9ea");
        assert_eq!(tokens, vec![(2, Token::Identifier("a"), 3)]);
        assert_eq!(
            errors,
            vec![LexicalError::MissingExponent(Loc::File(0, 0, 3))]
        );
    }

    #[test]
    fn member_access_after_number() {
        // `42.a` is number + member access, not a rational.
        let (tokens, _, _) = lex("42.a");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4),
            ]
        );

        let (tokens, _, _) = lex("42..a");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5),
            ]
        );
    }

    #[test]
    fn string_literals() {
        let (tokens, _, _) = lex("\"foo\"");
        assert_eq!(tokens, vec![(0, Token::StringLiteral(false, "foo"), 5)]);

        // `unicode"…"` sets the unicode flag on the literal.
        let (tokens, _, _) = lex(r#"unicode"€""#);
        assert_eq!(tokens, vec![(0, Token::StringLiteral(true, "€"), 12)]);

        // `unicode` separated from the quote is just an identifier.
        let (tokens, _, _) = lex(r#"unicode "€""#);
        assert_eq!(
            tokens,
            vec![
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            ]
        );
    }

    #[test]
    fn pragma() {
        let (tokens, _, _) = lex("pragma solidity >=0.5.0 <0.7.0;");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::MoreEqual, 18),
                (18, Token::Number("0", ""), 19),
                (19, Token::Member, 20),
                (20, Token::Number("5", ""), 21),
                (21, Token::Member, 22),
                (22, Token::Number("0", ""), 23),
                (24, Token::Less, 25),
                (25, Token::Number("0", ""), 26),
                (26, Token::Member, 27),
                (27, Token::Number("7", ""), 28),
                (28, Token::Member, 29),
                (29, Token::Number("0", ""), 30),
                (30, Token::Semicolon, 31),
            ]
        );

        // Same version expression with tabs/newlines as separators.
        let (tokens, _, _) = lex("pragma solidity \t>=0.5.0 <0.7.0 \n ;");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (17, Token::MoreEqual, 19),
                (19, Token::Number("0", ""), 20),
                (20, Token::Member, 21),
                (21, Token::Number("5", ""), 22),
                (22, Token::Member, 23),
                (23, Token::Number("0", ""), 24),
                (25, Token::Less, 26),
                (26, Token::Number("0", ""), 27),
                (27, Token::Member, 28),
                (28, Token::Number("7", ""), 29),
                (29, Token::Member, 30),
                (30, Token::Number("0", ""), 31),
                (34, Token::Semicolon, 35),
            ]
        );

        // The pragma value can be a non-ASCII identifier.
        let (tokens, _, _) = lex("pragma solidity 赤;");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Pragma, 6),
                (7, Token::Identifier("solidity"), 15),
                (16, Token::Identifier("赤"), 19),
                (19, Token::Semicolon, 20),
            ]
        );

        let (tokens, _, _) = lex(r#"pragma foo bar"#);
        assert_eq!(
            tokens,
            vec![
                (0, Token::Pragma, 6),
                (7, Token::Identifier("foo"), 10),
                (11, Token::Identifier("bar"), 14),
            ]
        );
    }

    #[test]
    fn shift_and_comparison_operators() {
        // Longest-match: `>>=` before `>>` before `>=` before `>`.
        let (tokens, _, _) = lex(">>= >> >= >");
        assert_eq!(
            tokens,
            vec![
                (0, Token::ShiftRightAssign, 3),
                (4, Token::ShiftRight, 6),
                (7, Token::MoreEqual, 9),
                (10, Token::More, 11),
            ]
        );

        let (tokens, _, _) = lex("<<= << <= <");
        assert_eq!(
            tokens,
            vec![
                (0, Token::ShiftLeftAssign, 3),
                (4, Token::ShiftLeft, 6),
                (7, Token::LessEqual, 9),
                (10, Token::Less, 11),
            ]
        );
    }

    #[test]
    fn minus_operators() {
        let (tokens, _, _) = lex("-16 -- - -=");
        assert_eq!(
            tokens,
            vec![
                (0, Token::Subtract, 1),
                (1, Token::Number("16", ""), 3),
                (4, Token::Decrement, 6),
                (7, Token::Subtract, 8),
                (9, Token::SubtractAssign, 11),
            ]
        );

        let (tokens, _, _) = lex("-4 ");
        assert_eq!(
            tokens,
            vec![(0, Token::Subtract, 1), (1, Token::Number("4", ""), 2)]
        );
    }

    #[test]
    fn unrecognised_tokens() {
        // Error spans are byte offsets, so '€' (3 bytes in UTF-8) ends at +3.
        let (_, _, errors) = lex(r#" € "#);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 1, 4),
                "€".to_owned()
            )]
        );

        let (_, _, errors) = lex(r#"€"#);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 3),
                "€".to_owned()
            )]
        );
    }

    #[test]
    fn comments() {
        // Comments produce no tokens; they are pushed to the comments vec.
        let (tokens, comments, _) = lex(r#"/// foo"#);
        assert!(tokens.is_empty());
        assert_eq!(
            comments,
            vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
        );

        let (tokens, comments, _) = lex("/// jadajadadjada\n// bar");
        assert!(tokens.is_empty());
        assert_eq!(
            comments,
            vec![
                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned()),
            ]
        );

        // `/**/` is an (empty) block comment, not a doc block.
        let (tokens, comments, _) = lex("/**/");
        assert!(tokens.is_empty());
        assert_eq!(
            comments,
            vec![Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned())]
        );

        let (tokens, comments, _) = lex(r#"/** foo */"#);
        assert!(tokens.is_empty());
        assert_eq!(
            comments,
            vec![Comment::DocBlock(
                Loc::File(0, 0, 10),
                "/** foo */".to_owned()
            )]
        );

        let (tokens, comments, _) = lex("/** jadajadadjada */\n/* bar */");
        assert!(tokens.is_empty());
        assert_eq!(
            comments,
            vec![
                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned()),
            ]
        );

        // Runs of '*' or '/' do not confuse the comment scanner.
        let (tokens, _, _) = lex("/************/");
        assert!(tokens.is_empty());

        let (tokens, _, _) = lex("//////////////");
        assert!(tokens.is_empty());

        // Unterminated block comment is reported at end of file.
        let (_, _, errors) = lex("/**");
        assert_eq!(
            errors,
            vec![LexicalError::EndOfFileInComment(Loc::File(0, 0, 3))]
        );
    }

    #[test]
    fn unicode_identifiers_and_whitespace() {
        // NBSP (U+00A0), LINE SEPARATOR (U+2028) and NEL (U+0085) all act as
        // whitespace; identifiers may use non-ASCII XID characters.
        let (tokens, _, _) =
            lex(">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス");
        assert_eq!(
            tokens,
            vec![
                (0, Token::MoreEqual, 2),
                (5, Token::Member, 6),
                (7, Token::Identifier("très"), 12),
                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
                (65, Token::Identifier("カラス"), 74),
            ]
        );
    }

    #[test]
    fn annotations() {
        let (tokens, comments, errors) = lex("@my_annotation");
        assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
        assert!(errors.is_empty());
        assert!(comments.is_empty());

        // '@' must be immediately followed by the identifier; a lone '@' is
        // an unrecognised token and the identifier is lexed on its own.
        let (tokens, comments, errors) = lex("@ my_annotation");
        assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 1),
                "@".to_string()
            )]
        );
        assert!(comments.is_empty());
    }
}