foundry_solang_parser/
lexer.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Custom Solidity lexer.
4//!
5//! Solidity needs a custom lexer for two reasons:
6//!  - comments and doc comments
7//!  - pragma value is [^;]+
8
9use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A spanned [Token]: `(start_byte_offset, token, end_byte_offset)`.
pub type Spanned<'a> = (usize, Token<'a>, usize);

/// [Lexer]'s Result type: a [Spanned] token on success, a [LexicalError] otherwise.
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
/// A Solidity lexical token. Produced by [Lexer].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    // Literals and identifiers; payloads borrow directly from the lexed input.
    Identifier(&'input str),
    /// `(unicode, literal)`
    StringLiteral(bool, &'input str),
    AddressLiteral(&'input str),
    HexLiteral(&'input str),
    /// `(number, exponent)`
    Number(&'input str, &'input str),
    /// `(number, fraction, exponent)`
    RationalNumber(&'input str, &'input str, &'input str),
    HexNumber(&'input str),
    Divide,

    // Declaration keywords.
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    Layout,
    KwAt,

    // Data-location keywords.
    Memory,
    Storage,
    Calldata,
    Transient,

    // Visibility keywords.
    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    // Mutability keywords.
    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    // Elementary type names; the payload is the width in bits for
    // `Uint`/`Int` and in bytes for `Bytes`.
    Uint(u16),
    Int(u16),
    Bytes(u8),
    // prior to 0.8.0 `byte` used to be an alias for `bytes1`
    Byte,
    DynamicBytes,
    Bool,
    Address,
    String,

    // Punctuation.
    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    // Operators; the `*Assign` variants are the compound-assignment forms.
    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    Equal,
    Assign,
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    // Yul (inline assembly) keywords.
    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    YulArrow,

    // Storage types for Soroban
    Persistent,
    Temporary,
    Instance,

    /// `@name` annotation; the payload is the name without the leading `@`.
    Annotation(&'input str),
}
193
impl fmt::Display for Token<'_> {
    // Renders the token as it would appear in Solidity source, e.g.
    // `Token::Uint(256)` prints as `uint256`. The `Number`/`RationalNumber`
    // arms that match an empty exponent string omit the `e` suffix; arm order
    // matters there, since the `""` patterns must come first.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
            Token::Persistent => write!(f, "persistent"),
            Token::Temporary => write!(f, "temporary"),
            Token::Instance => write!(f, "instance"),
            Token::Transient => write!(f, "transient"),
            Token::Layout => write!(f, "layout"),
            Token::KwAt => write!(f, "at"),
        }
    }
}
337
/// Custom Solidity lexer.
///
/// # Examples
///
/// ```
/// use solang_parser::lexer::{Lexer, Token};
///
/// let source = "uint256 number = 0;";
/// let mut comments = Vec::new();
/// let mut errors = Vec::new();
/// let mut lexer = Lexer::new(source, 0, &mut comments, &mut errors);
///
/// let mut next_token = || lexer.next().map(|(_, token, _)| token);
/// assert_eq!(next_token(), Some(Token::Uint(256)));
/// assert_eq!(next_token(), Some(Token::Identifier("number")));
/// assert_eq!(next_token(), Some(Token::Assign));
/// assert_eq!(next_token(), Some(Token::Number("0", "")));
/// assert_eq!(next_token(), Some(Token::Semicolon));
/// assert_eq!(next_token(), None);
/// assert!(errors.is_empty());
/// assert!(comments.is_empty());
/// ```
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The full source text; every token payload and `Loc` offset indexes into it.
    input: &'input str,
    /// Character stream over `input` with byte offsets, peekable more than one ahead.
    chars: PeekNth<CharIndices<'input>>,
    /// Comments are not emitted as tokens; they are pushed here as they are lexed.
    comments: &'input mut Vec<Comment>,
    /// File number recorded in every `Loc` this lexer produces.
    file_no: usize,
    /// While parsing version semver, do not parse rational numbers
    parse_semver: bool,
    /// The last two tokens produced.
    // NOTE(review): maintained and consulted in `next` (not fully visible in
    // this chunk) — confirm the ordering convention there before relying on it.
    last_tokens: [Option<Token<'input>>; 2],
    /// The mutable reference to the error vector.
    pub errors: &'input mut Vec<LexicalError>,
}
372
/// An error thrown by [Lexer].
///
/// Every variant carries the [Loc] of the offending source span.
// NOTE(review): `EndofFileInHex` has inconsistent casing (`of` vs `Of`), but it
// is public API — renaming it would break downstream matches on this enum.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
401
402/// Returns whether `word` is a keyword in Solidity.
403pub fn is_keyword(word: &str) -> bool {
404    KEYWORDS.contains_key(word)
405}
406
/// Compile-time map from every Solidity reserved word to its [Token].
///
/// Bare `int` and `uint` are aliases for their 256-bit variants, and `byte`
/// is the pre-0.8.0 alias for `bytes1` (kept as its own [Token::Byte]).
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "layout" => Token::Layout,
    "at" => Token::KwAt,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
    "transient" => Token::Transient,
};
575
576impl<'input> Lexer<'input> {
577    /// Instantiates a new Lexer.
578    ///
579    /// # Examples
580    ///
581    /// ```
582    /// use solang_parser::lexer::Lexer;
583    ///
584    /// let source = "uint256 number = 0;";
585    /// let mut comments = Vec::new();
586    /// let mut errors = Vec::new();
587    /// let mut lexer = Lexer::new(source, 0, &mut comments, &mut errors);
588    /// ```
589    pub fn new(
590        input: &'input str,
591        file_no: usize,
592        comments: &'input mut Vec<Comment>,
593        errors: &'input mut Vec<LexicalError>,
594    ) -> Self {
595        Lexer {
596            input,
597            chars: peek_nth(input.char_indices()),
598            comments,
599            file_no,
600            parse_semver: false,
601            last_tokens: [None, None],
602            errors,
603        }
604    }
605
    /// Lexes a numeric literal whose first character `ch` has already been
    /// consumed by the caller.
    ///
    /// Recognizes hex numbers (`0x…`), decimal integers, rationals
    /// (`3.14`, `.5`) and exponents (`2e8`, `1.5e-3`); `_` digit separators
    /// are accepted inside any digit run. While `parse_semver` is set, the
    /// function returns right after the integer digits (no fraction or
    /// exponent), so pragma versions like `0.8.0` lex as separate numbers.
    // NOTE(review): for a leading '.', `start` appears to be passed as the
    // offset one past the '.' (hence the `start -= 1` below) — confirm
    // against the call site in `next`, which is not fully visible here.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // hex number
                self.chars.next();

                // The first char after "0x" must be a hex digit; otherwise
                // report a missing number (or EOF inside the literal).
                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                // Consume the remaining hex digits / `_` separators.
                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        if ch == '.' {
            // Literal like `.5`: anchor the token at the '.' itself.
            is_rational = true;
            start -= 1;
        }

        // Consume the integer digit run; `end` tracks the last digit seen.
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        if self.parse_semver {
            // Semver mode: stop here so '.' separates version components.
            let integer = &self.input[start..=end];
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        // Byte ranges of the integer and fractional parts. For a leading '.'
        // the integer part is the empty slice at `start`.
        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            end_before_rational = start;
            rational_start = start + 1;
        }

        // A '.' followed by a digit starts the fractional part (only if we
        // did not already begin with '.'); a bare trailing '.' is left alone.
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    self.chars.next(); // advance over '.'
                    while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        let old_end = end;
        let mut exp_start = end + 1;

        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            // `exp_start` points just past the 'e'; any '-' signs consumed
            // below remain part of the exponent slice.
            exp_start = *i + 1;
            self.chars.next();
            // Negative exponent
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // `end` did not advance past `exp_start`: 'e' with no digits.
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            // Empty slice when no exponent was present (exp_start == end + 1).
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }
735
736    fn string(
737        &mut self,
738        unicode: bool,
739        token_start: usize,
740        string_start: usize,
741        quote_char: char,
742    ) -> Result<'input> {
743        let mut end;
744
745        let mut last_was_escape = false;
746
747        loop {
748            if let Some((i, ch)) = self.chars.next() {
749                end = i;
750                if !last_was_escape {
751                    if ch == quote_char {
752                        break;
753                    }
754                    last_was_escape = ch == '\\';
755                } else {
756                    last_was_escape = false;
757                }
758            } else {
759                return Err(LexicalError::EndOfFileInString(Loc::File(
760                    self.file_no,
761                    token_start,
762                    self.input.len(),
763                )));
764            }
765        }
766
767        Ok((
768            token_start,
769            Token::StringLiteral(unicode, &self.input[string_start..end]),
770            end + 1,
771        ))
772    }
773
774    fn next(&mut self) -> Option<Spanned<'input>> {
775        'toplevel: loop {
776            match self.chars.next() {
777                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
778                    let (id, end) = self.match_identifier(start);
779
780                    if id == "unicode" {
781                        match self.chars.peek() {
782                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
783                                let quote_char = *quote_char;
784
785                                self.chars.next();
786                                let str_res = self.string(true, start, start + 8, quote_char);
787                                match str_res {
788                                    Err(lex_err) => self.errors.push(lex_err),
789                                    Ok(val) => return Some(val),
790                                }
791                            }
792                            _ => (),
793                        }
794                    }
795
796                    if id == "hex" {
797                        match self.chars.peek() {
798                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
799                                let quote_char = *quote_char;
800
801                                self.chars.next();
802
803                                for (i, ch) in &mut self.chars {
804                                    if ch == quote_char {
805                                        return Some((
806                                            start,
807                                            Token::HexLiteral(&self.input[start..=i]),
808                                            i + 1,
809                                        ));
810                                    }
811
812                                    if !ch.is_ascii_hexdigit() && ch != '_' {
813                                        // Eat up the remainer of the string
814                                        for (_, ch) in &mut self.chars {
815                                            if ch == quote_char {
816                                                break;
817                                            }
818                                        }
819
820                                        self.errors.push(
821                                            LexicalError::InvalidCharacterInHexLiteral(
822                                                Loc::File(self.file_no, i, i + 1),
823                                                ch,
824                                            ),
825                                        );
826                                        continue 'toplevel;
827                                    }
828                                }
829
830                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
831                                    self.file_no,
832                                    start,
833                                    self.input.len(),
834                                )));
835                                return None;
836                            }
837                            _ => (),
838                        }
839                    }
840
841                    if id == "address" {
842                        match self.chars.peek() {
843                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
844                                let quote_char = *quote_char;
845
846                                self.chars.next();
847
848                                for (i, ch) in &mut self.chars {
849                                    if ch == quote_char {
850                                        return Some((
851                                            start,
852                                            Token::AddressLiteral(&self.input[start..=i]),
853                                            i + 1,
854                                        ));
855                                    }
856                                }
857
858                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
859                                    self.file_no,
860                                    start,
861                                    self.input.len(),
862                                )));
863                                return None;
864                            }
865                            _ => (),
866                        }
867                    }
868
869                    return if let Some(w) = KEYWORDS.get(id) {
870                        Some((start, *w, end))
871                    } else {
872                        Some((start, Token::Identifier(id), end))
873                    };
874                }
875                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
876                    let str_res = self.string(false, start, start + 1, quote_char);
877                    match str_res {
878                        Err(lex_err) => self.errors.push(lex_err),
879                        Ok(val) => return Some(val),
880                    }
881                }
882                Some((start, '/')) => {
883                    match self.chars.peek() {
884                        Some((_, '=')) => {
885                            self.chars.next();
886                            return Some((start, Token::DivideAssign, start + 2));
887                        }
888                        Some((_, '/')) => {
889                            // line comment
890                            self.chars.next();
891
892                            let mut newline = false;
893
894                            let doc_comment = match self.chars.next() {
895                                Some((_, '/')) => {
896                                    // ///(/)+ is still a line comment
897                                    !matches!(self.chars.peek(), Some((_, '/')))
898                                }
899                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
900                                    newline = true;
901                                    false
902                                }
903                                _ => false,
904                            };
905
906                            let mut last = start + 3;
907
908                            if !newline {
909                                loop {
910                                    match self.chars.next() {
911                                        None => {
912                                            last = self.input.len();
913                                            break;
914                                        }
915                                        Some((offset, '\n' | '\r')) => {
916                                            last = offset;
917                                            break;
918                                        }
919                                        Some(_) => (),
920                                    }
921                                }
922                            }
923
924                            if doc_comment {
925                                self.comments.push(Comment::DocLine(
926                                    Loc::File(self.file_no, start, last),
927                                    self.input[start..last].to_owned(),
928                                ));
929                            } else {
930                                self.comments.push(Comment::Line(
931                                    Loc::File(self.file_no, start, last),
932                                    self.input[start..last].to_owned(),
933                                ));
934                            }
935                        }
936                        Some((_, '*')) => {
937                            // multiline comment
938                            self.chars.next();
939
940                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));
941
942                            let mut last = start + 3;
943                            let mut seen_star = false;
944
945                            loop {
946                                if let Some((i, ch)) = self.chars.next() {
947                                    if seen_star && ch == '/' {
948                                        break;
949                                    }
950                                    seen_star = ch == '*';
951                                    last = i;
952                                } else {
953                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
954                                        self.file_no,
955                                        start,
956                                        self.input.len(),
957                                    )));
958                                    return None;
959                                }
960                            }
961
962                            // `/**/` is not a doc comment
963                            if doc_comment_start && last > start + 2 {
964                                self.comments.push(Comment::DocBlock(
965                                    Loc::File(self.file_no, start, last + 2),
966                                    self.input[start..last + 2].to_owned(),
967                                ));
968                            } else {
969                                self.comments.push(Comment::Block(
970                                    Loc::File(self.file_no, start, last + 2),
971                                    self.input[start..last + 2].to_owned(),
972                                ));
973                            }
974                        }
975                        _ => {
976                            return Some((start, Token::Divide, start + 1));
977                        }
978                    }
979                }
980                Some((start, ch)) if ch.is_ascii_digit() => {
981                    let parse_result = self.parse_number(start, ch);
982                    match parse_result {
983                        Err(lex_err) => {
984                            self.errors.push(lex_err.clone());
985                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
986                                return None;
987                            }
988                        }
989                        Ok(parse_result) => return Some(parse_result),
990                    }
991                }
992                Some((start, '@')) => {
993                    let (id, end) = self.match_identifier(start);
994                    if id.len() == 1 {
995                        self.errors.push(LexicalError::UnrecognisedToken(
996                            Loc::File(self.file_no, start, start + 1),
997                            id.to_owned(),
998                        ));
999                    } else {
1000                        return Some((start, Token::Annotation(&id[1..]), end));
1001                    };
1002                }
1003                Some((i, ';')) => {
1004                    self.parse_semver = false;
1005                    return Some((i, Token::Semicolon, i + 1));
1006                }
1007                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
1008                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
1009                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
1010                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
1011                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
1012                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
1013                Some((i, '=')) => {
1014                    return match self.chars.peek() {
1015                        Some((_, '=')) => {
1016                            self.chars.next();
1017                            Some((i, Token::Equal, i + 2))
1018                        }
1019                        Some((_, '>')) => {
1020                            self.chars.next();
1021                            Some((i, Token::Arrow, i + 2))
1022                        }
1023                        _ => Some((i, Token::Assign, i + 1)),
1024                    }
1025                }
1026                Some((i, '!')) => {
1027                    return if let Some((_, '=')) = self.chars.peek() {
1028                        self.chars.next();
1029                        Some((i, Token::NotEqual, i + 2))
1030                    } else {
1031                        Some((i, Token::Not, i + 1))
1032                    }
1033                }
1034                Some((i, '|')) => {
1035                    return match self.chars.peek() {
1036                        Some((_, '=')) => {
1037                            self.chars.next();
1038                            Some((i, Token::BitwiseOrAssign, i + 2))
1039                        }
1040                        Some((_, '|')) => {
1041                            self.chars.next();
1042                            Some((i, Token::Or, i + 2))
1043                        }
1044                        _ => Some((i, Token::BitwiseOr, i + 1)),
1045                    };
1046                }
1047                Some((i, '&')) => {
1048                    return match self.chars.peek() {
1049                        Some((_, '=')) => {
1050                            self.chars.next();
1051                            Some((i, Token::BitwiseAndAssign, i + 2))
1052                        }
1053                        Some((_, '&')) => {
1054                            self.chars.next();
1055                            Some((i, Token::And, i + 2))
1056                        }
1057                        _ => Some((i, Token::BitwiseAnd, i + 1)),
1058                    };
1059                }
1060                Some((i, '^')) => {
1061                    return match self.chars.peek() {
1062                        Some((_, '=')) => {
1063                            self.chars.next();
1064                            Some((i, Token::BitwiseXorAssign, i + 2))
1065                        }
1066                        _ => Some((i, Token::BitwiseXor, i + 1)),
1067                    };
1068                }
1069                Some((i, '+')) => {
1070                    return match self.chars.peek() {
1071                        Some((_, '=')) => {
1072                            self.chars.next();
1073                            Some((i, Token::AddAssign, i + 2))
1074                        }
1075                        Some((_, '+')) => {
1076                            self.chars.next();
1077                            Some((i, Token::Increment, i + 2))
1078                        }
1079                        _ => Some((i, Token::Add, i + 1)),
1080                    };
1081                }
1082                Some((i, '-')) => {
1083                    return match self.chars.peek() {
1084                        Some((_, '=')) => {
1085                            self.chars.next();
1086                            Some((i, Token::SubtractAssign, i + 2))
1087                        }
1088                        Some((_, '-')) => {
1089                            self.chars.next();
1090                            Some((i, Token::Decrement, i + 2))
1091                        }
1092                        Some((_, '>')) => {
1093                            self.chars.next();
1094                            Some((i, Token::YulArrow, i + 2))
1095                        }
1096                        _ => Some((i, Token::Subtract, i + 1)),
1097                    };
1098                }
1099                Some((i, '*')) => {
1100                    return match self.chars.peek() {
1101                        Some((_, '=')) => {
1102                            self.chars.next();
1103                            Some((i, Token::MulAssign, i + 2))
1104                        }
1105                        Some((_, '*')) => {
1106                            self.chars.next();
1107                            Some((i, Token::Power, i + 2))
1108                        }
1109                        _ => Some((i, Token::Mul, i + 1)),
1110                    };
1111                }
1112                Some((i, '%')) => {
1113                    return match self.chars.peek() {
1114                        Some((_, '=')) => {
1115                            self.chars.next();
1116                            Some((i, Token::ModuloAssign, i + 2))
1117                        }
1118                        _ => Some((i, Token::Modulo, i + 1)),
1119                    };
1120                }
1121                Some((i, '<')) => {
1122                    return match self.chars.peek() {
1123                        Some((_, '<')) => {
1124                            self.chars.next();
1125                            if let Some((_, '=')) = self.chars.peek() {
1126                                self.chars.next();
1127                                Some((i, Token::ShiftLeftAssign, i + 3))
1128                            } else {
1129                                Some((i, Token::ShiftLeft, i + 2))
1130                            }
1131                        }
1132                        Some((_, '=')) => {
1133                            self.chars.next();
1134                            Some((i, Token::LessEqual, i + 2))
1135                        }
1136                        _ => Some((i, Token::Less, i + 1)),
1137                    };
1138                }
1139                Some((i, '>')) => {
1140                    return match self.chars.peek() {
1141                        Some((_, '>')) => {
1142                            self.chars.next();
1143                            if let Some((_, '=')) = self.chars.peek() {
1144                                self.chars.next();
1145                                Some((i, Token::ShiftRightAssign, i + 3))
1146                            } else {
1147                                Some((i, Token::ShiftRight, i + 2))
1148                            }
1149                        }
1150                        Some((_, '=')) => {
1151                            self.chars.next();
1152                            Some((i, Token::MoreEqual, i + 2))
1153                        }
1154                        _ => Some((i, Token::More, i + 1)),
1155                    };
1156                }
1157                Some((i, '.')) => {
1158                    if let Some((_, a)) = self.chars.peek() {
1159                        if a.is_ascii_digit() && !self.parse_semver {
1160                            return match self.parse_number(i + 1, '.') {
1161                                Err(lex_error) => {
1162                                    self.errors.push(lex_error);
1163                                    None
1164                                }
1165                                Ok(parse_result) => Some(parse_result),
1166                            };
1167                        }
1168                    }
1169                    return Some((i, Token::Member, i + 1));
1170                }
1171                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
1172                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
1173                Some((i, ':')) => {
1174                    return match self.chars.peek() {
1175                        Some((_, '=')) => {
1176                            self.chars.next();
1177                            Some((i, Token::ColonAssign, i + 2))
1178                        }
1179                        _ => Some((i, Token::Colon, i + 1)),
1180                    };
1181                }
1182                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
1183                Some((_, ch)) if ch.is_whitespace() => (),
1184                Some((start, _)) => {
1185                    let mut end;
1186
1187                    loop {
1188                        if let Some((i, ch)) = self.chars.next() {
1189                            end = i;
1190
1191                            if ch.is_whitespace() {
1192                                break;
1193                            }
1194                        } else {
1195                            end = self.input.len();
1196                            break;
1197                        }
1198                    }
1199
1200                    self.errors.push(LexicalError::UnrecognisedToken(
1201                        Loc::File(self.file_no, start, end),
1202                        self.input[start..end].to_owned(),
1203                    ));
1204                }
1205                None => return None, // End of file
1206            }
1207        }
1208    }
1209
1210    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
1211        let end;
1212        loop {
1213            if let Some((i, ch)) = self.chars.peek() {
1214                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
1215                    end = *i;
1216                    break;
1217                }
1218                self.chars.next();
1219            } else {
1220                end = self.input.len();
1221                break;
1222            }
1223        }
1224
1225        (&self.input[start..end], end)
1226    }
1227}
1228
1229impl<'input> Iterator for Lexer<'input> {
1230    type Item = Spanned<'input>;
1231
1232    fn next(&mut self) -> Option<Self::Item> {
1233        // Lexer should be aware of whether the last two tokens were
1234        // pragma followed by identifier. If this is true, then special parsing should be
1235        // done for the pragma value
1236        if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1237            self.parse_semver = true;
1238        }
1239
1240        let token = self.next();
1241
1242        self.last_tokens = [
1243            self.last_tokens[1],
1244            match token {
1245                Some((_, n, _)) => Some(n),
1246                _ => None,
1247            },
1248        ];
1249
1250        token
1251    }
1252}
1253
1254#[cfg(test)]
1255mod tests {
1256    use super::*;
1257
1258    #[test]
1259    fn test_lexer() {
1260        let mut comments = Vec::new();
1261        let mut errors = Vec::new();
1262
1263        let multiple_errors = r#" 9ea -9e € bool hex uint8 hex"g"   /**  "#;
1264        let tokens = Lexer::new(multiple_errors, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1265        assert_eq!(
1266            tokens,
1267            vec![
1268                (3, Token::Identifier("a"), 4),
1269                (5, Token::Subtract, 6),
1270                (13, Token::Bool, 17),
1271                (18, Token::Identifier("hex"), 21),
1272                (22, Token::Uint(8), 27),
1273            ]
1274        );
1275
1276        assert_eq!(
1277            errors,
1278            vec![
1279                LexicalError::MissingExponent(Loc::File(0, 1, 42)),
1280                LexicalError::MissingExponent(Loc::File(0, 6, 42)),
1281                LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
1282                LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
1283                LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
1284            ]
1285        );
1286
1287        let mut errors = Vec::new();
1288        let tokens = Lexer::new("bool", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1289
1290        assert_eq!(tokens, vec!((0, Token::Bool, 4)));
1291
1292        let tokens = Lexer::new("uint8", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1293
1294        assert_eq!(tokens, vec!((0, Token::Uint(8), 5)));
1295
1296        let tokens = Lexer::new("hex", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1297
1298        assert_eq!(tokens, vec!((0, Token::Identifier("hex"), 3)));
1299
1300        let tokens = Lexer::new(
1301            "hex\"cafe_dead\" /* adad*** */",
1302            0,
1303            &mut comments,
1304            &mut errors,
1305        )
1306        .collect::<Vec<_>>();
1307
1308        assert_eq!(tokens, vec!((0, Token::HexLiteral("hex\"cafe_dead\""), 14)));
1309
1310        let tokens = Lexer::new(
1311            "// foo bar\n0x00fead0_12 00090 0_0",
1312            0,
1313            &mut comments,
1314            &mut errors,
1315        )
1316        .collect::<Vec<_>>();
1317
1318        assert_eq!(
1319            tokens,
1320            vec!(
1321                (11, Token::HexNumber("0x00fead0_12"), 23),
1322                (24, Token::Number("00090", ""), 29),
1323                (30, Token::Number("0_0", ""), 33)
1324            )
1325        );
1326
1327        let tokens = Lexer::new(
1328            "// foo bar\n0x00fead0_12 9.0008 0_0",
1329            0,
1330            &mut comments,
1331            &mut errors,
1332        )
1333        .collect::<Vec<_>>();
1334
1335        assert_eq!(
1336            tokens,
1337            vec!(
1338                (11, Token::HexNumber("0x00fead0_12"), 23),
1339                (24, Token::RationalNumber("9", "0008", ""), 30),
1340                (31, Token::Number("0_0", ""), 34)
1341            )
1342        );
1343
1344        let tokens = Lexer::new(
1345            "// foo bar\n0x00fead0_12 .0008 0.9e2",
1346            0,
1347            &mut comments,
1348            &mut errors,
1349        )
1350        .collect::<Vec<_>>();
1351
1352        assert_eq!(
1353            tokens,
1354            vec!(
1355                (11, Token::HexNumber("0x00fead0_12"), 23),
1356                (24, Token::RationalNumber("", "0008", ""), 29),
1357                (30, Token::RationalNumber("0", "9", "2"), 35)
1358            )
1359        );
1360
1361        let tokens = Lexer::new(
1362            "// foo bar\n0x00fead0_12 .0008 0.9e-2-2",
1363            0,
1364            &mut comments,
1365            &mut errors,
1366        )
1367        .collect::<Vec<_>>();
1368
1369        assert_eq!(
1370            tokens,
1371            vec!(
1372                (11, Token::HexNumber("0x00fead0_12"), 23),
1373                (24, Token::RationalNumber("", "0008", ""), 29),
1374                (30, Token::RationalNumber("0", "9", "-2"), 36),
1375                (36, Token::Subtract, 37),
1376                (37, Token::Number("2", ""), 38)
1377            )
1378        );
1379
1380        let tokens = Lexer::new("1.2_3e2-", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1381
1382        assert_eq!(
1383            tokens,
1384            vec!(
1385                (0, Token::RationalNumber("1", "2_3", "2"), 7),
1386                (7, Token::Subtract, 8)
1387            )
1388        );
1389
1390        let tokens = Lexer::new("\"foo\"", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1391
1392        assert_eq!(tokens, vec!((0, Token::StringLiteral(false, "foo"), 5)));
1393
1394        let tokens = Lexer::new(
1395            "pragma solidity >=0.5.0 <0.7.0;",
1396            0,
1397            &mut comments,
1398            &mut errors,
1399        )
1400        .collect::<Vec<_>>();
1401
1402        assert_eq!(
1403            tokens,
1404            vec!(
1405                (0, Token::Pragma, 6),
1406                (7, Token::Identifier("solidity"), 15),
1407                (16, Token::MoreEqual, 18),
1408                (18, Token::Number("0", ""), 19),
1409                (19, Token::Member, 20),
1410                (20, Token::Number("5", ""), 21),
1411                (21, Token::Member, 22),
1412                (22, Token::Number("0", ""), 23),
1413                (24, Token::Less, 25),
1414                (25, Token::Number("0", ""), 26),
1415                (26, Token::Member, 27),
1416                (27, Token::Number("7", ""), 28),
1417                (28, Token::Member, 29),
1418                (29, Token::Number("0", ""), 30),
1419                (30, Token::Semicolon, 31),
1420            )
1421        );
1422
1423        let tokens = Lexer::new(
1424            "pragma solidity \t>=0.5.0 <0.7.0 \n ;",
1425            0,
1426            &mut comments,
1427            &mut errors,
1428        )
1429        .collect::<Vec<_>>();
1430
1431        assert_eq!(
1432            tokens,
1433            vec!(
1434                (0, Token::Pragma, 6),
1435                (7, Token::Identifier("solidity"), 15),
1436                (17, Token::MoreEqual, 19),
1437                (19, Token::Number("0", ""), 20),
1438                (20, Token::Member, 21),
1439                (21, Token::Number("5", ""), 22),
1440                (22, Token::Member, 23),
1441                (23, Token::Number("0", ""), 24),
1442                (25, Token::Less, 26),
1443                (26, Token::Number("0", ""), 27),
1444                (27, Token::Member, 28),
1445                (28, Token::Number("7", ""), 29),
1446                (29, Token::Member, 30),
1447                (30, Token::Number("0", ""), 31),
1448                (34, Token::Semicolon, 35),
1449            )
1450        );
1451
1452        let tokens =
1453            Lexer::new("pragma solidity 赤;", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1454
1455        assert_eq!(
1456            tokens,
1457            vec!(
1458                (0, Token::Pragma, 6),
1459                (7, Token::Identifier("solidity"), 15),
1460                (16, Token::Identifier("赤"), 19),
1461                (19, Token::Semicolon, 20)
1462            )
1463        );
1464
1465        let tokens = Lexer::new(">>= >> >= >", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1466
1467        assert_eq!(
1468            tokens,
1469            vec!(
1470                (0, Token::ShiftRightAssign, 3),
1471                (4, Token::ShiftRight, 6),
1472                (7, Token::MoreEqual, 9),
1473                (10, Token::More, 11),
1474            )
1475        );
1476
1477        let tokens = Lexer::new("<<= << <= <", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1478
1479        assert_eq!(
1480            tokens,
1481            vec!(
1482                (0, Token::ShiftLeftAssign, 3),
1483                (4, Token::ShiftLeft, 6),
1484                (7, Token::LessEqual, 9),
1485                (10, Token::Less, 11),
1486            )
1487        );
1488
1489        let tokens = Lexer::new("-16 -- - -=", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1490
1491        assert_eq!(
1492            tokens,
1493            vec!(
1494                (0, Token::Subtract, 1),
1495                (1, Token::Number("16", ""), 3),
1496                (4, Token::Decrement, 6),
1497                (7, Token::Subtract, 8),
1498                (9, Token::SubtractAssign, 11),
1499            )
1500        );
1501
1502        let tokens = Lexer::new("-4 ", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1503
1504        assert_eq!(
1505            tokens,
1506            vec!((0, Token::Subtract, 1), (1, Token::Number("4", ""), 2),)
1507        );
1508
1509        let mut errors = Vec::new();
1510        let _ = Lexer::new(r#"hex"abcdefg""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1511
1512        assert_eq!(
1513            errors,
1514            vec![LexicalError::InvalidCharacterInHexLiteral(
1515                Loc::File(0, 10, 11),
1516                'g'
1517            )]
1518        );
1519
1520        let mut errors = Vec::new();
1521        let _ = Lexer::new(r#" € "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1522
1523        assert_eq!(
1524            errors,
1525            vec!(LexicalError::UnrecognisedToken(
1526                Loc::File(0, 1, 4),
1527                "€".to_owned()
1528            ))
1529        );
1530
1531        let mut errors = Vec::new();
1532        let _ = Lexer::new(r#"€"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1533
1534        assert_eq!(
1535            errors,
1536            vec!(LexicalError::UnrecognisedToken(
1537                Loc::File(0, 0, 3),
1538                "€".to_owned()
1539            ))
1540        );
1541
1542        let tokens =
1543            Lexer::new(r#"pragma foo bar"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1544
1545        assert_eq!(
1546            tokens,
1547            vec!(
1548                (0, Token::Pragma, 6),
1549                (7, Token::Identifier("foo"), 10),
1550                (11, Token::Identifier("bar"), 14),
1551            )
1552        );
1553
1554        comments.truncate(0);
1555
1556        let tokens = Lexer::new(r#"/// foo"#, 0, &mut comments, &mut errors).count();
1557
1558        assert_eq!(tokens, 0);
1559        assert_eq!(
1560            comments,
1561            vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
1562        );
1563
1564        comments.truncate(0);
1565
1566        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();
1567
1568        assert_eq!(tokens, 0);
1569        assert_eq!(
1570            comments,
1571            vec!(
1572                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
1573                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
1574            )
1575        );
1576
1577        comments.truncate(0);
1578
1579        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();
1580
1581        assert_eq!(tokens, 0);
1582        assert_eq!(
1583            comments,
1584            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
1585        );
1586
1587        comments.truncate(0);
1588
1589        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();
1590
1591        assert_eq!(tokens, 0);
1592        assert_eq!(
1593            comments,
1594            vec!(Comment::DocBlock(
1595                Loc::File(0, 0, 10),
1596                "/** foo */".to_owned()
1597            ))
1598        );
1599
1600        comments.truncate(0);
1601
1602        let tokens = Lexer::new(
1603            "/** jadajadadjada */\n/* bar */",
1604            0,
1605            &mut comments,
1606            &mut errors,
1607        )
1608        .count();
1609
1610        assert_eq!(tokens, 0);
1611        assert_eq!(
1612            comments,
1613            vec!(
1614                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
1615                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
1616            )
1617        );
1618
1619        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
1620        assert_eq!(tokens, None);
1621
1622        let mut errors = Vec::new();
1623        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
1624        assert_eq!(
1625            errors,
1626            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
1627        );
1628
1629        let mut errors = Vec::new();
1630        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
1631        assert_eq!(tokens, None);
1632
1633        // some unicode tests
1634        let tokens = Lexer::new(
1635            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
1636            0,
1637            &mut comments,
1638            &mut errors,
1639        )
1640        .collect::<Vec<_>>();
1641
1642        assert_eq!(
1643            tokens,
1644            vec!(
1645                (0, Token::MoreEqual, 2),
1646                (5, Token::Member, 6),
1647                (7, Token::Identifier("très"), 12),
1648                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
1649                (65, Token::Identifier("カラス"), 74)
1650            )
1651        );
1652
1653        let tokens = Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1654
1655        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));
1656
1657        let tokens =
            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        // `unicode` separated from the opening quote by a space is lexed as a
        // plain identifier followed by an ordinary (non-unicode) string literal.
        assert_eq!(
            tokens,
            vec!(
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            )
        );

        // scientific notation
        let tokens = Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        // a leading `-` is its own Subtract token, not part of the number
        let tokens = Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        // `e` with no digits following it is reported as a missing exponent
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        // `9ea`: the malformed number is dropped with an error and lexing
        // resumes at the identifier `a`
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        // `.` followed by a non-digit is a member access, not a rational number
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        let tokens = Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );

        comments.truncate(0);

        // comments produce no tokens; they are pushed into `comments` instead
        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
            )
        );

        comments.truncate(0);

        // the empty block comment `/**/` is a plain Block comment, not a DocBlock
        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
        );

        comments.truncate(0);

        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::DocBlock(
                Loc::File(0, 0, 10),
                "/** foo */".to_owned()
            ))
        );

        comments.truncate(0);

        let tokens = Lexer::new(
            "/** jadajadadjada */\n/* bar */",
            0,
            &mut comments,
            &mut errors,
        )
        .count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
            )
        );

        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // an unterminated block comment reports end-of-file-in-comment
        let mut errors = Vec::new();
        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
        assert_eq!(
            errors,
            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // some unicode tests
        // (the expected offsets below are byte offsets, not character counts:
        // e.g. "très" is 4 chars but spans bytes 7..12)
        let tokens = Lexer::new(
            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::MoreEqual, 2),
                (5, Token::Member, 6),
                (7, Token::Identifier("très"), 12),
                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
                (65, Token::Identifier("カラス"), 74)
            )
        );

        // NOTE(review): the cases from here down to the `hex"g"` test duplicate
        // earlier assertions in this function, differing only in the explicit
        // `collect` turbofish type — consider deduplicating.
        let tokens =
            Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));

        let tokens =
            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            )
        );

        // scientific notation
        let tokens =
            Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        let tokens =
            Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        let tokens =
            Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );

        // a non-hex character inside a hex string literal is an error
        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"g""#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();
        assert_eq!(
            errors,
            vec!(LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 4, 5),
                'g'
            ),)
        );

        // a leading-dot number is a rational with an empty integer part
        let mut errors = Vec::new();
        let tokens =
            Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        let mut errors = Vec::new();
        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));

        // NOTE(review): the next two cases repeat the `.9` / `.9e10` tests
        // immediately above — consider deduplicating.
        let mut errors = Vec::new();
        let tokens = Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        let mut errors = Vec::new();
        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));

        // `@identifier` lexes as a single Annotation token
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
        assert!(errors.is_empty());
        assert!(comments.is_empty());

        // `@` followed by a space is an unrecognised token; the identifier
        // after it is still lexed normally
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@ my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 1),
                "@".to_string()
            )]
        );
        assert!(comments.is_empty());
    }
}