solang_parser/
lexer.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Custom Solidity lexer.
4//!
5//! Solidity needs a custom lexer for two reasons:
6//!  - comments and doc comments
7//!  - pragma value is [^;]+
8
9use crate::pt::{Comment, Loc};
10use itertools::{peek_nth, PeekNth};
11use phf::phf_map;
12use std::{fmt, str::CharIndices};
13use thiserror::Error;
14use unicode_xid::UnicodeXID;
15
/// A spanned [Token]: `(start_offset, token, end_offset)`, with byte offsets
/// into the source string.
pub type Spanned<'a> = (usize, Token<'a>, usize);

/// [Lexer]'s Result type; defaults to a [Spanned] token or a [LexicalError].
pub type Result<'a, T = Spanned<'a>, E = LexicalError> = std::result::Result<T, E>;
21
/// A Solidity lexical token. Produced by [Lexer].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[allow(missing_docs)]
pub enum Token<'input> {
    /// An identifier, borrowed from the source text.
    Identifier(&'input str),
    /// `(unicode, literal)`
    StringLiteral(bool, &'input str),
    /// An `address"…"` literal; the slice includes the prefix and the quotes.
    AddressLiteral(&'input str),
    /// A `hex"…"` literal; the slice includes the prefix and the quotes.
    HexLiteral(&'input str),
    /// `(number, exponent)`
    Number(&'input str, &'input str),
    /// `(number, fraction, exponent)`
    RationalNumber(&'input str, &'input str, &'input str),
    /// A `0x…` hexadecimal number; the slice includes the `0x` prefix.
    HexNumber(&'input str),
    // `/` — its first character also opens comments, so the lexer
    // special-cases it rather than grouping it with the other operators.
    Divide,
    // Declaration keywords.
    Contract,
    Library,
    Interface,
    Function,
    Pragma,
    Import,

    Struct,
    Event,
    Enum,
    Type,

    // Data-location keywords.
    Memory,
    Storage,
    Calldata,

    // Visibility keywords.
    Public,
    Private,
    Internal,
    External,

    Constant,

    New,
    Delete,

    // Mutability keywords.
    Pure,
    View,
    Payable,

    Do,
    Continue,
    Break,

    Throw,
    Emit,
    Return,
    Returns,
    Revert,

    // Elementary types; the payload is the width in bits (`Uint`/`Int`)
    // or bytes (`Bytes`).
    Uint(u16),
    Int(u16),
    Bytes(u8),
    // prior to 0.8.0 `byte` used to be an alias for `bytes1`
    Byte,
    DynamicBytes,
    Bool,
    Address,
    String,

    // Punctuation.
    Semicolon,
    Comma,
    OpenParenthesis,
    CloseParenthesis,
    OpenCurlyBrace,
    CloseCurlyBrace,

    // Bitwise and logical operators, with their compound-assignment forms.
    BitwiseOr,
    BitwiseOrAssign,
    Or,

    BitwiseXor,
    BitwiseXorAssign,

    BitwiseAnd,
    BitwiseAndAssign,
    And,

    // Arithmetic operators, with their compound-assignment forms.
    AddAssign,
    Increment,
    Add,

    SubtractAssign,
    Decrement,
    Subtract,

    MulAssign,
    Mul,
    Power,
    DivideAssign,
    ModuloAssign,
    Modulo,

    // Comparison and assignment.
    Equal,
    Assign,
    // `:=`, used in Yul assembly blocks.
    ColonAssign,

    NotEqual,
    Not,

    True,
    False,
    Else,
    Anonymous,
    For,
    While,
    If,

    // Shifts and relational operators.
    ShiftRight,
    ShiftRightAssign,
    Less,
    LessEqual,

    ShiftLeft,
    ShiftLeftAssign,
    More,
    MoreEqual,

    Constructor,
    Indexed,

    Member,
    Colon,
    OpenBracket,
    CloseBracket,
    BitwiseNot,
    Question,

    Mapping,
    // `=>`, as used in mapping types.
    Arrow,

    Try,
    Catch,

    Receive,
    Fallback,

    As,
    Is,
    Abstract,
    Virtual,
    Override,
    Using,
    Modifier,
    Immutable,
    Unchecked,

    // Yul / inline assembly keywords.
    Assembly,
    Let,
    Leave,
    Switch,
    Case,
    Default,
    // `->`, the Yul function-return arrow.
    YulArrow,

    // Storage types for Soroban
    Persistent,
    Temporary,
    Instance,

    /// An `@name` annotation; the slice holds the name without the `@`.
    Annotation(&'input str),
}
189
impl fmt::Display for Token<'_> {
    // Renders the token in its canonical Solidity source form. String and
    // hex/address literals are printed with the surrounding syntax the lexer
    // stripped or captured, so `Display` round-trips reasonably for
    // diagnostics.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Identifier(id) => write!(f, "{id}"),
            Token::StringLiteral(false, s) => write!(f, "\"{s}\""),
            Token::StringLiteral(true, s) => write!(f, "unicode\"{s}\""),
            Token::HexLiteral(hex) => write!(f, "{hex}"),
            Token::AddressLiteral(address) => write!(f, "{address}"),
            // An empty exponent or fraction is omitted entirely, so these
            // empty-slice arms must precede the general ones.
            Token::Number(integer, "") => write!(f, "{integer}"),
            Token::Number(integer, exp) => write!(f, "{integer}e{exp}"),
            Token::RationalNumber(integer, fraction, "") => {
                write!(f, "{integer}.{fraction}")
            }
            Token::RationalNumber(integer, fraction, exp) => {
                write!(f, "{integer}.{fraction}e{exp}")
            }
            Token::HexNumber(n) => write!(f, "{n}"),
            Token::Uint(w) => write!(f, "uint{w}"),
            Token::Int(w) => write!(f, "int{w}"),
            Token::Bytes(w) => write!(f, "bytes{w}"),
            Token::Byte => write!(f, "byte"),
            Token::DynamicBytes => write!(f, "bytes"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::OpenParenthesis => write!(f, "("),
            Token::CloseParenthesis => write!(f, ")"),
            // `{{` / `}}` are the format-string escapes for literal braces.
            Token::OpenCurlyBrace => write!(f, "{{"),
            Token::CloseCurlyBrace => write!(f, "}}"),
            Token::BitwiseOr => write!(f, "|"),
            Token::BitwiseOrAssign => write!(f, "|="),
            Token::Or => write!(f, "||"),
            Token::BitwiseXor => write!(f, "^"),
            Token::BitwiseXorAssign => write!(f, "^="),
            Token::BitwiseAnd => write!(f, "&"),
            Token::BitwiseAndAssign => write!(f, "&="),
            Token::And => write!(f, "&&"),
            Token::AddAssign => write!(f, "+="),
            Token::Increment => write!(f, "++"),
            Token::Add => write!(f, "+"),
            Token::SubtractAssign => write!(f, "-="),
            Token::Decrement => write!(f, "--"),
            Token::Subtract => write!(f, "-"),
            Token::MulAssign => write!(f, "*="),
            Token::Mul => write!(f, "*"),
            Token::Power => write!(f, "**"),
            Token::Divide => write!(f, "/"),
            Token::DivideAssign => write!(f, "/="),
            Token::ModuloAssign => write!(f, "%="),
            Token::Modulo => write!(f, "%"),
            Token::Equal => write!(f, "=="),
            Token::Assign => write!(f, "="),
            Token::ColonAssign => write!(f, ":="),
            Token::NotEqual => write!(f, "!="),
            Token::Not => write!(f, "!"),
            Token::ShiftLeft => write!(f, "<<"),
            Token::ShiftLeftAssign => write!(f, "<<="),
            Token::More => write!(f, ">"),
            Token::MoreEqual => write!(f, ">="),
            Token::Member => write!(f, "."),
            Token::Colon => write!(f, ":"),
            Token::OpenBracket => write!(f, "["),
            Token::CloseBracket => write!(f, "]"),
            Token::BitwiseNot => write!(f, "~"),
            Token::Question => write!(f, "?"),
            Token::ShiftRightAssign => write!(f, ">>="),
            Token::ShiftRight => write!(f, ">>"),
            Token::Less => write!(f, "<"),
            Token::LessEqual => write!(f, "<="),
            Token::Bool => write!(f, "bool"),
            Token::Address => write!(f, "address"),
            Token::String => write!(f, "string"),
            Token::Contract => write!(f, "contract"),
            Token::Library => write!(f, "library"),
            Token::Interface => write!(f, "interface"),
            Token::Function => write!(f, "function"),
            Token::Pragma => write!(f, "pragma"),
            Token::Import => write!(f, "import"),
            Token::Struct => write!(f, "struct"),
            Token::Event => write!(f, "event"),
            Token::Enum => write!(f, "enum"),
            Token::Type => write!(f, "type"),
            Token::Memory => write!(f, "memory"),
            Token::Storage => write!(f, "storage"),
            Token::Calldata => write!(f, "calldata"),
            Token::Public => write!(f, "public"),
            Token::Private => write!(f, "private"),
            Token::Internal => write!(f, "internal"),
            Token::External => write!(f, "external"),
            Token::Constant => write!(f, "constant"),
            Token::New => write!(f, "new"),
            Token::Delete => write!(f, "delete"),
            Token::Pure => write!(f, "pure"),
            Token::View => write!(f, "view"),
            Token::Payable => write!(f, "payable"),
            Token::Do => write!(f, "do"),
            Token::Continue => write!(f, "continue"),
            Token::Break => write!(f, "break"),
            Token::Throw => write!(f, "throw"),
            Token::Emit => write!(f, "emit"),
            Token::Return => write!(f, "return"),
            Token::Returns => write!(f, "returns"),
            Token::Revert => write!(f, "revert"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Else => write!(f, "else"),
            Token::Anonymous => write!(f, "anonymous"),
            Token::For => write!(f, "for"),
            Token::While => write!(f, "while"),
            Token::If => write!(f, "if"),
            Token::Constructor => write!(f, "constructor"),
            Token::Indexed => write!(f, "indexed"),
            Token::Mapping => write!(f, "mapping"),
            Token::Arrow => write!(f, "=>"),
            Token::Try => write!(f, "try"),
            Token::Catch => write!(f, "catch"),
            Token::Receive => write!(f, "receive"),
            Token::Fallback => write!(f, "fallback"),
            Token::As => write!(f, "as"),
            Token::Is => write!(f, "is"),
            Token::Abstract => write!(f, "abstract"),
            Token::Virtual => write!(f, "virtual"),
            Token::Override => write!(f, "override"),
            Token::Using => write!(f, "using"),
            Token::Modifier => write!(f, "modifier"),
            Token::Immutable => write!(f, "immutable"),
            Token::Unchecked => write!(f, "unchecked"),
            Token::Assembly => write!(f, "assembly"),
            Token::Let => write!(f, "let"),
            Token::Leave => write!(f, "leave"),
            Token::Switch => write!(f, "switch"),
            Token::Case => write!(f, "case"),
            Token::Default => write!(f, "default"),
            Token::YulArrow => write!(f, "->"),
            Token::Annotation(name) => write!(f, "@{name}"),
            Token::Persistent => write!(f, "persistent"),
            Token::Temporary => write!(f, "temporary"),
            Token::Instance => write!(f, "instance"),
        }
    }
}
330
/// Custom Solidity lexer.
///
/// # Examples
///
/// ```
/// use solang_parser::lexer::{Lexer, Token};
///
/// let source = "uint256 number = 0;";
/// let mut comments = Vec::new();
/// let mut errors = Vec::new();
/// let mut lexer = Lexer::new(source, 0, &mut comments, &mut errors);
///
/// let mut next_token = || lexer.next().map(|(_, token, _)| token);
/// assert_eq!(next_token(), Some(Token::Uint(256)));
/// assert_eq!(next_token(), Some(Token::Identifier("number")));
/// assert_eq!(next_token(), Some(Token::Assign));
/// assert_eq!(next_token(), Some(Token::Number("0", "")));
/// assert_eq!(next_token(), Some(Token::Semicolon));
/// assert_eq!(next_token(), None);
/// assert!(errors.is_empty());
/// assert!(comments.is_empty());
/// ```
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The full source text; tokens borrow slices from it.
    input: &'input str,
    /// Char iterator over `input`, with multi-character lookahead
    /// (`peek_nth`) for e.g. distinguishing `1.2` from `1.x`.
    chars: PeekNth<CharIndices<'input>>,
    /// Comments and doc comments are collected here rather than tokenized.
    comments: &'input mut Vec<Comment>,
    /// File number recorded in every [Loc] this lexer produces.
    file_no: usize,
    /// While parsing version semver, do not parse rational numbers
    parse_semver: bool,
    // NOTE(review): never written in this excerpt; presumably maintained by
    // `next` for context-sensitive decisions (e.g. pragma handling) — confirm.
    last_tokens: [Option<Token<'input>>; 2],
    /// The mutable reference to the error vector.
    pub errors: &'input mut Vec<LexicalError>,
}
365
/// An error thrown by [Lexer].
///
/// Every variant carries the [Loc] of the offending source span.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[allow(missing_docs)]
pub enum LexicalError {
    #[error("end of file found in comment")]
    EndOfFileInComment(Loc),

    #[error("end of file found in string literal")]
    EndOfFileInString(Loc),

    // NOTE(review): the lowercase 'o' in `EndofFileInHex` is a long-standing
    // typo; kept as-is since renaming the variant would break the public API.
    #[error("end of file found in hex literal string")]
    EndofFileInHex(Loc),

    #[error("missing number")]
    MissingNumber(Loc),

    #[error("invalid character '{1}' in hex literal string")]
    InvalidCharacterInHexLiteral(Loc, char),

    #[error("unrecognised token '{1}'")]
    UnrecognisedToken(Loc, String),

    #[error("missing exponent")]
    MissingExponent(Loc),

    #[error("'{1}' found where 'from' expected")]
    ExpectedFrom(Loc, String),
}
394
395/// Returns whether `word` is a keyword in Solidity.
396pub fn is_keyword(word: &str) -> bool {
397    KEYWORDS.contains_key(word)
398}
399
/// Compile-time map from each reserved word to its [Token].
///
/// The lexer checks every scanned identifier against this table; it also
/// backs the public [is_keyword] helper.
static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
    "address" => Token::Address,
    "anonymous" => Token::Anonymous,
    "bool" => Token::Bool,
    "break" => Token::Break,
    // Fixed-size byte arrays `bytes1` ..= `bytes32`.
    "bytes1" => Token::Bytes(1),
    "bytes2" => Token::Bytes(2),
    "bytes3" => Token::Bytes(3),
    "bytes4" => Token::Bytes(4),
    "bytes5" => Token::Bytes(5),
    "bytes6" => Token::Bytes(6),
    "bytes7" => Token::Bytes(7),
    "bytes8" => Token::Bytes(8),
    "bytes9" => Token::Bytes(9),
    "bytes10" => Token::Bytes(10),
    "bytes11" => Token::Bytes(11),
    "bytes12" => Token::Bytes(12),
    "bytes13" => Token::Bytes(13),
    "bytes14" => Token::Bytes(14),
    "bytes15" => Token::Bytes(15),
    "bytes16" => Token::Bytes(16),
    "bytes17" => Token::Bytes(17),
    "bytes18" => Token::Bytes(18),
    "bytes19" => Token::Bytes(19),
    "bytes20" => Token::Bytes(20),
    "bytes21" => Token::Bytes(21),
    "bytes22" => Token::Bytes(22),
    "bytes23" => Token::Bytes(23),
    "bytes24" => Token::Bytes(24),
    "bytes25" => Token::Bytes(25),
    "bytes26" => Token::Bytes(26),
    "bytes27" => Token::Bytes(27),
    "bytes28" => Token::Bytes(28),
    "bytes29" => Token::Bytes(29),
    "bytes30" => Token::Bytes(30),
    "bytes31" => Token::Bytes(31),
    "bytes32" => Token::Bytes(32),
    "bytes" => Token::DynamicBytes,
    "byte" => Token::Byte,
    "calldata" => Token::Calldata,
    "case" => Token::Case,
    "constant" => Token::Constant,
    "constructor" => Token::Constructor,
    "continue" => Token::Continue,
    "contract" => Token::Contract,
    "default" => Token::Default,
    "delete" => Token::Delete,
    "do" => Token::Do,
    "else" => Token::Else,
    "emit" => Token::Emit,
    "enum" => Token::Enum,
    "event" => Token::Event,
    "external" => Token::External,
    "false" => Token::False,
    "for" => Token::For,
    "function" => Token::Function,
    "if" => Token::If,
    "import" => Token::Import,
    "indexed" => Token::Indexed,
    // Signed integers in 8-bit steps; bare `int` is an alias for `int256`.
    "int8" => Token::Int(8),
    "int16" => Token::Int(16),
    "int24" => Token::Int(24),
    "int32" => Token::Int(32),
    "int40" => Token::Int(40),
    "int48" => Token::Int(48),
    "int56" => Token::Int(56),
    "int64" => Token::Int(64),
    "int72" => Token::Int(72),
    "int80" => Token::Int(80),
    "int88" => Token::Int(88),
    "int96" => Token::Int(96),
    "int104" => Token::Int(104),
    "int112" => Token::Int(112),
    "int120" => Token::Int(120),
    "int128" => Token::Int(128),
    "int136" => Token::Int(136),
    "int144" => Token::Int(144),
    "int152" => Token::Int(152),
    "int160" => Token::Int(160),
    "int168" => Token::Int(168),
    "int176" => Token::Int(176),
    "int184" => Token::Int(184),
    "int192" => Token::Int(192),
    "int200" => Token::Int(200),
    "int208" => Token::Int(208),
    "int216" => Token::Int(216),
    "int224" => Token::Int(224),
    "int232" => Token::Int(232),
    "int240" => Token::Int(240),
    "int248" => Token::Int(248),
    "int256" => Token::Int(256),
    "interface" => Token::Interface,
    "internal" => Token::Internal,
    "int" => Token::Int(256),
    "leave" => Token::Leave,
    "library" => Token::Library,
    "mapping" => Token::Mapping,
    "memory" => Token::Memory,
    "new" => Token::New,
    "payable" => Token::Payable,
    "pragma" => Token::Pragma,
    "private" => Token::Private,
    "public" => Token::Public,
    "pure" => Token::Pure,
    "returns" => Token::Returns,
    "return" => Token::Return,
    "revert" => Token::Revert,
    "storage" => Token::Storage,
    "string" => Token::String,
    "struct" => Token::Struct,
    "switch" => Token::Switch,
    "throw" => Token::Throw,
    "true" => Token::True,
    "type" => Token::Type,
    // Unsigned integers in 8-bit steps; bare `uint` is an alias for `uint256`.
    "uint8" => Token::Uint(8),
    "uint16" => Token::Uint(16),
    "uint24" => Token::Uint(24),
    "uint32" => Token::Uint(32),
    "uint40" => Token::Uint(40),
    "uint48" => Token::Uint(48),
    "uint56" => Token::Uint(56),
    "uint64" => Token::Uint(64),
    "uint72" => Token::Uint(72),
    "uint80" => Token::Uint(80),
    "uint88" => Token::Uint(88),
    "uint96" => Token::Uint(96),
    "uint104" => Token::Uint(104),
    "uint112" => Token::Uint(112),
    "uint120" => Token::Uint(120),
    "uint128" => Token::Uint(128),
    "uint136" => Token::Uint(136),
    "uint144" => Token::Uint(144),
    "uint152" => Token::Uint(152),
    "uint160" => Token::Uint(160),
    "uint168" => Token::Uint(168),
    "uint176" => Token::Uint(176),
    "uint184" => Token::Uint(184),
    "uint192" => Token::Uint(192),
    "uint200" => Token::Uint(200),
    "uint208" => Token::Uint(208),
    "uint216" => Token::Uint(216),
    "uint224" => Token::Uint(224),
    "uint232" => Token::Uint(232),
    "uint240" => Token::Uint(240),
    "uint248" => Token::Uint(248),
    "uint256" => Token::Uint(256),
    "uint" => Token::Uint(256),
    "view" => Token::View,
    "while" => Token::While,
    "try" => Token::Try,
    "catch" => Token::Catch,
    "receive" => Token::Receive,
    "fallback" => Token::Fallback,
    "as" => Token::As,
    "is" => Token::Is,
    "abstract" => Token::Abstract,
    "virtual" => Token::Virtual,
    "override" => Token::Override,
    "using" => Token::Using,
    "modifier" => Token::Modifier,
    "immutable" => Token::Immutable,
    "unchecked" => Token::Unchecked,
    "assembly" => Token::Assembly,
    "let" => Token::Let,
    // Storage types for Soroban.
    "persistent" => Token::Persistent,
    "temporary" => Token::Temporary,
    "instance" => Token::Instance,
};
568
569impl<'input> Lexer<'input> {
570    /// Instantiates a new Lexer.
571    ///
572    /// # Examples
573    ///
574    /// ```
575    /// use solang_parser::lexer::Lexer;
576    ///
577    /// let source = "uint256 number = 0;";
578    /// let mut comments = Vec::new();
579    /// let mut errors = Vec::new();
580    /// let mut lexer = Lexer::new(source, 0, &mut comments, &mut errors);
581    /// ```
582    pub fn new(
583        input: &'input str,
584        file_no: usize,
585        comments: &'input mut Vec<Comment>,
586        errors: &'input mut Vec<LexicalError>,
587    ) -> Self {
588        Lexer {
589            input,
590            chars: peek_nth(input.char_indices()),
591            comments,
592            file_no,
593            parse_semver: false,
594            last_tokens: [None, None],
595            errors,
596        }
597    }
598
    /// Lexes a numeric literal: a hex number (`0x…`), a decimal integer, or a
    /// rational, each with an optional `e`/`E` exponent. Underscores are
    /// accepted as digit separators throughout.
    ///
    /// `ch` is the already-consumed first character of the literal. When `ch`
    /// is `'.'` the caller passes `start` one past the dot (hence the
    /// `start -= 1` below); otherwise `start` is the offset of `ch`.
    fn parse_number(&mut self, mut start: usize, ch: char) -> Result<'input> {
        let mut is_rational = false;
        if ch == '0' {
            if let Some((_, 'x')) = self.chars.peek() {
                // hex number
                self.chars.next();

                // At least one hex digit must follow the `0x` prefix.
                let mut end = match self.chars.next() {
                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
                    Some((..)) => {
                        return Err(LexicalError::MissingNumber(Loc::File(
                            self.file_no,
                            start,
                            start + 1,
                        )));
                    }
                    None => {
                        return Err(LexicalError::EndofFileInHex(Loc::File(
                            self.file_no,
                            start,
                            self.input.len(),
                        )));
                    }
                };

                // Consume the remaining hex digits and `_` separators.
                while let Some((i, ch)) = self.chars.peek() {
                    if !ch.is_ascii_hexdigit() && *ch != '_' {
                        break;
                    }
                    end = *i;
                    self.chars.next();
                }

                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
            }
        }

        if ch == '.' {
            // `.123` form: no integer part. `start` was passed one past the
            // dot, so back up onto it; the slices below then line up so the
            // integer part is empty and the fraction starts after the dot.
            is_rational = true;
            start -= 1;
        }

        // Scan the integer part.
        let mut end = start;
        while let Some((i, ch)) = self.chars.peek() {
            if !ch.is_ascii_digit() && *ch != '_' {
                break;
            }
            end = *i;
            self.chars.next();
        }

        if self.parse_semver {
            // In a semver version range `.` separates components rather than
            // marking a decimal point, so stop after the integer part with an
            // empty exponent slice.
            let integer = &self.input[start..=end];
            let exp = &self.input[0..0];

            return Ok((start, Token::Number(integer, exp), end + 1));
        }

        // Bounds of the fractional part, if any. `end_before_rational` is an
        // exclusive end for the integer slice; the fraction bounds are
        // inclusive.
        let mut rational_end = end;
        let mut end_before_rational = end + 1;
        let mut rational_start = end;
        if is_rational {
            end_before_rational = start;
            rational_start = start + 1;
        }

        // `123.456` form: only treat `.` as a decimal point when a digit
        // follows (two-character lookahead), so e.g. `1.foo` still lexes as
        // `1` `.` `foo`.
        if let Some((_, '.')) = self.chars.peek() {
            if let Some((i, ch)) = self.chars.peek_nth(1) {
                if ch.is_ascii_digit() && !is_rational {
                    rational_start = *i;
                    rational_end = *i;
                    is_rational = true;
                    self.chars.next(); // advance over '.'
                    while let Some((i, ch)) = self.chars.peek() {
                        if !ch.is_ascii_digit() && *ch != '_' {
                            break;
                        }
                        rational_end = *i;
                        end = *i;
                        self.chars.next();
                    }
                }
            }
        }

        // `old_end` remembers where the mantissa stopped; `exp_start` is set
        // so that with no exponent `exp_start..=end` is the empty slice.
        let old_end = end;
        let mut exp_start = end + 1;

        if let Some((i, 'e' | 'E')) = self.chars.peek() {
            exp_start = *i + 1;
            self.chars.next();
            // Negative exponent
            // NOTE(review): this accepts any number of consecutive '-' signs
            // (`1e--2`); confirm whether that leniency is intentional.
            while matches!(self.chars.peek(), Some((_, '-'))) {
                self.chars.next();
            }
            while let Some((i, ch)) = self.chars.peek() {
                if !ch.is_ascii_digit() && *ch != '_' {
                    break;
                }
                end = *i;
                self.chars.next();
            }

            // No digits were consumed after the `e`/`E` (and any minuses).
            if exp_start > end {
                return Err(LexicalError::MissingExponent(Loc::File(
                    self.file_no,
                    start,
                    self.input.len(),
                )));
            }
        }

        if is_rational {
            let integer = &self.input[start..end_before_rational];
            let fraction = &self.input[rational_start..=rational_end];
            let exp = &self.input[exp_start..=end];

            return Ok((
                start,
                Token::RationalNumber(integer, fraction, exp),
                end + 1,
            ));
        }

        let integer = &self.input[start..=old_end];
        let exp = &self.input[exp_start..=end];

        Ok((start, Token::Number(integer, exp), end + 1))
    }
728
729    fn string(
730        &mut self,
731        unicode: bool,
732        token_start: usize,
733        string_start: usize,
734        quote_char: char,
735    ) -> Result<'input> {
736        let mut end;
737
738        let mut last_was_escape = false;
739
740        loop {
741            if let Some((i, ch)) = self.chars.next() {
742                end = i;
743                if !last_was_escape {
744                    if ch == quote_char {
745                        break;
746                    }
747                    last_was_escape = ch == '\\';
748                } else {
749                    last_was_escape = false;
750                }
751            } else {
752                return Err(LexicalError::EndOfFileInString(Loc::File(
753                    self.file_no,
754                    token_start,
755                    self.input.len(),
756                )));
757            }
758        }
759
760        Ok((
761            token_start,
762            Token::StringLiteral(unicode, &self.input[string_start..end]),
763            end + 1,
764        ))
765    }
766
767    fn next(&mut self) -> Option<Spanned<'input>> {
768        'toplevel: loop {
769            match self.chars.next() {
770                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
771                    let (id, end) = self.match_identifier(start);
772
773                    if id == "unicode" {
774                        match self.chars.peek() {
775                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
776                                let quote_char = *quote_char;
777
778                                self.chars.next();
779                                let str_res = self.string(true, start, start + 8, quote_char);
780                                match str_res {
781                                    Err(lex_err) => self.errors.push(lex_err),
782                                    Ok(val) => return Some(val),
783                                }
784                            }
785                            _ => (),
786                        }
787                    }
788
789                    if id == "hex" {
790                        match self.chars.peek() {
791                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
792                                let quote_char = *quote_char;
793
794                                self.chars.next();
795
796                                for (i, ch) in &mut self.chars {
797                                    if ch == quote_char {
798                                        return Some((
799                                            start,
800                                            Token::HexLiteral(&self.input[start..=i]),
801                                            i + 1,
802                                        ));
803                                    }
804
805                                    if !ch.is_ascii_hexdigit() && ch != '_' {
806                                        // Eat up the remainer of the string
807                                        for (_, ch) in &mut self.chars {
808                                            if ch == quote_char {
809                                                break;
810                                            }
811                                        }
812
813                                        self.errors.push(
814                                            LexicalError::InvalidCharacterInHexLiteral(
815                                                Loc::File(self.file_no, i, i + 1),
816                                                ch,
817                                            ),
818                                        );
819                                        continue 'toplevel;
820                                    }
821                                }
822
823                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
824                                    self.file_no,
825                                    start,
826                                    self.input.len(),
827                                )));
828                                return None;
829                            }
830                            _ => (),
831                        }
832                    }
833
834                    if id == "address" {
835                        match self.chars.peek() {
836                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
837                                let quote_char = *quote_char;
838
839                                self.chars.next();
840
841                                for (i, ch) in &mut self.chars {
842                                    if ch == quote_char {
843                                        return Some((
844                                            start,
845                                            Token::AddressLiteral(&self.input[start..=i]),
846                                            i + 1,
847                                        ));
848                                    }
849                                }
850
851                                self.errors.push(LexicalError::EndOfFileInString(Loc::File(
852                                    self.file_no,
853                                    start,
854                                    self.input.len(),
855                                )));
856                                return None;
857                            }
858                            _ => (),
859                        }
860                    }
861
862                    return if let Some(w) = KEYWORDS.get(id) {
863                        Some((start, *w, end))
864                    } else {
865                        Some((start, Token::Identifier(id), end))
866                    };
867                }
868                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
869                    let str_res = self.string(false, start, start + 1, quote_char);
870                    match str_res {
871                        Err(lex_err) => self.errors.push(lex_err),
872                        Ok(val) => return Some(val),
873                    }
874                }
875                Some((start, '/')) => {
876                    match self.chars.peek() {
877                        Some((_, '=')) => {
878                            self.chars.next();
879                            return Some((start, Token::DivideAssign, start + 2));
880                        }
881                        Some((_, '/')) => {
882                            // line comment
883                            self.chars.next();
884
885                            let mut newline = false;
886
887                            let doc_comment = match self.chars.next() {
888                                Some((_, '/')) => {
889                                    // ///(/)+ is still a line comment
890                                    !matches!(self.chars.peek(), Some((_, '/')))
891                                }
892                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
893                                    newline = true;
894                                    false
895                                }
896                                _ => false,
897                            };
898
899                            let mut last = start + 3;
900
901                            if !newline {
902                                loop {
903                                    match self.chars.next() {
904                                        None => {
905                                            last = self.input.len();
906                                            break;
907                                        }
908                                        Some((offset, '\n' | '\r')) => {
909                                            last = offset;
910                                            break;
911                                        }
912                                        Some(_) => (),
913                                    }
914                                }
915                            }
916
917                            if doc_comment {
918                                self.comments.push(Comment::DocLine(
919                                    Loc::File(self.file_no, start, last),
920                                    self.input[start..last].to_owned(),
921                                ));
922                            } else {
923                                self.comments.push(Comment::Line(
924                                    Loc::File(self.file_no, start, last),
925                                    self.input[start..last].to_owned(),
926                                ));
927                            }
928                        }
929                        Some((_, '*')) => {
930                            // multiline comment
931                            self.chars.next();
932
933                            let doc_comment_start = matches!(self.chars.peek(), Some((_, '*')));
934
935                            let mut last = start + 3;
936                            let mut seen_star = false;
937
938                            loop {
939                                if let Some((i, ch)) = self.chars.next() {
940                                    if seen_star && ch == '/' {
941                                        break;
942                                    }
943                                    seen_star = ch == '*';
944                                    last = i;
945                                } else {
946                                    self.errors.push(LexicalError::EndOfFileInComment(Loc::File(
947                                        self.file_no,
948                                        start,
949                                        self.input.len(),
950                                    )));
951                                    return None;
952                                }
953                            }
954
955                            // `/**/` is not a doc comment
956                            if doc_comment_start && last > start + 2 {
957                                self.comments.push(Comment::DocBlock(
958                                    Loc::File(self.file_no, start, last + 2),
959                                    self.input[start..last + 2].to_owned(),
960                                ));
961                            } else {
962                                self.comments.push(Comment::Block(
963                                    Loc::File(self.file_no, start, last + 2),
964                                    self.input[start..last + 2].to_owned(),
965                                ));
966                            }
967                        }
968                        _ => {
969                            return Some((start, Token::Divide, start + 1));
970                        }
971                    }
972                }
973                Some((start, ch)) if ch.is_ascii_digit() => {
974                    let parse_result = self.parse_number(start, ch);
975                    match parse_result {
976                        Err(lex_err) => {
977                            self.errors.push(lex_err.clone());
978                            if matches!(lex_err, LexicalError::EndofFileInHex(_)) {
979                                return None;
980                            }
981                        }
982                        Ok(parse_result) => return Some(parse_result),
983                    }
984                }
985                Some((start, '@')) => {
986                    let (id, end) = self.match_identifier(start);
987                    if id.len() == 1 {
988                        self.errors.push(LexicalError::UnrecognisedToken(
989                            Loc::File(self.file_no, start, start + 1),
990                            id.to_owned(),
991                        ));
992                    } else {
993                        return Some((start, Token::Annotation(&id[1..]), end));
994                    };
995                }
996                Some((i, ';')) => {
997                    self.parse_semver = false;
998                    return Some((i, Token::Semicolon, i + 1));
999                }
1000                Some((i, ',')) => return Some((i, Token::Comma, i + 1)),
1001                Some((i, '(')) => return Some((i, Token::OpenParenthesis, i + 1)),
1002                Some((i, ')')) => return Some((i, Token::CloseParenthesis, i + 1)),
1003                Some((i, '{')) => return Some((i, Token::OpenCurlyBrace, i + 1)),
1004                Some((i, '}')) => return Some((i, Token::CloseCurlyBrace, i + 1)),
1005                Some((i, '~')) => return Some((i, Token::BitwiseNot, i + 1)),
1006                Some((i, '=')) => {
1007                    return match self.chars.peek() {
1008                        Some((_, '=')) => {
1009                            self.chars.next();
1010                            Some((i, Token::Equal, i + 2))
1011                        }
1012                        Some((_, '>')) => {
1013                            self.chars.next();
1014                            Some((i, Token::Arrow, i + 2))
1015                        }
1016                        _ => Some((i, Token::Assign, i + 1)),
1017                    }
1018                }
1019                Some((i, '!')) => {
1020                    return if let Some((_, '=')) = self.chars.peek() {
1021                        self.chars.next();
1022                        Some((i, Token::NotEqual, i + 2))
1023                    } else {
1024                        Some((i, Token::Not, i + 1))
1025                    }
1026                }
1027                Some((i, '|')) => {
1028                    return match self.chars.peek() {
1029                        Some((_, '=')) => {
1030                            self.chars.next();
1031                            Some((i, Token::BitwiseOrAssign, i + 2))
1032                        }
1033                        Some((_, '|')) => {
1034                            self.chars.next();
1035                            Some((i, Token::Or, i + 2))
1036                        }
1037                        _ => Some((i, Token::BitwiseOr, i + 1)),
1038                    };
1039                }
1040                Some((i, '&')) => {
1041                    return match self.chars.peek() {
1042                        Some((_, '=')) => {
1043                            self.chars.next();
1044                            Some((i, Token::BitwiseAndAssign, i + 2))
1045                        }
1046                        Some((_, '&')) => {
1047                            self.chars.next();
1048                            Some((i, Token::And, i + 2))
1049                        }
1050                        _ => Some((i, Token::BitwiseAnd, i + 1)),
1051                    };
1052                }
1053                Some((i, '^')) => {
1054                    return match self.chars.peek() {
1055                        Some((_, '=')) => {
1056                            self.chars.next();
1057                            Some((i, Token::BitwiseXorAssign, i + 2))
1058                        }
1059                        _ => Some((i, Token::BitwiseXor, i + 1)),
1060                    };
1061                }
1062                Some((i, '+')) => {
1063                    return match self.chars.peek() {
1064                        Some((_, '=')) => {
1065                            self.chars.next();
1066                            Some((i, Token::AddAssign, i + 2))
1067                        }
1068                        Some((_, '+')) => {
1069                            self.chars.next();
1070                            Some((i, Token::Increment, i + 2))
1071                        }
1072                        _ => Some((i, Token::Add, i + 1)),
1073                    };
1074                }
1075                Some((i, '-')) => {
1076                    return match self.chars.peek() {
1077                        Some((_, '=')) => {
1078                            self.chars.next();
1079                            Some((i, Token::SubtractAssign, i + 2))
1080                        }
1081                        Some((_, '-')) => {
1082                            self.chars.next();
1083                            Some((i, Token::Decrement, i + 2))
1084                        }
1085                        Some((_, '>')) => {
1086                            self.chars.next();
1087                            Some((i, Token::YulArrow, i + 2))
1088                        }
1089                        _ => Some((i, Token::Subtract, i + 1)),
1090                    };
1091                }
1092                Some((i, '*')) => {
1093                    return match self.chars.peek() {
1094                        Some((_, '=')) => {
1095                            self.chars.next();
1096                            Some((i, Token::MulAssign, i + 2))
1097                        }
1098                        Some((_, '*')) => {
1099                            self.chars.next();
1100                            Some((i, Token::Power, i + 2))
1101                        }
1102                        _ => Some((i, Token::Mul, i + 1)),
1103                    };
1104                }
1105                Some((i, '%')) => {
1106                    return match self.chars.peek() {
1107                        Some((_, '=')) => {
1108                            self.chars.next();
1109                            Some((i, Token::ModuloAssign, i + 2))
1110                        }
1111                        _ => Some((i, Token::Modulo, i + 1)),
1112                    };
1113                }
1114                Some((i, '<')) => {
1115                    return match self.chars.peek() {
1116                        Some((_, '<')) => {
1117                            self.chars.next();
1118                            if let Some((_, '=')) = self.chars.peek() {
1119                                self.chars.next();
1120                                Some((i, Token::ShiftLeftAssign, i + 3))
1121                            } else {
1122                                Some((i, Token::ShiftLeft, i + 2))
1123                            }
1124                        }
1125                        Some((_, '=')) => {
1126                            self.chars.next();
1127                            Some((i, Token::LessEqual, i + 2))
1128                        }
1129                        _ => Some((i, Token::Less, i + 1)),
1130                    };
1131                }
1132                Some((i, '>')) => {
1133                    return match self.chars.peek() {
1134                        Some((_, '>')) => {
1135                            self.chars.next();
1136                            if let Some((_, '=')) = self.chars.peek() {
1137                                self.chars.next();
1138                                Some((i, Token::ShiftRightAssign, i + 3))
1139                            } else {
1140                                Some((i, Token::ShiftRight, i + 2))
1141                            }
1142                        }
1143                        Some((_, '=')) => {
1144                            self.chars.next();
1145                            Some((i, Token::MoreEqual, i + 2))
1146                        }
1147                        _ => Some((i, Token::More, i + 1)),
1148                    };
1149                }
1150                Some((i, '.')) => {
1151                    if let Some((_, a)) = self.chars.peek() {
1152                        if a.is_ascii_digit() && !self.parse_semver {
1153                            return match self.parse_number(i + 1, '.') {
1154                                Err(lex_error) => {
1155                                    self.errors.push(lex_error);
1156                                    None
1157                                }
1158                                Ok(parse_result) => Some(parse_result),
1159                            };
1160                        }
1161                    }
1162                    return Some((i, Token::Member, i + 1));
1163                }
1164                Some((i, '[')) => return Some((i, Token::OpenBracket, i + 1)),
1165                Some((i, ']')) => return Some((i, Token::CloseBracket, i + 1)),
1166                Some((i, ':')) => {
1167                    return match self.chars.peek() {
1168                        Some((_, '=')) => {
1169                            self.chars.next();
1170                            Some((i, Token::ColonAssign, i + 2))
1171                        }
1172                        _ => Some((i, Token::Colon, i + 1)),
1173                    };
1174                }
1175                Some((i, '?')) => return Some((i, Token::Question, i + 1)),
1176                Some((_, ch)) if ch.is_whitespace() => (),
1177                Some((start, _)) => {
1178                    let mut end;
1179
1180                    loop {
1181                        if let Some((i, ch)) = self.chars.next() {
1182                            end = i;
1183
1184                            if ch.is_whitespace() {
1185                                break;
1186                            }
1187                        } else {
1188                            end = self.input.len();
1189                            break;
1190                        }
1191                    }
1192
1193                    self.errors.push(LexicalError::UnrecognisedToken(
1194                        Loc::File(self.file_no, start, end),
1195                        self.input[start..end].to_owned(),
1196                    ));
1197                }
1198                None => return None, // End of file
1199            }
1200        }
1201    }
1202
1203    fn match_identifier(&mut self, start: usize) -> (&'input str, usize) {
1204        let end;
1205        loop {
1206            if let Some((i, ch)) = self.chars.peek() {
1207                if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
1208                    end = *i;
1209                    break;
1210                }
1211                self.chars.next();
1212            } else {
1213                end = self.input.len();
1214                break;
1215            }
1216        }
1217
1218        (&self.input[start..end], end)
1219    }
1220}
1221
1222impl<'input> Iterator for Lexer<'input> {
1223    type Item = Spanned<'input>;
1224
1225    fn next(&mut self) -> Option<Self::Item> {
1226        // Lexer should be aware of whether the last two tokens were
1227        // pragma followed by identifier. If this is true, then special parsing should be
1228        // done for the pragma value
1229        if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
1230            self.parse_semver = true;
1231        }
1232
1233        let token = self.next();
1234
1235        self.last_tokens = [
1236            self.last_tokens[1],
1237            match token {
1238                Some((_, n, _)) => Some(n),
1239                _ => None,
1240            },
1241        ];
1242
1243        token
1244    }
1245}
1246
1247#[cfg(test)]
1248mod tests {
1249    use super::*;
1250
1251    #[test]
1252    fn test_lexer() {
1253        let mut comments = Vec::new();
1254        let mut errors = Vec::new();
1255
1256        let multiple_errors = r#" 9ea -9e € bool hex uint8 hex"g"   /**  "#;
1257        let tokens = Lexer::new(multiple_errors, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1258        assert_eq!(
1259            tokens,
1260            vec![
1261                (3, Token::Identifier("a"), 4),
1262                (5, Token::Subtract, 6),
1263                (13, Token::Bool, 17),
1264                (18, Token::Identifier("hex"), 21),
1265                (22, Token::Uint(8), 27),
1266            ]
1267        );
1268
1269        assert_eq!(
1270            errors,
1271            vec![
1272                LexicalError::MissingExponent(Loc::File(0, 1, 42)),
1273                LexicalError::MissingExponent(Loc::File(0, 6, 42)),
1274                LexicalError::UnrecognisedToken(Loc::File(0, 9, 12), '€'.to_string()),
1275                LexicalError::InvalidCharacterInHexLiteral(Loc::File(0, 32, 33), 'g'),
1276                LexicalError::EndOfFileInComment(Loc::File(0, 37, 42)),
1277            ]
1278        );
1279
1280        let mut errors = Vec::new();
1281        let tokens = Lexer::new("bool", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1282
1283        assert_eq!(tokens, vec!((0, Token::Bool, 4)));
1284
1285        let tokens = Lexer::new("uint8", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1286
1287        assert_eq!(tokens, vec!((0, Token::Uint(8), 5)));
1288
1289        let tokens = Lexer::new("hex", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1290
1291        assert_eq!(tokens, vec!((0, Token::Identifier("hex"), 3)));
1292
1293        let tokens = Lexer::new(
1294            "hex\"cafe_dead\" /* adad*** */",
1295            0,
1296            &mut comments,
1297            &mut errors,
1298        )
1299        .collect::<Vec<_>>();
1300
1301        assert_eq!(tokens, vec!((0, Token::HexLiteral("hex\"cafe_dead\""), 14)));
1302
1303        let tokens = Lexer::new(
1304            "// foo bar\n0x00fead0_12 00090 0_0",
1305            0,
1306            &mut comments,
1307            &mut errors,
1308        )
1309        .collect::<Vec<_>>();
1310
1311        assert_eq!(
1312            tokens,
1313            vec!(
1314                (11, Token::HexNumber("0x00fead0_12"), 23),
1315                (24, Token::Number("00090", ""), 29),
1316                (30, Token::Number("0_0", ""), 33)
1317            )
1318        );
1319
1320        let tokens = Lexer::new(
1321            "// foo bar\n0x00fead0_12 9.0008 0_0",
1322            0,
1323            &mut comments,
1324            &mut errors,
1325        )
1326        .collect::<Vec<_>>();
1327
1328        assert_eq!(
1329            tokens,
1330            vec!(
1331                (11, Token::HexNumber("0x00fead0_12"), 23),
1332                (24, Token::RationalNumber("9", "0008", ""), 30),
1333                (31, Token::Number("0_0", ""), 34)
1334            )
1335        );
1336
1337        let tokens = Lexer::new(
1338            "// foo bar\n0x00fead0_12 .0008 0.9e2",
1339            0,
1340            &mut comments,
1341            &mut errors,
1342        )
1343        .collect::<Vec<_>>();
1344
1345        assert_eq!(
1346            tokens,
1347            vec!(
1348                (11, Token::HexNumber("0x00fead0_12"), 23),
1349                (24, Token::RationalNumber("", "0008", ""), 29),
1350                (30, Token::RationalNumber("0", "9", "2"), 35)
1351            )
1352        );
1353
1354        let tokens = Lexer::new(
1355            "// foo bar\n0x00fead0_12 .0008 0.9e-2-2",
1356            0,
1357            &mut comments,
1358            &mut errors,
1359        )
1360        .collect::<Vec<_>>();
1361
1362        assert_eq!(
1363            tokens,
1364            vec!(
1365                (11, Token::HexNumber("0x00fead0_12"), 23),
1366                (24, Token::RationalNumber("", "0008", ""), 29),
1367                (30, Token::RationalNumber("0", "9", "-2"), 36),
1368                (36, Token::Subtract, 37),
1369                (37, Token::Number("2", ""), 38)
1370            )
1371        );
1372
1373        let tokens = Lexer::new("1.2_3e2-", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1374
1375        assert_eq!(
1376            tokens,
1377            vec!(
1378                (0, Token::RationalNumber("1", "2_3", "2"), 7),
1379                (7, Token::Subtract, 8)
1380            )
1381        );
1382
1383        let tokens = Lexer::new("\"foo\"", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1384
1385        assert_eq!(tokens, vec!((0, Token::StringLiteral(false, "foo"), 5)));
1386
1387        let tokens = Lexer::new(
1388            "pragma solidity >=0.5.0 <0.7.0;",
1389            0,
1390            &mut comments,
1391            &mut errors,
1392        )
1393        .collect::<Vec<_>>();
1394
1395        assert_eq!(
1396            tokens,
1397            vec!(
1398                (0, Token::Pragma, 6),
1399                (7, Token::Identifier("solidity"), 15),
1400                (16, Token::MoreEqual, 18),
1401                (18, Token::Number("0", ""), 19),
1402                (19, Token::Member, 20),
1403                (20, Token::Number("5", ""), 21),
1404                (21, Token::Member, 22),
1405                (22, Token::Number("0", ""), 23),
1406                (24, Token::Less, 25),
1407                (25, Token::Number("0", ""), 26),
1408                (26, Token::Member, 27),
1409                (27, Token::Number("7", ""), 28),
1410                (28, Token::Member, 29),
1411                (29, Token::Number("0", ""), 30),
1412                (30, Token::Semicolon, 31),
1413            )
1414        );
1415
1416        let tokens = Lexer::new(
1417            "pragma solidity \t>=0.5.0 <0.7.0 \n ;",
1418            0,
1419            &mut comments,
1420            &mut errors,
1421        )
1422        .collect::<Vec<_>>();
1423
1424        assert_eq!(
1425            tokens,
1426            vec!(
1427                (0, Token::Pragma, 6),
1428                (7, Token::Identifier("solidity"), 15),
1429                (17, Token::MoreEqual, 19),
1430                (19, Token::Number("0", ""), 20),
1431                (20, Token::Member, 21),
1432                (21, Token::Number("5", ""), 22),
1433                (22, Token::Member, 23),
1434                (23, Token::Number("0", ""), 24),
1435                (25, Token::Less, 26),
1436                (26, Token::Number("0", ""), 27),
1437                (27, Token::Member, 28),
1438                (28, Token::Number("7", ""), 29),
1439                (29, Token::Member, 30),
1440                (30, Token::Number("0", ""), 31),
1441                (34, Token::Semicolon, 35),
1442            )
1443        );
1444
1445        let tokens =
1446            Lexer::new("pragma solidity 赤;", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1447
1448        assert_eq!(
1449            tokens,
1450            vec!(
1451                (0, Token::Pragma, 6),
1452                (7, Token::Identifier("solidity"), 15),
1453                (16, Token::Identifier("赤"), 19),
1454                (19, Token::Semicolon, 20)
1455            )
1456        );
1457
1458        let tokens = Lexer::new(">>= >> >= >", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1459
1460        assert_eq!(
1461            tokens,
1462            vec!(
1463                (0, Token::ShiftRightAssign, 3),
1464                (4, Token::ShiftRight, 6),
1465                (7, Token::MoreEqual, 9),
1466                (10, Token::More, 11),
1467            )
1468        );
1469
1470        let tokens = Lexer::new("<<= << <= <", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1471
1472        assert_eq!(
1473            tokens,
1474            vec!(
1475                (0, Token::ShiftLeftAssign, 3),
1476                (4, Token::ShiftLeft, 6),
1477                (7, Token::LessEqual, 9),
1478                (10, Token::Less, 11),
1479            )
1480        );
1481
1482        let tokens = Lexer::new("-16 -- - -=", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1483
1484        assert_eq!(
1485            tokens,
1486            vec!(
1487                (0, Token::Subtract, 1),
1488                (1, Token::Number("16", ""), 3),
1489                (4, Token::Decrement, 6),
1490                (7, Token::Subtract, 8),
1491                (9, Token::SubtractAssign, 11),
1492            )
1493        );
1494
1495        let tokens = Lexer::new("-4 ", 0, &mut comments, &mut errors).collect::<Vec<_>>();
1496
1497        assert_eq!(
1498            tokens,
1499            vec!((0, Token::Subtract, 1), (1, Token::Number("4", ""), 2),)
1500        );
1501
1502        let mut errors = Vec::new();
1503        let _ = Lexer::new(r#"hex"abcdefg""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1504
1505        assert_eq!(
1506            errors,
1507            vec![LexicalError::InvalidCharacterInHexLiteral(
1508                Loc::File(0, 10, 11),
1509                'g'
1510            )]
1511        );
1512
1513        let mut errors = Vec::new();
1514        let _ = Lexer::new(r#" € "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1515
1516        assert_eq!(
1517            errors,
1518            vec!(LexicalError::UnrecognisedToken(
1519                Loc::File(0, 1, 4),
1520                "€".to_owned()
1521            ))
1522        );
1523
1524        let mut errors = Vec::new();
1525        let _ = Lexer::new(r#"€"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1526
1527        assert_eq!(
1528            errors,
1529            vec!(LexicalError::UnrecognisedToken(
1530                Loc::File(0, 0, 3),
1531                "€".to_owned()
1532            ))
1533        );
1534
1535        let tokens =
1536            Lexer::new(r#"pragma foo bar"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1537
1538        assert_eq!(
1539            tokens,
1540            vec!(
1541                (0, Token::Pragma, 6),
1542                (7, Token::Identifier("foo"), 10),
1543                (11, Token::Identifier("bar"), 14),
1544            )
1545        );
1546
1547        comments.truncate(0);
1548
1549        let tokens = Lexer::new(r#"/// foo"#, 0, &mut comments, &mut errors).count();
1550
1551        assert_eq!(tokens, 0);
1552        assert_eq!(
1553            comments,
1554            vec![Comment::DocLine(Loc::File(0, 0, 7), "/// foo".to_owned())],
1555        );
1556
1557        comments.truncate(0);
1558
1559        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();
1560
1561        assert_eq!(tokens, 0);
1562        assert_eq!(
1563            comments,
1564            vec!(
1565                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
1566                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
1567            )
1568        );
1569
1570        comments.truncate(0);
1571
1572        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();
1573
1574        assert_eq!(tokens, 0);
1575        assert_eq!(
1576            comments,
1577            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
1578        );
1579
1580        comments.truncate(0);
1581
1582        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();
1583
1584        assert_eq!(tokens, 0);
1585        assert_eq!(
1586            comments,
1587            vec!(Comment::DocBlock(
1588                Loc::File(0, 0, 10),
1589                "/** foo */".to_owned()
1590            ))
1591        );
1592
1593        comments.truncate(0);
1594
1595        let tokens = Lexer::new(
1596            "/** jadajadadjada */\n/* bar */",
1597            0,
1598            &mut comments,
1599            &mut errors,
1600        )
1601        .count();
1602
1603        assert_eq!(tokens, 0);
1604        assert_eq!(
1605            comments,
1606            vec!(
1607                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
1608                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
1609            )
1610        );
1611
1612        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
1613        assert_eq!(tokens, None);
1614
1615        let mut errors = Vec::new();
1616        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
1617        assert_eq!(
1618            errors,
1619            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
1620        );
1621
1622        let mut errors = Vec::new();
1623        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
1624        assert_eq!(tokens, None);
1625
1626        // some unicode tests
1627        let tokens = Lexer::new(
1628            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
1629            0,
1630            &mut comments,
1631            &mut errors,
1632        )
1633        .collect::<Vec<_>>();
1634
1635        assert_eq!(
1636            tokens,
1637            vec!(
1638                (0, Token::MoreEqual, 2),
1639                (5, Token::Member, 6),
1640                (7, Token::Identifier("très"), 12),
1641                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
1642                (65, Token::Identifier("カラス"), 74)
1643            )
1644        );
1645
1646        let tokens = Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1647
1648        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));
1649
1650        let tokens =
1651            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors).collect::<Vec<_>>();
1652
1653        assert_eq!(
1654            tokens,
1655            vec!(
1656                (0, Token::Identifier("unicode"), 7),
1657                (8, Token::StringLiteral(false, "€"), 13),
1658            )
1659        );
1660
        // scientific notation
        // `1e0` lexes as one Number token: mantissa "1", exponent "0".
        let tokens = Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        // A leading `-` is a separate Subtract token, not part of the number;
        // leading zeros in the exponent are kept verbatim ("0123").
        let tokens = Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        // `e` with no digits after it: no number token is produced and a
        // MissingExponent error spanning "9e" (bytes 2..4) is reported.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        // `9ea`: an identifier character in the exponent position rejects the
        // whole number (MissingExponent over bytes 0..3), then lexing resumes
        // at the `a` identifier.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        // `42.a` is member access on an integer literal, not a rational:
        // Number, Member (`.`), Identifier.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        // Two consecutive dots each lex as their own Member token.
        let tokens = Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );
1714
        comments.truncate(0);

        // A `///` doc line comment followed by a plain `//` line comment:
        // both land in `comments` (DocLine vs Line) and yield no tokens.
        let tokens = Lexer::new("/// jadajadadjada\n// bar", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocLine(Loc::File(0, 0, 17), "/// jadajadadjada".to_owned()),
                Comment::Line(Loc::File(0, 18, 24), "// bar".to_owned())
            )
        );

        comments.truncate(0);

        // The empty comment `/**/` is a plain Block comment — not a doc
        // block — even though it starts with `/**`.
        let tokens = Lexer::new("/**/", 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::Block(Loc::File(0, 0, 4), "/**/".to_owned()))
        );

        comments.truncate(0);

        // `/** ... */` with content is a DocBlock comment.
        let tokens = Lexer::new(r#"/** foo */"#, 0, &mut comments, &mut errors).count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(Comment::DocBlock(
                Loc::File(0, 0, 10),
                "/** foo */".to_owned()
            ))
        );

        comments.truncate(0);

        // DocBlock and plain Block comments mixed in a single input.
        let tokens = Lexer::new(
            "/** jadajadadjada */\n/* bar */",
            0,
            &mut comments,
            &mut errors,
        )
        .count();

        assert_eq!(tokens, 0);
        assert_eq!(
            comments,
            vec!(
                Comment::DocBlock(Loc::File(0, 0, 20), "/** jadajadadjada */".to_owned()),
                Comment::Block(Loc::File(0, 21, 30), "/* bar */".to_owned())
            )
        );

        // A run of `*`s inside a block comment must not confuse the `*/`
        // terminator scan: still no tokens.
        let tokens = Lexer::new("/************/", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);

        // An unterminated block comment reports EndOfFileInComment over the
        // whole remaining input.
        let mut errors = Vec::new();
        let _ = Lexer::new("/**", 0, &mut comments, &mut errors).next();
        assert_eq!(
            errors,
            vec!(LexicalError::EndOfFileInComment(Loc::File(0, 0, 3)))
        );

        // A long run of slashes is still just a comment: no tokens produced.
        let mut errors = Vec::new();
        let tokens = Lexer::new("//////////////", 0, &mut comments, &mut errors).next();
        assert_eq!(tokens, None);
1783
        // some unicode tests
        // Non-ASCII identifiers are accepted, and U+00A0 (NBSP), U+2028 (LINE
        // SEPARATOR) and U+0085 (NEL) are skipped like whitespace — the tokens
        // below jump over them. All offsets are byte positions, which is why
        // the multi-byte identifiers have wide spans (e.g. 15..63).
        let tokens = Lexer::new(
            ">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス",
            0,
            &mut comments,
            &mut errors,
        )
        .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::MoreEqual, 2),
                (5, Token::Member, 6),
                (7, Token::Identifier("très"), 12),
                (15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63),
                (65, Token::Identifier("カラス"), 74)
            )
        );

        // `unicode` directly followed by a quote makes a unicode string
        // literal: StringLiteral(true, ..). End offset 12 is in bytes ("€" is
        // 3 bytes).
        let tokens =
            Lexer::new(r#"unicode"€""#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::StringLiteral(true, "€"), 12)));

        // With a space in between, `unicode` is an ordinary identifier and the
        // string is a regular (non-unicode) literal.
        let tokens =
            Lexer::new(r#"unicode "€""#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Identifier("unicode"), 7),
                (8, Token::StringLiteral(false, "€"), 13),
            )
        );
1821
        // scientific notation
        // NOTE(review): this whole section repeats the scientific-notation
        // cases earlier in this test verbatim; the only difference is that the
        // `collect` target type is spelled out instead of `Vec<_>`. Consider
        // deduplicating.
        let tokens =
            Lexer::new(r#" 1e0 "#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((1, Token::Number("1", "0"), 4)));

        // `-` is a separate Subtract token; exponent's leading zeros are kept.
        let tokens =
            Lexer::new(r#" -9e0123"#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!((1, Token::Subtract, 2), (2, Token::Number("9", "0123"), 8),)
        );

        // Missing exponent digits: error instead of a number token.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#" -9e"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((1, Token::Subtract, 2)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 2, 4)))
        );

        // Identifier character in exponent position: whole number rejected,
        // lexing resumes at `a`.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"9ea"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((2, Token::Identifier("a"), 3)));
        assert_eq!(
            errors,
            vec!(LexicalError::MissingExponent(Loc::File(0, 0, 3)))
        );

        // `42.a` is member access on an integer literal, not a rational.
        let mut errors = Vec::new();
        let tokens = Lexer::new(r#"42.a"#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Identifier("a"), 4)
            )
        );

        // Two consecutive dots each lex as their own Member token.
        let tokens =
            Lexer::new(r#"42..a"#, 0, &mut comments, &mut errors)
                .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(
            tokens,
            vec!(
                (0, Token::Number("42", ""), 2),
                (2, Token::Member, 3),
                (3, Token::Member, 4),
                (4, Token::Identifier("a"), 5)
            )
        );
1884
        // A non-hex digit inside a `hex"..."` literal is reported as
        // InvalidCharacterInHexLiteral with the offending char and its byte
        // position.
        let mut errors = Vec::new();
        let _ = Lexer::new(r#"hex"g""#, 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();
        assert_eq!(
            errors,
            vec!(LexicalError::InvalidCharacterInHexLiteral(
                Loc::File(0, 4, 5),
                'g'
            ),)
        );

        // A leading-dot rational: empty integer part, fraction "9", no
        // exponent.
        let mut errors = Vec::new();
        let tokens =
            Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        // Leading-dot rational with a scientific exponent.
        let mut errors = Vec::new();
        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors)
            .collect::<Vec<(usize, Token, usize)>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));

        // NOTE(review): the two cases below duplicate the two directly above
        // (only the `collect` turbofish differs); consider removing.
        let mut errors = Vec::new();
        let tokens = Lexer::new(".9", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", ""), 2)));

        let mut errors = Vec::new();
        let tokens = Lexer::new(".9e10", 0, &mut comments, &mut errors).collect::<Vec<_>>();

        assert_eq!(tokens, vec!((0, Token::RationalNumber("", "9", "10"), 5)));
1917
        // `@name` lexes as a single Annotation token spanning `@` plus the
        // identifier, with no errors or comments emitted.
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(0, Token::Annotation("my_annotation"), 14)]);
        assert!(errors.is_empty());
        assert!(comments.is_empty());

        // A detached `@` (whitespace before the identifier) is not an
        // annotation: it is reported as an UnrecognisedToken and the
        // identifier after the space lexes normally.
        errors.clear();
        comments.clear();
        let tokens =
            Lexer::new("@ my_annotation", 0, &mut comments, &mut errors).collect::<Vec<_>>();
        assert_eq!(tokens, vec![(2, Token::Identifier("my_annotation"), 15)]);
        assert_eq!(
            errors,
            vec![LexicalError::UnrecognisedToken(
                Loc::File(0, 0, 1),
                "@".to_string()
            )]
        );
        assert!(comments.is_empty());
1939    }
1940}