solar_parse/lexer/mod.rs

//! Solidity and Yul lexer.

use solar_ast::{
    token::{BinOpToken, CommentKind, Delimiter, Token, TokenKind, TokenLitKind},
    Base, StrKind,
};
use solar_interface::{
    diagnostics::DiagCtxt, source_map::SourceFile, BytePos, Session, Span, Symbol,
};

mod cursor;
use cursor::token::{RawLiteralKind, RawToken, RawTokenKind};
pub use cursor::*;

pub mod unescape;

mod unicode_chars;

mod utf8;
/// Solidity and Yul lexer.
///
/// Converts a [`Cursor`]'s output from simple [`RawTokenKind`]s into rich [`TokenKind`]s, by
/// converting strings into interned symbols, concatenating tokens together, and running additional
/// validation.
pub struct Lexer<'sess, 'src> {
    /// The parsing context.
    pub(crate) sess: &'sess Session,

    /// Initial position, read-only. Used to map `pos` back into indices of `src`.
    start_pos: BytePos,

    /// The absolute offset within the source_map of the current character.
    pos: BytePos,

    /// Source text to tokenize.
    src: &'src str,

    /// Cursor for getting lexer tokens.
    cursor: Cursor<'src>,

    /// The current token which has not been processed by `next_token` yet.
    /// Acts as a one-token lookahead buffer so adjacent tokens can be glued
    /// (e.g. `=` + `=` -> `==`).
    token: Token,

    /// When a "unknown start of token: \u{a0}" has already been emitted earlier
    /// in this file, it's safe to treat further occurrences of the non-breaking
    /// space character as whitespace.
    nbsp_is_whitespace: bool,
}
50
impl<'sess, 'src> Lexer<'sess, 'src> {
    /// Creates a new `Lexer` for the given source string.
    pub fn new(sess: &'sess Session, src: &'src str) -> Self {
        Self::with_start_pos(sess, src, BytePos(0))
    }

    /// Creates a new `Lexer` for the given source file.
    ///
    /// Note that the source file must be added to the source map before calling this function.
    pub fn from_source_file(sess: &'sess Session, file: &'src SourceFile) -> Self {
        Self::with_start_pos(sess, &file.src, file.start_pos)
    }

    /// Creates a new `Lexer` for the given source string and starting position.
    pub fn with_start_pos(sess: &'sess Session, src: &'src str, start_pos: BytePos) -> Self {
        let mut lexer = Self {
            sess,
            start_pos,
            pos: start_pos,
            src,
            cursor: Cursor::new(src),
            token: Token::DUMMY,
            nbsp_is_whitespace: false,
        };
        // Prime the one-token lookahead buffer (`self.token`) so that the first
        // `next_token` call has something to glue against.
        (lexer.token, _) = lexer.bump();
        lexer
    }

    /// Returns a reference to the diagnostic context.
    #[inline]
    pub fn dcx(&self) -> &'sess DiagCtxt {
        &self.sess.dcx
    }

    /// Consumes the lexer and collects the remaining tokens into a vector.
    ///
    /// Note that this skips comments, as [required by the parser](crate::Parser::new).
    ///
    /// Prefer using this method instead of manually collecting tokens using [`Iterator`].
    #[instrument(name = "lex", level = "debug", skip_all)]
    pub fn into_tokens(mut self) -> Vec<Token> {
        // This is an estimate of the number of tokens in the source.
        // (~4 source bytes per token; the trace below logs the actual ratio.)
        let mut tokens = Vec::with_capacity(self.src.len() / 4);
        loop {
            let token = self.next_token();
            if token.is_eof() {
                break;
            }
            if token.is_comment() {
                continue;
            }
            tokens.push(token);
        }
        trace!(
            src.len = self.src.len(),
            tokens.len = tokens.len(),
            tokens.capacity = tokens.capacity(),
            ratio = %format_args!("{:.2}", self.src.len() as f64 / tokens.len() as f64),
            "lexed"
        );
        tokens
    }

    /// Returns the next token, advancing the lexer.
    ///
    /// Repeatedly pulls raw tokens and glues adjacent ones (e.g. `=` `=` -> `==`)
    /// into `self.token` until a whitespace boundary or a non-gluable pair is hit,
    /// then yields the buffered token.
    pub fn next_token(&mut self) -> Token {
        let mut next_token;
        loop {
            let preceded_by_whitespace;
            (next_token, preceded_by_whitespace) = self.bump();
            if preceded_by_whitespace {
                // Whitespace (or a skipped trivia run) separates tokens; never glue across it.
                break;
            } else if let Some(glued) = self.token.glue(next_token) {
                self.token = glued;
            } else {
                break;
            }
        }
        // Yield the buffered token and stash the freshly lexed one as the new lookahead.
        std::mem::replace(&mut self.token, next_token)
    }

    /// Lexes and "cooks" a single token, returning it together with a flag that is `true`
    /// when the token was preceded by whitespace, a comment, or a skipped invalid character.
    fn bump(&mut self) -> (Token, bool) {
        let mut preceded_by_whitespace = false;
        // Number of upcoming identical invalid characters to skip without re-emitting
        // the "unknown start of token" diagnostic.
        let mut swallow_next_invalid = 0;
        loop {
            let RawToken { kind: raw_kind, len } = self.cursor.advance_token();
            let start = self.pos;
            self.pos += len;

            // Now "cook" the token, converting the simple `RawTokenKind` into a rich `TokenKind`.
            // This turns strings into interned symbols and runs additional validation.
            let kind = match raw_kind {
                RawTokenKind::LineComment { is_doc } => {
                    preceded_by_whitespace = true;

                    // Opening delimiter (`//` or `///`) is not included into the symbol.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    let content = self.str_from(content_start);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Line)
                }
                RawTokenKind::BlockComment { is_doc, terminated } => {
                    preceded_by_whitespace = true;

                    if !terminated {
                        let msg = if is_doc {
                            "unterminated block doc-comment"
                        } else {
                            "unterminated block comment"
                        };
                        self.dcx().err(msg).span(self.new_span(start, self.pos)).emit();
                    }

                    // Opening delimiter and closing delimiter are not included into the symbol.
                    // If unterminated, there is no `*/` to strip, hence the multiplication.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    let content_end = self.pos - (terminated as u32) * 2;
                    let content = self.str_from_to(content_start, content_end);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Block)
                }
                RawTokenKind::Whitespace => {
                    preceded_by_whitespace = true;
                    continue;
                }
                RawTokenKind::Ident => {
                    let sym = self.symbol_from(start);
                    TokenKind::Ident(sym)
                }
                RawTokenKind::Literal { kind } => {
                    let (kind, symbol) = self.cook_literal(start, self.pos, kind);
                    TokenKind::Literal(kind, symbol)
                }

                RawTokenKind::Semi => TokenKind::Semi,
                RawTokenKind::Comma => TokenKind::Comma,
                RawTokenKind::Dot => TokenKind::Dot,
                RawTokenKind::OpenParen => TokenKind::OpenDelim(Delimiter::Parenthesis),
                RawTokenKind::CloseParen => TokenKind::CloseDelim(Delimiter::Parenthesis),
                RawTokenKind::OpenBrace => TokenKind::OpenDelim(Delimiter::Brace),
                RawTokenKind::CloseBrace => TokenKind::CloseDelim(Delimiter::Brace),
                RawTokenKind::OpenBracket => TokenKind::OpenDelim(Delimiter::Bracket),
                RawTokenKind::CloseBracket => TokenKind::CloseDelim(Delimiter::Bracket),
                RawTokenKind::Tilde => TokenKind::Tilde,
                RawTokenKind::Question => TokenKind::Question,
                RawTokenKind::Colon => TokenKind::Colon,
                RawTokenKind::Eq => TokenKind::Eq,
                RawTokenKind::Bang => TokenKind::Not,
                RawTokenKind::Lt => TokenKind::Lt,
                RawTokenKind::Gt => TokenKind::Gt,
                RawTokenKind::Minus => TokenKind::BinOp(BinOpToken::Minus),
                RawTokenKind::And => TokenKind::BinOp(BinOpToken::And),
                RawTokenKind::Or => TokenKind::BinOp(BinOpToken::Or),
                RawTokenKind::Plus => TokenKind::BinOp(BinOpToken::Plus),
                RawTokenKind::Star => TokenKind::BinOp(BinOpToken::Star),
                RawTokenKind::Slash => TokenKind::BinOp(BinOpToken::Slash),
                RawTokenKind::Caret => TokenKind::BinOp(BinOpToken::Caret),
                RawTokenKind::Percent => TokenKind::BinOp(BinOpToken::Percent),

                RawTokenKind::Unknown => {
                    // Don't emit diagnostics for sequences of the same invalid token
                    if swallow_next_invalid > 0 {
                        swallow_next_invalid -= 1;
                        continue;
                    }
                    let mut it = self.str_from_to_end(start).chars();
                    let c = it.next().unwrap();
                    if c == '\u{00a0}' {
                        // If an error has already been reported on non-breaking
                        // space characters earlier in the file, treat all
                        // subsequent occurrences as whitespace.
                        if self.nbsp_is_whitespace {
                            preceded_by_whitespace = true;
                            continue;
                        }
                        self.nbsp_is_whitespace = true;
                    }

                    // Count immediately-following repetitions of the same invalid char so a
                    // run like `€€€` produces one diagnostic with a "repeats" note.
                    let repeats = it.take_while(|c1| *c1 == c).count();
                    swallow_next_invalid = repeats;

                    // Check whether `c` is a confusable Unicode look-alike of an ASCII token.
                    let (token, sugg) =
                        unicode_chars::check_for_substitution(self, start, c, repeats + 1);

                    // Span covers the whole run of repeated invalid characters.
                    let span = self
                        .new_span(start, self.pos + BytePos::from_usize(repeats * c.len_utf8()));
                    let msg = format!("unknown start of token: {}", escaped_char(c));
                    let mut err = self.dcx().err(msg).span(span);
                    if let Some(sugg) = sugg {
                        match sugg {
                            unicode_chars::TokenSubstitution::DirectedQuotes {
                                span,
                                suggestion: _,
                                ascii_str,
                                ascii_name,
                            } => {
                                let msg = format!("Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{ascii_str}' ({ascii_name}), but are not");
                                err = err.span_help(span, msg);
                            }
                            unicode_chars::TokenSubstitution::Other {
                                span,
                                suggestion: _,
                                ch,
                                u_name,
                                ascii_str,
                                ascii_name,
                            } => {
                                let msg = format!("Unicode character '{ch}' ({u_name}) looks like '{ascii_str}' ({ascii_name}), but it is not");
                                err = err.span_help(span, msg);
                            }
                        }
                    }
                    if c == '\0' {
                        let help = "source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used";
                        err = err.help(help);
                    }
                    if repeats > 0 {
                        let note = match repeats {
                            1 => "once more".to_string(),
                            _ => format!("{repeats} more times"),
                        };
                        err = err.note(format!("character repeats {note}"));
                    }
                    err.emit();

                    if let Some(token) = token {
                        // Substitute the look-alike with the ASCII token it resembles.
                        token
                    } else {
                        // No substitution; skip the invalid char like whitespace.
                        preceded_by_whitespace = true;
                        continue;
                    }
                }

                RawTokenKind::Eof => TokenKind::Eof,
            };
            let span = self.new_span(start, self.pos);
            return (Token::new(kind, span), preceded_by_whitespace);
        }
    }

    /// Interns a (doc-)comment's contents into a `TokenKind::Comment`.
    ///
    /// `_content_start` is currently unused but kept for parity with the other `cook_*` methods.
    fn cook_doc_comment(
        &self,
        _content_start: BytePos,
        content: &str,
        is_doc: bool,
        comment_kind: CommentKind,
    ) -> TokenKind {
        TokenKind::Comment(is_doc, comment_kind, Symbol::intern(content))
    }

    /// Validates and interns a literal spanning `start..end`, emitting diagnostics
    /// for unterminated strings, empty/unsupported-base integers, and malformed rationals.
    fn cook_literal(
        &self,
        start: BytePos,
        end: BytePos,
        kind: RawLiteralKind,
    ) -> (TokenLitKind, Symbol) {
        match kind {
            RawLiteralKind::Str { kind, terminated } => {
                if !terminated {
                    let span = self.new_span(start, end);
                    let guar = self.dcx().err("unterminated string").span(span).emit();
                    (TokenLitKind::Err(guar), self.symbol_from_to(start, end))
                } else {
                    (kind.into(), self.cook_quoted(kind, start, end))
                }
            }
            RawLiteralKind::Int { base, empty_int } => {
                if empty_int {
                    let span = self.new_span(start, end);
                    self.dcx().err("no valid digits found for number").span(span).emit();
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                } else {
                    if matches!(base, Base::Binary | Base::Octal) {
                        // Shadowed `start` skips the `0b`/`0o` prefix for the error span.
                        let start = start + 2;
                        // To uncomment if binary and octal literals are ever supported.
                        /*
                        let base = base as u32;
                        let s = self.str_from_to(start, end);
                        for (i, c) in s.char_indices() {
                            if c != '_' && c.to_digit(base).is_none() {
                                let msg = format!("invalid digit for a base {base} literal");
                                let lo = start + BytePos::from_usize(i);
                                let hi = lo + BytePos::from_usize(c.len_utf8());
                                let span = self.new_span(lo, hi);
                                self.dcx().err(msg).span(span).emit();
                            }
                        }
                        */
                        let msg = format!("integers in base {base} are not supported");
                        self.dcx().err(msg).span(self.new_span(start, end)).emit();
                    }
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                }
            }
            RawLiteralKind::Rational { base, empty_exponent } => {
                if empty_exponent {
                    // NOTE(review): this uses `self.pos` where sibling branches use `end`;
                    // equivalent today because the caller passes `end == self.pos`,
                    // but consider using `end` for consistency.
                    let span = self.new_span(start, self.pos);
                    self.dcx().err("expected at least one digit in exponent").span(span).emit();
                }

                let unsupported_base =
                    matches!(base, Base::Binary | Base::Octal | Base::Hexadecimal);
                if unsupported_base {
                    let msg = format!("{base} rational numbers are not supported");
                    self.dcx().err(msg).span(self.new_span(start, end)).emit();
                }

                (TokenLitKind::Rational, self.symbol_from_to(start, end))
            }
        }
    }

    /// Validates escape sequences in a quoted string literal and interns its contents
    /// (without quotes/prefix); on escape errors, interns the full literal including
    /// quotes for clearer error messages.
    fn cook_quoted(&self, kind: StrKind, start: BytePos, end: BytePos) -> Symbol {
        let (mode, prefix_len) = match kind {
            StrKind::Str => (unescape::Mode::Str, 0),
            StrKind::Unicode => (unescape::Mode::UnicodeStr, 7),
            StrKind::Hex => (unescape::Mode::HexStr, 3),
        };

        // Account for quote (`"` or `'`) and prefix (`unicode` = 7 bytes, `hex` = 3).
        let content_start = start + 1 + BytePos(prefix_len);
        let content_end = end - 1;
        let lit_content = self.str_from_to(content_start, content_end);

        let mut has_err = false;
        unescape::unescape_literal(lit_content, mode, |range, result| {
            // Here we only check for errors. The actual unescaping is done later.
            if let Err(err) = result {
                has_err = true;
                // Translate the content-relative `range` into an absolute source span.
                let (start, end) = (range.start as u32, range.end as u32);
                let lo = content_start + BytePos(start);
                let hi = lo + BytePos(end - start);
                let span = self.new_span(lo, hi);
                unescape::emit_unescape_error(self.dcx(), lit_content, span, range, err);
            }
        });

        // We normally exclude the quotes for the symbol, but for errors we
        // include it because it results in clearer error messages.
        if has_err {
            self.symbol_from_to(start, end)
        } else {
            Symbol::intern(lit_content)
        }
    }

    #[inline]
    fn new_span(&self, lo: BytePos, hi: BytePos) -> Span {
        Span::new(lo, hi)
    }

    /// Converts an absolute position into an index into `self.src`.
    #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
        (pos - self.start_pos).to_usize()
    }

    /// Interns the slice of source text from `start` up to but excluding `self.pos`.
    fn symbol_from(&self, start: BytePos) -> Symbol {
        self.symbol_from_to(start, self.pos)
    }

    /// Slice of the source text from `start` up to but excluding `self.pos`.
    fn str_from(&self, start: BytePos) -> &'src str {
        self.str_from_to(start, self.pos)
    }

    /// Same as `symbol_from`, with an explicit endpoint.
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        Symbol::intern(self.str_from_to(start, end))
    }

    /// Slice of the source text spanning from `start` up to but excluding `end`.
    #[track_caller]
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &'src str {
        &self.src[self.src_index(start)..self.src_index(end)]
    }

    /// Slice of the source text spanning from `start` until the end.
    fn str_from_to_end(&self, start: BytePos) -> &'src str {
        &self.src[self.src_index(start)..]
    }
}
431
432impl Iterator for Lexer<'_, '_> {
433    type Item = Token;
434
435    #[inline]
436    fn next(&mut self) -> Option<Token> {
437        let token = self.next_token();
438        if token.is_eof() {
439            None
440        } else {
441            Some(token)
442        }
443    }
444}
445
446impl std::iter::FusedIterator for Lexer<'_, '_> {}
447
/// Renders a character for use in an error message: printable ASCII is returned
/// as-is, everything else is escaped via [`char::escape_default`].
fn escaped_char(c: char) -> String {
    // Don't escape \, ' or " for user-facing messages.
    if ('\u{20}'..='\u{7e}').contains(&c) {
        c.to_string()
    } else {
        c.escape_default().to_string()
    }
}
458
#[cfg(test)]
mod tests {
    use super::*;
    use std::ops::Range;
    use BinOpToken::*;
    use TokenKind::*;

    /// Expected output: byte-range spans paired with the cooked token kinds.
    type Expected<'a> = &'a [(Range<usize>, TokenKind)];

    /// Lexes `src` (comments skipped) and asserts both the token stream and
    /// whether any diagnostics were emitted.
    fn check(src: &str, should_fail: bool, expected: Expected<'_>) {
        let sess = Session::builder().with_silent_emitter(None).build();
        let tokens: Vec<_> = Lexer::new(&sess, src)
            .filter(|t| !t.is_comment())
            .map(|t| (t.span.lo().to_usize()..t.span.hi().to_usize(), t.kind))
            .collect();
        assert_eq!(sess.dcx.has_errors().is_err(), should_fail, "{src:?}");
        assert_eq!(tokens, expected, "{src:?}");
    }

    /// `check` for inputs that must lex without errors.
    fn checks(tests: &[(&str, Expected<'_>)]) {
        for &(src, expected) in tests {
            check(src, false, expected);
        }
    }

    /// `check` for inputs with an explicit expected-failure flag.
    fn checks_full(tests: &[(&str, bool, Expected<'_>)]) {
        for &(src, should_fail, expected) in tests {
            check(src, should_fail, expected);
        }
    }

    /// Shorthand for a literal token with an interned symbol.
    fn lit(kind: TokenLitKind, symbol: &str) -> TokenKind {
        Literal(kind, sym(symbol))
    }

    /// Shorthand for an identifier token.
    fn id(symbol: &str) -> TokenKind {
        Ident(sym(symbol))
    }

    fn sym(s: &str) -> Symbol {
        Symbol::intern(s)
    }

    #[test]
    fn empty() {
        checks(&[
            ("", &[]),
            (" ", &[]),
            (" \n", &[]),
            ("\n", &[]),
            ("\n\t", &[]),
            ("\n \t", &[]),
            ("\n \t ", &[]),
            (" \n \t \t", &[]),
        ]);
    }

    #[test]
    fn literals() {
        use TokenLitKind::*;
        solar_interface::SessionGlobals::new().set(|| {
            checks(&[
                ("\"\"", &[(0..2, lit(Str, ""))]),
                ("\"\"\"\"", &[(0..2, lit(Str, "")), (2..4, lit(Str, ""))]),
                ("\"\" \"\"", &[(0..2, lit(Str, "")), (3..5, lit(Str, ""))]),
                ("\"\\\"\"", &[(0..4, lit(Str, "\\\""))]),
                ("unicode\"\"", &[(0..9, lit(UnicodeStr, ""))]),
                ("unicode \"\"", &[(0..7, id("unicode")), (8..10, lit(Str, ""))]),
                ("hex\"\"", &[(0..5, lit(HexStr, ""))]),
                ("hex \"\"", &[(0..3, id("hex")), (4..6, lit(Str, ""))]),
                //
                ("0", &[(0..1, lit(Integer, "0"))]),
                ("0a", &[(0..1, lit(Integer, "0")), (1..2, id("a"))]),
                ("0.e1", &[(0..1, lit(Integer, "0")), (1..2, Dot), (2..4, id("e1"))]),
                (
                    "0.e-1",
                    &[
                        (0..1, lit(Integer, "0")),
                        (1..2, Dot),
                        (2..3, id("e")),
                        (3..4, BinOp(Minus)),
                        (4..5, lit(Integer, "1")),
                    ],
                ),
                ("0.0", &[(0..3, lit(Rational, "0.0"))]),
                ("0.", &[(0..2, lit(Rational, "0."))]),
                (".0", &[(0..2, lit(Rational, ".0"))]),
                ("0.0e1", &[(0..5, lit(Rational, "0.0e1"))]),
                ("0.0e-1", &[(0..6, lit(Rational, "0.0e-1"))]),
                ("0e1", &[(0..3, lit(Rational, "0e1"))]),
                ("0e1.", &[(0..3, lit(Rational, "0e1")), (3..4, Dot)]),
            ]);

            // Binary and octal integers lex as a single literal but emit an
            // "unsupported base" error; uppercase prefixes are not recognized.
            checks_full(&[
                ("0b0", true, &[(0..3, lit(Integer, "0b0"))]),
                ("0B0", false, &[(0..1, lit(Integer, "0")), (1..3, id("B0"))]),
                ("0o0", true, &[(0..3, lit(Integer, "0o0"))]),
                ("0O0", false, &[(0..1, lit(Integer, "0")), (1..3, id("O0"))]),
                ("0xa", false, &[(0..3, lit(Integer, "0xa"))]),
                ("0Xa", false, &[(0..1, lit(Integer, "0")), (1..3, id("Xa"))]),
            ]);
        });
    }

    #[test]
    fn idents() {
        solar_interface::SessionGlobals::new().set(|| {
            checks(&[
                ("$", &[(0..1, id("$"))]),
                ("a$", &[(0..2, id("a$"))]),
                ("a_$123_", &[(0..7, id("a_$123_"))]),
                ("   b", &[(3..4, id("b"))]),
                (" c\t ", &[(1..2, id("c"))]),
                (" \td ", &[(2..3, id("d"))]),
                (" \t\nef ", &[(3..5, id("ef"))]),
                (" \t\n\tghi ", &[(4..7, id("ghi"))]),
            ]);
        });
    }

    #[test]
    fn doc_comments() {
        use CommentKind::*;

        /// Shorthand for a doc-comment token.
        fn doc(kind: CommentKind, symbol: &str) -> TokenKind {
            Comment(true, kind, sym(symbol))
        }

        solar_interface::SessionGlobals::new().set(|| {
            checks(&[
                ("// line comment", &[]),
                ("// / line comment", &[]),
                ("// ! line comment", &[]),
                ("// /* line comment", &[]), // */ <-- aaron-bond.better-comments doesn't like this
                ("/// line doc-comment", &[(0..20, doc(Line, " line doc-comment"))]),
                ("//// invalid doc-comment", &[]),
                ("///// invalid doc-comment", &[]),
                //
                ("/**/", &[]),
                ("/***/", &[]),
                ("/****/", &[]),
                ("/*/*/", &[]),
                ("/* /*/", &[]),
                ("/*/**/", &[]),
                ("/* /**/", &[]),
                ("/* normal block comment */", &[]),
                ("/* /* normal block comment */", &[]),
                ("/** block doc-comment */", &[(0..24, doc(Block, " block doc-comment "))]),
                ("/** /* block doc-comment */", &[(0..27, doc(Block, " /* block doc-comment "))]),
                ("/** block doc-comment /*/", &[(0..25, doc(Block, " block doc-comment /"))]),
            ]);
        });
    }

    #[test]
    fn operators() {
        use Delimiter::*;
        // From Solc `TOKEN_LIST`: https://github.com/ethereum/solidity/blob/194b114664c7daebc2ff68af3c573272f5d28913/liblangutil/Token.h#L67
        checks(&[
            (")", &[(0..1, CloseDelim(Parenthesis))]),
            ("(", &[(0..1, OpenDelim(Parenthesis))]),
            ("[", &[(0..1, OpenDelim(Bracket))]),
            ("]", &[(0..1, CloseDelim(Bracket))]),
            ("{", &[(0..1, OpenDelim(Brace))]),
            ("}", &[(0..1, CloseDelim(Brace))]),
            (":", &[(0..1, Colon)]),
            (";", &[(0..1, Semi)]),
            (".", &[(0..1, Dot)]),
            ("?", &[(0..1, Question)]),
            ("=>", &[(0..2, FatArrow)]),
            ("->", &[(0..2, Arrow)]),
            ("=", &[(0..1, Eq)]),
            ("|=", &[(0..2, BinOpEq(Or))]),
            ("^=", &[(0..2, BinOpEq(Caret))]),
            ("&=", &[(0..2, BinOpEq(And))]),
            ("<<=", &[(0..3, BinOpEq(Shl))]),
            (">>=", &[(0..3, BinOpEq(Shr))]),
            (">>>=", &[(0..4, BinOpEq(Sar))]),
            ("+=", &[(0..2, BinOpEq(Plus))]),
            ("-=", &[(0..2, BinOpEq(Minus))]),
            ("*=", &[(0..2, BinOpEq(Star))]),
            ("/=", &[(0..2, BinOpEq(Slash))]),
            ("%=", &[(0..2, BinOpEq(Percent))]),
            (",", &[(0..1, Comma)]),
            ("||", &[(0..2, OrOr)]),
            ("&&", &[(0..2, AndAnd)]),
            ("|", &[(0..1, BinOp(Or))]),
            ("^", &[(0..1, BinOp(Caret))]),
            ("&", &[(0..1, BinOp(And))]),
            ("<<", &[(0..2, BinOp(Shl))]),
            (">>", &[(0..2, BinOp(Shr))]),
            (">>>", &[(0..3, BinOp(Sar))]),
            ("+", &[(0..1, BinOp(Plus))]),
            ("-", &[(0..1, BinOp(Minus))]),
            ("*", &[(0..1, BinOp(Star))]),
            ("/", &[(0..1, BinOp(Slash))]),
            ("%", &[(0..1, BinOp(Percent))]),
            ("**", &[(0..2, StarStar)]),
            ("==", &[(0..2, EqEq)]),
            ("!=", &[(0..2, Ne)]),
            ("<", &[(0..1, Lt)]),
            (">", &[(0..1, Gt)]),
            ("<=", &[(0..2, Le)]),
            (">=", &[(0..2, Ge)]),
            ("!", &[(0..1, Not)]),
            ("~", &[(0..1, Tilde)]),
            ("++", &[(0..2, PlusPlus)]),
            ("--", &[(0..2, MinusMinus)]),
            (":=", &[(0..2, Walrus)]),
        ]);
    }

    #[test]
    fn glueing() {
        // Exercises `Token::glue` via `next_token`: adjacent tokens merge,
        // whitespace prevents merging.
        checks(&[
            ("=", &[(0..1, Eq)]),
            ("==", &[(0..2, EqEq)]),
            ("= =", &[(0..1, Eq), (2..3, Eq)]),
            ("===", &[(0..2, EqEq), (2..3, Eq)]),
            ("== =", &[(0..2, EqEq), (3..4, Eq)]),
            ("= ==", &[(0..1, Eq), (2..4, EqEq)]),
            ("====", &[(0..2, EqEq), (2..4, EqEq)]),
            ("== ==", &[(0..2, EqEq), (3..5, EqEq)]),
            ("= ===", &[(0..1, Eq), (2..4, EqEq), (4..5, Eq)]),
            ("=====", &[(0..2, EqEq), (2..4, EqEq), (4..5, Eq)]),
            //
            (" <", &[(1..2, Lt)]),
            (" <=", &[(1..3, Le)]),
            (" < =", &[(1..2, Lt), (3..4, Eq)]),
            (" <<", &[(1..3, BinOp(Shl))]),
            (" <<=", &[(1..4, BinOpEq(Shl))]),
            //
            (" >", &[(1..2, Gt)]),
            (" >=", &[(1..3, Ge)]),
            (" > =", &[(1..2, Gt), (3..4, Eq)]),
            (" >>", &[(1..3, BinOp(Shr))]),
            (" >>>", &[(1..4, BinOp(Sar))]),
            (" >>>=", &[(1..5, BinOpEq(Sar))]),
            //
            ("+", &[(0..1, BinOp(Plus))]),
            ("++", &[(0..2, PlusPlus)]),
            ("+++", &[(0..2, PlusPlus), (2..3, BinOp(Plus))]),
            ("+ =", &[(0..1, BinOp(Plus)), (2..3, Eq)]),
            ("+ +=", &[(0..1, BinOp(Plus)), (2..4, BinOpEq(Plus))]),
            ("+++=", &[(0..2, PlusPlus), (2..4, BinOpEq(Plus))]),
            ("+ +", &[(0..1, BinOp(Plus)), (2..3, BinOp(Plus))]),
            //
            ("-", &[(0..1, BinOp(Minus))]),
            ("--", &[(0..2, MinusMinus)]),
            ("---", &[(0..2, MinusMinus), (2..3, BinOp(Minus))]),
            ("- =", &[(0..1, BinOp(Minus)), (2..3, Eq)]),
            ("- -=", &[(0..1, BinOp(Minus)), (2..4, BinOpEq(Minus))]),
            ("---=", &[(0..2, MinusMinus), (2..4, BinOpEq(Minus))]),
            ("- -", &[(0..1, BinOp(Minus)), (2..3, BinOp(Minus))]),
        ]);
    }
}