solar_parse/lexer/
mod.rs

1//! Solidity and Yul lexer.
2
3use solar_ast::{
4    Base, StrKind,
5    token::{CommentKind, Token, TokenKind, TokenLitKind},
6};
7use solar_data_structures::hint::cold_path;
8use solar_interface::{
9    BytePos, Session, Span, Symbol, diagnostics::DiagCtxt, source_map::SourceFile,
10};
11
12mod cursor;
13use cursor::token::{RawLiteralKind, RawToken, RawTokenKind};
14pub use cursor::*;
15
16pub mod unescape;
17
18mod unicode_chars;
19
20mod utf8;
21
/// Solidity and Yul lexer.
///
/// Converts a [`Cursor`]'s output from simple [`RawTokenKind`]s into rich [`TokenKind`]s, by
/// converting strings into interned symbols, and running additional validation.
pub struct Lexer<'sess, 'src> {
    /// Cursor for getting lexer tokens.
    cursor: Cursor<'src>,
    /// The absolute offset within the source_map of the current character.
    pos: BytePos,

    /// The parsing context.
    pub(crate) sess: &'sess Session,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// Source text to tokenize.
    src: &'src str,

    /// When an "unknown start of token: \u{a0}" error has already been emitted earlier
    /// in this file, it's safe to treat further occurrences of the non-breaking
    /// space character as whitespace.
    nbsp_is_whitespace: bool,
}
44
45impl<'sess, 'src> Lexer<'sess, 'src> {
46    /// Creates a new `Lexer` for the given source string.
47    pub fn new(sess: &'sess Session, src: &'src str) -> Self {
48        Self::with_start_pos(sess, src, BytePos(0))
49    }
50
    /// Creates a new `Lexer` for the given source file.
    ///
    /// Note that the source file must be added to the source map before calling this function,
    /// so that the file's `start_pos` yields spans that resolve correctly.
    pub fn from_source_file(sess: &'sess Session, file: &'src SourceFile) -> Self {
        Self::with_start_pos(sess, &file.src, file.start_pos)
    }
57
58    /// Creates a new `Lexer` for the given source string and starting position.
59    pub fn with_start_pos(sess: &'sess Session, src: &'src str, start_pos: BytePos) -> Self {
60        assert!(sess.is_entered(), "session should be entered before lexing");
61        Self {
62            sess,
63            start_pos,
64            pos: start_pos,
65            src,
66            cursor: Cursor::new(src),
67            nbsp_is_whitespace: false,
68        }
69    }
70
    /// Returns a reference to the session's diagnostic context, used to emit lexing errors.
    #[inline]
    pub fn dcx(&self) -> &'sess DiagCtxt {
        &self.sess.dcx
    }
76
77    /// Consumes the lexer and collects the remaining tokens into a vector.
78    ///
79    /// Note that this skips comments, as [required by the parser](crate::Parser::new).
80    ///
81    /// Prefer using this method instead of manually collecting tokens using [`Iterator`].
82    #[instrument(name = "lex", level = "debug", skip_all)]
83    pub fn into_tokens(mut self) -> Vec<Token> {
84        // This is an estimate of the number of tokens in the source.
85        let mut tokens = Vec::with_capacity(self.src.len() / 4);
86        loop {
87            let token = self.slop();
88            if token.is_eof() {
89                break;
90            }
91            if token.is_comment() {
92                continue;
93            }
94            tokens.push(token);
95        }
96        trace!(
97            src.len = self.src.len(),
98            tokens.len = tokens.len(),
99            tokens.capacity = tokens.capacity(),
100            ratio = %format_args!("{:.2}", self.src.len() as f64 / tokens.len() as f64),
101            "lexed"
102        );
103        tokens
104    }
105
    /// Slops up a token from the input string.
    ///
    /// Advances the lexer by the length of the token.
    /// Prefer using `self` as an iterator instead.
    pub fn slop(&mut self) -> Token {
        // How many further identical invalid characters to skip silently after an
        // "unknown start of token" error has been emitted; set by `handle_unknown_token`.
        let mut swallow_next_invalid = 0;
        // Loops only to skip whitespace and swallowed invalid characters; every other
        // raw token produces exactly one cooked token and returns.
        loop {
            let RawToken { kind: raw_kind, len } = self.cursor.slop();
            let start = self.pos;
            self.pos += len;

            // Now "cook" the token, converting the simple `RawTokenKind` into a rich `TokenKind`.
            // This turns strings into interned symbols and runs additional validation.
            let kind = match raw_kind {
                RawTokenKind::LineComment { is_doc } => {
                    // Opening delimiter (`//`, or `///` for doc-comments) is not included
                    // into the symbol.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    let content = self.str_from(content_start);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Line)
                }
                RawTokenKind::BlockComment { is_doc, terminated } => {
                    if !terminated {
                        cold_path();
                        let msg = if is_doc {
                            "unterminated block doc-comment"
                        } else {
                            "unterminated block comment"
                        };
                        self.dcx().err(msg).span(self.new_span(start, self.pos)).emit();
                    }

                    // Opening delimiter and closing delimiter are not included into the symbol.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    // An unterminated comment has no closing `*/` to exclude.
                    let content_end = self.pos - (terminated as u32) * 2;
                    let content = self.str_from_to(content_start, content_end);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Block)
                }
                RawTokenKind::Whitespace => {
                    continue;
                }
                RawTokenKind::Ident => {
                    let sym = self.symbol_from(start);
                    TokenKind::Ident(sym)
                }
                RawTokenKind::Literal { kind } => {
                    let (kind, symbol) = self.cook_literal(start, self.pos, kind);
                    TokenKind::Literal(kind, symbol)
                }

                // Expression-operator symbols.
                RawTokenKind::Eq => TokenKind::Eq,
                RawTokenKind::Lt => TokenKind::Lt,
                RawTokenKind::Le => TokenKind::Le,
                RawTokenKind::EqEq => TokenKind::EqEq,
                RawTokenKind::Ne => TokenKind::Ne,
                RawTokenKind::Ge => TokenKind::Ge,
                RawTokenKind::Gt => TokenKind::Gt,
                RawTokenKind::AndAnd => TokenKind::AndAnd,
                RawTokenKind::OrOr => TokenKind::OrOr,
                RawTokenKind::Not => TokenKind::Not,
                RawTokenKind::Tilde => TokenKind::Tilde,
                RawTokenKind::Walrus => TokenKind::Walrus,
                RawTokenKind::PlusPlus => TokenKind::PlusPlus,
                RawTokenKind::MinusMinus => TokenKind::MinusMinus,
                RawTokenKind::StarStar => TokenKind::StarStar,
                RawTokenKind::BinOp(binop) => TokenKind::BinOp(binop),
                RawTokenKind::BinOpEq(binop) => TokenKind::BinOpEq(binop),

                // Structural symbols.
                RawTokenKind::At => TokenKind::At,
                RawTokenKind::Dot => TokenKind::Dot,
                RawTokenKind::Comma => TokenKind::Comma,
                RawTokenKind::Semi => TokenKind::Semi,
                RawTokenKind::Colon => TokenKind::Colon,
                RawTokenKind::Arrow => TokenKind::Arrow,
                RawTokenKind::FatArrow => TokenKind::FatArrow,
                RawTokenKind::Question => TokenKind::Question,
                RawTokenKind::OpenDelim(delim) => TokenKind::OpenDelim(delim),
                RawTokenKind::CloseDelim(delim) => TokenKind::CloseDelim(delim),

                RawTokenKind::Unknown => {
                    // `None` means the character was swallowed: skip it and keep lexing.
                    if let Some(token) = self.handle_unknown_token(start, &mut swallow_next_invalid)
                    {
                        token
                    } else {
                        continue;
                    }
                }

                RawTokenKind::Eof => TokenKind::Eof,
            };
            let span = self.new_span(start, self.pos);
            return Token::new(kind, span);
        }
    }
201
    /// Handles a [`RawTokenKind::Unknown`] character starting at `start`.
    ///
    /// Emits an "unknown start of token" diagnostic, adding a help message when the
    /// character looks like a confusable Unicode substitution of an ASCII token.
    /// Returns the substituted token, if any, or `None` when the caller should skip the
    /// character and continue lexing.
    #[cold]
    fn handle_unknown_token(
        &mut self,
        start: BytePos,
        swallow_next_invalid: &mut usize,
    ) -> Option<TokenKind> {
        // Don't emit diagnostics for sequences of the same invalid token
        if *swallow_next_invalid > 0 {
            *swallow_next_invalid -= 1;
            return None;
        }
        let mut it = self.str_from_to_end(start).chars();
        let c = it.next().unwrap();
        if c == '\u{00a0}' {
            // If an error has already been reported on non-breaking
            // space characters earlier in the file, treat all
            // subsequent occurrences as whitespace.
            if self.nbsp_is_whitespace {
                return None;
            }
            self.nbsp_is_whitespace = true;
        }

        // Count immediately following repetitions of the same character, and have the
        // caller swallow them without further diagnostics.
        let repeats = it.take_while(|c1| *c1 == c).count();
        *swallow_next_invalid = repeats;

        let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats + 1);

        // The span covers the character and all of its counted repetitions.
        let span = self.new_span(start, self.pos + BytePos::from_usize(repeats * c.len_utf8()));
        let msg = format!("unknown start of token: {}", escaped_char(c));
        let mut err = self.dcx().err(msg).span(span);
        if let Some(sugg) = sugg {
            match sugg {
                unicode_chars::TokenSubstitution::DirectedQuotes {
                    span,
                    suggestion: _,
                    ascii_str,
                    ascii_name,
                } => {
                    let msg = format!(
                        "Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{ascii_str}' ({ascii_name}), but are not"
                    );
                    err = err.span_help(span, msg);
                }
                unicode_chars::TokenSubstitution::Other {
                    span,
                    suggestion: _,
                    ch,
                    u_name,
                    ascii_str,
                    ascii_name,
                } => {
                    let msg = format!(
                        "Unicode character '{ch}' ({u_name}) looks like '{ascii_str}' ({ascii_name}), but it is not"
                    );
                    err = err.span_help(span, msg);
                }
            }
        }
        if c == '\0' {
            let help = "source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used";
            err = err.help(help);
        }
        if repeats > 0 {
            let note = match repeats {
                1 => "once more".to_string(),
                _ => format!("{repeats} more times"),
            };
            err = err.note(format!("character repeats {note}"));
        }
        err.emit();

        token
    }
276
277    fn cook_doc_comment(
278        &self,
279        _content_start: BytePos,
280        content: &str,
281        is_doc: bool,
282        comment_kind: CommentKind,
283    ) -> TokenKind {
284        TokenKind::Comment(is_doc, comment_kind, self.intern(content))
285    }
286
    /// Validates a raw literal and interns its text.
    ///
    /// Returns the cooked literal kind — `Err` for unterminated strings — and the interned
    /// symbol. For strings the symbol excludes the quotes and prefix; for numbers it spans
    /// the whole `start..end` range.
    fn cook_literal(
        &self,
        start: BytePos,
        end: BytePos,
        kind: RawLiteralKind,
    ) -> (TokenLitKind, Symbol) {
        match kind {
            RawLiteralKind::Str { kind, terminated } => {
                if !terminated {
                    cold_path();
                    let span = self.new_span(start, end);
                    let guar = self.dcx().err("unterminated string").span(span).emit();
                    (TokenLitKind::Err(guar), self.symbol_from_to(start, end))
                } else {
                    (kind.into(), self.cook_quoted(kind, start, end))
                }
            }
            RawLiteralKind::Int { base, empty_int } => {
                if empty_int {
                    cold_path();
                    let span = self.new_span(start, end);
                    self.dcx().err("no valid digits found for number").span(span).emit();
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                } else {
                    if matches!(base, Base::Binary | Base::Octal) {
                        cold_path();
                        // Skip the `0b`/`0o` prefix. NOTE(review): this shadowing also
                        // excludes the prefix from the error span below — confirm intended.
                        let start = start + 2;
                        // To uncomment if binary and octal literals are ever supported.
                        /*
                        let base = base as u32;
                        let s = self.str_from_to(start, end);
                        for (i, c) in s.char_indices() {
                            if c != '_' && c.to_digit(base).is_none() {
                                cold_path();
                                let msg = format!("invalid digit for a base {base} literal");
                                let lo = start + BytePos::from_usize(i);
                                let hi = lo + BytePos::from_usize(c.len_utf8());
                                let span = self.new_span(lo, hi);
                                self.dcx().err(msg).span(span).emit();
                            }
                        }
                        */
                        let msg = format!("integers in base {base} are not supported");
                        self.dcx().err(msg).span(self.new_span(start, end)).emit();
                    }
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                }
            }
            RawLiteralKind::Rational { base, empty_exponent } => {
                if empty_exponent {
                    cold_path();
                    let span = self.new_span(start, self.pos);
                    self.dcx().err("expected at least one digit in exponent").span(span).emit();
                }

                // Only decimal rational literals are supported.
                let unsupported_base =
                    matches!(base, Base::Binary | Base::Octal | Base::Hexadecimal);
                if unsupported_base {
                    cold_path();
                    let msg = format!("{base} rational numbers are not supported");
                    self.dcx().err(msg).span(self.new_span(start, end)).emit();
                }

                (TokenLitKind::Rational, self.symbol_from_to(start, end))
            }
        }
    }
354
355    fn cook_quoted(&self, kind: StrKind, start: BytePos, end: BytePos) -> Symbol {
356        // Account for quote (`"` or `'`) and prefix.
357        let content_start = start + 1 + BytePos(kind.prefix().len() as u32);
358        let content_end = end - 1;
359        let lit_content = self.str_from_to(content_start, content_end);
360        self.intern(lit_content)
361    }
362
    /// Creates a span from two absolute byte positions.
    #[inline]
    fn new_span(&self, lo: BytePos, hi: BytePos) -> Span {
        Span::new_unchecked(lo, hi)
    }
367
368    #[inline]
369    fn src_index(&self, pos: BytePos) -> usize {
370        (pos - self.start_pos).to_usize()
371    }
372
    /// Interns the slice of the source text from `start` up to but excluding the current
    /// position `self.pos`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn symbol_from(&self, start: BytePos) -> Symbol {
        self.symbol_from_to(start, self.pos)
    }
379
    /// Slice of the source text from `start` up to but excluding the current position
    /// `self.pos`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn str_from(&self, start: BytePos) -> &'src str {
        self.str_from_to(start, self.pos)
    }
386
    /// Same as `symbol_from`, with an explicit endpoint: interns the source text in
    /// `start..end`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        self.intern(self.str_from_to(start, end))
    }
392
393    /// Slice of the source text spanning from `start` until the end.
394    #[cfg_attr(debug_assertions, track_caller)]
395    fn str_from_to_end(&self, start: BytePos) -> &'src str {
396        self.str_from_to(start, self.start_pos + BytePos::from_usize(self.src.len()))
397    }
398
    /// Slice of the source text spanning from `start` up to but excluding `end`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &'src str {
        let range = self.src_index(start)..self.src_index(end);
        if cfg!(debug_assertions) {
            // Checked indexing in debug builds catches out-of-bounds or
            // non-char-boundary ranges early.
            &self.src[range]
        } else {
            // SAFETY: Should never be out of bounds: positions are derived from
            // cursor-produced token lengths, which lie within `self.src` on char boundaries.
            unsafe { self.src.get_unchecked(range) }
        }
    }
410
    /// Interns the given string in the session's string interner.
    fn intern(&self, s: &str) -> Symbol {
        self.sess.intern(s)
    }
414}
415
416impl Iterator for Lexer<'_, '_> {
417    type Item = Token;
418
419    #[inline]
420    fn next(&mut self) -> Option<Token> {
421        let token = self.slop();
422        if token.is_eof() { None } else { Some(token) }
423    }
424}
425
// Fused: once `next` returns `None` it keeps returning `None`; this relies on the
// underlying `Cursor` repeatedly yielding EOF at the end of input.
impl std::iter::FusedIterator for Lexer<'_, '_> {}
427
/// Renders a character for display in an error message.
///
/// Printable ASCII (including `\`, `'` and `"`, which are not escaped for user-facing
/// messages) is returned as-is; everything else is rendered via [`char::escape_default`].
fn escaped_char(c: char) -> String {
    if matches!(c, '\u{20}'..='\u{7e}') {
        c.to_string()
    } else {
        c.escape_default().to_string()
    }
}
438
#[cfg(test)]
mod tests {
    use super::*;
    use TokenKind::*;
    use solar_ast::token::BinOpToken::*;
    use std::ops::Range;

    /// Expected lexer output: `(byte range, token kind)` pairs, comments excluded.
    type Expected<'a> = &'a [(Range<usize>, TokenKind)];

    // Run through the lexer to get the same input that the parser gets.
    #[track_caller]
    fn lex(src: &str, should_fail: bool, f: fn(Expected<'_>)) {
        let sess =
            Session::builder().with_buffer_emitter(Default::default()).single_threaded().build();
        sess.enter_sequential(|| {
            let file = sess.source_map().new_source_file("test".to_string(), src).unwrap();
            let tokens: Vec<_> = Lexer::from_source_file(&sess, &file)
                .filter(|t| !t.is_comment())
                .map(|t| (t.span.lo().to_usize()..t.span.hi().to_usize(), t.kind))
                .collect();
            let diags = sess.dcx.emitted_diagnostics().unwrap();
            assert_eq!(
                sess.dcx.has_errors().is_err(),
                should_fail,
                "{src:?} -> {tokens:?};\n{diags}",
            );
            f(&tokens)
        });
    }

    // Asserts that each source lexes to the expected tokens with no errors.
    macro_rules! checks {
        ($( ($src:expr, $expected:expr $(,)?) ),* $(,)?) => {
            checks_full! { $( ($src, false, $expected) ),* }
        };
    }

    // Like `checks!`, but also takes whether lexing should emit errors.
    macro_rules! checks_full {
        ($( ($src:expr, $should_fail:expr, $expected:expr $(,)?) ),* $(,)?) => {{
            $(
                lex($src, $should_fail, |tokens| assert_eq!(tokens, $expected, "{:?}", $src));
            )*
        }};
    }

    // Shorthand for a literal token with an interned symbol.
    fn lit(kind: TokenLitKind, symbol: &str) -> TokenKind {
        Literal(kind, sym(symbol))
    }

    // Shorthand for an identifier token.
    fn id(symbol: &str) -> TokenKind {
        Ident(sym(symbol))
    }

    fn sym(s: &str) -> Symbol {
        Symbol::intern(s)
    }

    #[test]
    fn empty() {
        checks![
            ("", &[]),
            (" ", &[]),
            (" \n", &[]),
            ("\n", &[]),
            ("\n\t", &[]),
            ("\n \t", &[]),
            ("\n \t ", &[]),
            (" \n \t \t", &[]),
        ];
    }

    #[test]
    fn literals() {
        use TokenLitKind::*;
        checks![
            ("\"\"", &[(0..2, lit(Str, ""))]),
            ("\"\"\"\"", &[(0..2, lit(Str, "")), (2..4, lit(Str, ""))]),
            ("\"\" \"\"", &[(0..2, lit(Str, "")), (3..5, lit(Str, ""))]),
            ("\"\\\"\"", &[(0..4, lit(Str, "\\\""))]),
            ("unicode\"\"", &[(0..9, lit(UnicodeStr, ""))]),
            ("unicode \"\"", &[(0..7, id("unicode")), (8..10, lit(Str, ""))]),
            ("hex\"\"", &[(0..5, lit(HexStr, ""))]),
            ("hex \"\"", &[(0..3, id("hex")), (4..6, lit(Str, ""))]),
            //
            ("0", &[(0..1, lit(Integer, "0"))]),
            ("0a", &[(0..1, lit(Integer, "0")), (1..2, id("a"))]),
            ("0.e1", &[(0..1, lit(Integer, "0")), (1..2, Dot), (2..4, id("e1"))]),
            (
                "0.e-1",
                &[
                    (0..1, lit(Integer, "0")),
                    (1..2, Dot),
                    (2..3, id("e")),
                    (3..4, BinOp(Minus)),
                    (4..5, lit(Integer, "1")),
                ],
            ),
            ("0.0", &[(0..3, lit(Rational, "0.0"))]),
            ("0.", &[(0..2, lit(Rational, "0."))]),
            (".0", &[(0..2, lit(Rational, ".0"))]),
            ("0.0e1", &[(0..5, lit(Rational, "0.0e1"))]),
            ("0.0e-1", &[(0..6, lit(Rational, "0.0e-1"))]),
            ("0e1", &[(0..3, lit(Rational, "0e1"))]),
            ("0e1.", &[(0..3, lit(Rational, "0e1")), (3..4, Dot)]),
        ];

        // Binary and octal integers lex as one token but emit an error; uppercase base
        // prefixes (`0B`, `0O`, `0X`) are not recognized as prefixes at all.
        checks_full![
            ("0b0", true, &[(0..3, lit(Integer, "0b0"))]),
            ("0B0", false, &[(0..1, lit(Integer, "0")), (1..3, id("B0"))]),
            ("0o0", true, &[(0..3, lit(Integer, "0o0"))]),
            ("0O0", false, &[(0..1, lit(Integer, "0")), (1..3, id("O0"))]),
            ("0xa", false, &[(0..3, lit(Integer, "0xa"))]),
            ("0Xa", false, &[(0..1, lit(Integer, "0")), (1..3, id("Xa"))]),
        ];
    }

    #[test]
    fn idents() {
        checks![
            ("$", &[(0..1, id("$"))]),
            ("a$", &[(0..2, id("a$"))]),
            ("a_$123_", &[(0..7, id("a_$123_"))]),
            ("   b", &[(3..4, id("b"))]),
            (" c\t ", &[(1..2, id("c"))]),
            (" \td ", &[(2..3, id("d"))]),
            (" \t\nef ", &[(3..5, id("ef"))]),
            (" \t\n\tghi ", &[(4..7, id("ghi"))]),
        ];
    }

    #[test]
    fn doc_comments() {
        use CommentKind::*;

        // Shorthand for a doc-comment token.
        fn doc(kind: CommentKind, symbol: &str) -> TokenKind {
            Comment(true, kind, sym(symbol))
        }

        checks![
            ("// line comment", &[]),
            ("// / line comment", &[]),
            ("// ! line comment", &[]),
            ("// /* line comment", &[]), // */ <-- aaron-bond.better-comments doesn't like this
            ("/// line doc-comment", &[(0..20, doc(Line, " line doc-comment"))]),
            ("//// invalid doc-comment", &[]),
            ("///// invalid doc-comment", &[]),
            //
            ("/**/", &[]),
            ("/***/", &[]),
            ("/****/", &[]),
            ("/*/*/", &[]),
            ("/* /*/", &[]),
            ("/*/**/", &[]),
            ("/* /**/", &[]),
            ("/* normal block comment */", &[]),
            ("/* /* normal block comment */", &[]),
            ("/** block doc-comment */", &[(0..24, doc(Block, " block doc-comment "))]),
            ("/** /* block doc-comment */", &[(0..27, doc(Block, " /* block doc-comment "))]),
            ("/** block doc-comment /*/", &[(0..25, doc(Block, " block doc-comment /"))]),
        ];
    }

    #[test]
    fn operators() {
        use solar_ast::token::Delimiter::*;
        // From Solc `TOKEN_LIST`: https://github.com/argotorg/solidity/blob/194b114664c7daebc2ff68af3c573272f5d28913/liblangutil/Token.h#L67
        checks![
            (")", &[(0..1, CloseDelim(Parenthesis))]),
            ("(", &[(0..1, OpenDelim(Parenthesis))]),
            ("[", &[(0..1, OpenDelim(Bracket))]),
            ("]", &[(0..1, CloseDelim(Bracket))]),
            ("{", &[(0..1, OpenDelim(Brace))]),
            ("}", &[(0..1, CloseDelim(Brace))]),
            (":", &[(0..1, Colon)]),
            (";", &[(0..1, Semi)]),
            (".", &[(0..1, Dot)]),
            ("?", &[(0..1, Question)]),
            ("=>", &[(0..2, FatArrow)]),
            ("->", &[(0..2, Arrow)]),
            ("=", &[(0..1, Eq)]),
            ("|=", &[(0..2, BinOpEq(Or))]),
            ("^=", &[(0..2, BinOpEq(Caret))]),
            ("&=", &[(0..2, BinOpEq(And))]),
            ("<<=", &[(0..3, BinOpEq(Shl))]),
            (">>=", &[(0..3, BinOpEq(Shr))]),
            (">>>=", &[(0..4, BinOpEq(Sar))]),
            ("+=", &[(0..2, BinOpEq(Plus))]),
            ("-=", &[(0..2, BinOpEq(Minus))]),
            ("*=", &[(0..2, BinOpEq(Star))]),
            ("/=", &[(0..2, BinOpEq(Slash))]),
            ("%=", &[(0..2, BinOpEq(Percent))]),
            (",", &[(0..1, Comma)]),
            ("||", &[(0..2, OrOr)]),
            ("&&", &[(0..2, AndAnd)]),
            ("|", &[(0..1, BinOp(Or))]),
            ("^", &[(0..1, BinOp(Caret))]),
            ("&", &[(0..1, BinOp(And))]),
            ("<<", &[(0..2, BinOp(Shl))]),
            (">>", &[(0..2, BinOp(Shr))]),
            (">>>", &[(0..3, BinOp(Sar))]),
            ("+", &[(0..1, BinOp(Plus))]),
            ("-", &[(0..1, BinOp(Minus))]),
            ("*", &[(0..1, BinOp(Star))]),
            ("/", &[(0..1, BinOp(Slash))]),
            ("%", &[(0..1, BinOp(Percent))]),
            ("**", &[(0..2, StarStar)]),
            ("==", &[(0..2, EqEq)]),
            ("!=", &[(0..2, Ne)]),
            ("<", &[(0..1, Lt)]),
            (">", &[(0..1, Gt)]),
            ("<=", &[(0..2, Le)]),
            (">=", &[(0..2, Ge)]),
            ("!", &[(0..1, Not)]),
            ("~", &[(0..1, Tilde)]),
            ("++", &[(0..2, PlusPlus)]),
            ("--", &[(0..2, MinusMinus)]),
            (":=", &[(0..2, Walrus)]),
        ];
    }

    // Checks that multi-character operators are glued greedily, left to right.
    #[test]
    fn glueing() {
        checks![
            ("=", &[(0..1, Eq)]),
            ("==", &[(0..2, EqEq)]),
            ("= =", &[(0..1, Eq), (2..3, Eq)]),
            ("===", &[(0..2, EqEq), (2..3, Eq)]),
            ("== =", &[(0..2, EqEq), (3..4, Eq)]),
            ("= ==", &[(0..1, Eq), (2..4, EqEq)]),
            ("====", &[(0..2, EqEq), (2..4, EqEq)]),
            ("== ==", &[(0..2, EqEq), (3..5, EqEq)]),
            ("= ===", &[(0..1, Eq), (2..4, EqEq), (4..5, Eq)]),
            ("=====", &[(0..2, EqEq), (2..4, EqEq), (4..5, Eq)]),
            //
            (" <", &[(1..2, Lt)]),
            (" <=", &[(1..3, Le)]),
            (" < =", &[(1..2, Lt), (3..4, Eq)]),
            (" <<", &[(1..3, BinOp(Shl))]),
            (" <<=", &[(1..4, BinOpEq(Shl))]),
            //
            (" >", &[(1..2, Gt)]),
            (" >=", &[(1..3, Ge)]),
            (" > =", &[(1..2, Gt), (3..4, Eq)]),
            (" >>", &[(1..3, BinOp(Shr))]),
            (" >>>", &[(1..4, BinOp(Sar))]),
            (" >>>=", &[(1..5, BinOpEq(Sar))]),
            //
            ("+", &[(0..1, BinOp(Plus))]),
            ("++", &[(0..2, PlusPlus)]),
            ("+++", &[(0..2, PlusPlus), (2..3, BinOp(Plus))]),
            ("+ =", &[(0..1, BinOp(Plus)), (2..3, Eq)]),
            ("+ +=", &[(0..1, BinOp(Plus)), (2..4, BinOpEq(Plus))]),
            ("+++=", &[(0..2, PlusPlus), (2..4, BinOpEq(Plus))]),
            ("+ +", &[(0..1, BinOp(Plus)), (2..3, BinOp(Plus))]),
            //
            ("-", &[(0..1, BinOp(Minus))]),
            ("--", &[(0..2, MinusMinus)]),
            ("---", &[(0..2, MinusMinus), (2..3, BinOp(Minus))]),
            ("- =", &[(0..1, BinOp(Minus)), (2..3, Eq)]),
            ("- -=", &[(0..1, BinOp(Minus)), (2..4, BinOpEq(Minus))]),
            ("---=", &[(0..2, MinusMinus), (2..4, BinOpEq(Minus))]),
            ("- -", &[(0..1, BinOp(Minus)), (2..3, BinOp(Minus))]),
        ];
    }
}