solar_parse/lexer/
mod.rs

1//! Solidity and Yul lexer.
2
3use solar_ast::{
4    Base, StrKind,
5    token::{CommentKind, Token, TokenKind, TokenLitKind},
6};
7use solar_data_structures::hint::cold_path;
8use solar_interface::{
9    BytePos, Session, Span, Symbol, diagnostics::DiagCtxt, source_map::SourceFile,
10};
11
12mod cursor;
13use cursor::token::{RawLiteralKind, RawToken, RawTokenKind};
14pub use cursor::*;
15
16pub mod unescape;
17
18mod unicode_chars;
19
20mod utf8;
21
/// Solidity and Yul lexer.
///
/// Converts a [`Cursor`]'s output from simple [`RawTokenKind`]s into rich [`TokenKind`]s, by
/// converting strings into interned symbols, and running additional validation.
pub struct Lexer<'sess, 'src> {
    /// Cursor for getting lexer tokens.
    cursor: Cursor<'src>,
    /// The absolute offset within the source map of the current character.
    pos: BytePos,

    /// The parsing context.
    pub(crate) sess: &'sess Session,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// Source text to tokenize.
    src: &'src str,

    /// When an "unknown start of token: \u{a0}" error has already been emitted earlier
    /// in this file, it's safe to treat further occurrences of the non-breaking
    /// space character as whitespace.
    nbsp_is_whitespace: bool,
}
44
impl<'sess, 'src> Lexer<'sess, 'src> {
    /// Creates a new `Lexer` for the given source string.
    pub fn new(sess: &'sess Session, src: &'src str) -> Self {
        Self::with_start_pos(sess, src, BytePos(0))
    }

    /// Creates a new `Lexer` for the given source file.
    ///
    /// Note that the source file must be added to the source map before calling this function.
    pub fn from_source_file(sess: &'sess Session, file: &'src SourceFile) -> Self {
        Self::with_start_pos(sess, &file.src, file.start_pos)
    }

    /// Creates a new `Lexer` for the given source string and starting position.
    pub fn with_start_pos(sess: &'sess Session, src: &'src str, start_pos: BytePos) -> Self {
        Self {
            sess,
            start_pos,
            pos: start_pos,
            src,
            cursor: Cursor::new(src),
            nbsp_is_whitespace: false,
        }
    }

    /// Returns a reference to the diagnostic context.
    #[inline]
    pub fn dcx(&self) -> &'sess DiagCtxt {
        &self.sess.dcx
    }

    /// Consumes the lexer and collects the remaining tokens into a vector.
    ///
    /// Note that this skips comments, as [required by the parser](crate::Parser::new).
    ///
    /// Prefer using this method instead of manually collecting tokens using [`Iterator`].
    #[instrument(name = "lex", level = "debug", skip_all)]
    pub fn into_tokens(mut self) -> Vec<Token> {
        // This is an estimate of the number of tokens in the source: assume an
        // average token is ~4 bytes long, so that pushing below rarely reallocates.
        let mut tokens = Vec::with_capacity(self.src.len() / 4);
        loop {
            let token = self.slop();
            if token.is_eof() {
                break;
            }
            // Comments are lexed but never handed to the parser.
            if token.is_comment() {
                continue;
            }
            tokens.push(token);
        }
        trace!(
            src.len = self.src.len(),
            tokens.len = tokens.len(),
            tokens.capacity = tokens.capacity(),
            ratio = %format_args!("{:.2}", self.src.len() as f64 / tokens.len() as f64),
            "lexed"
        );
        tokens
    }

    /// Slops up (lexes) the next token from the input string, skipping whitespace.
    ///
    /// Advances the lexer by the length of the token.
    /// Prefer using `self` as an iterator instead.
    pub fn slop(&mut self) -> Token {
        // Number of subsequent identical invalid characters to silently skip;
        // set by `handle_unknown_token` so a run of the same bad character is
        // only reported once.
        let mut swallow_next_invalid = 0;
        loop {
            let RawToken { kind: raw_kind, len } = self.cursor.slop();
            let start = self.pos;
            self.pos += len;

            // Now "cook" the token, converting the simple `RawTokenKind` into a rich `TokenKind`.
            // This turns strings into interned symbols and runs additional validation.
            let kind = match raw_kind {
                RawTokenKind::LineComment { is_doc } => {
                    // Opening delimiter (`//`, or `///` for doc-comments) is not included into
                    // the symbol.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    let content = self.str_from(content_start);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Line)
                }
                RawTokenKind::BlockComment { is_doc, terminated } => {
                    if !terminated {
                        cold_path();
                        let msg = if is_doc {
                            "unterminated block doc-comment"
                        } else {
                            "unterminated block comment"
                        };
                        self.dcx().err(msg).span(self.new_span(start, self.pos)).emit();
                    }

                    // Opening delimiter and closing delimiter are not included into the symbol.
                    // If the comment is unterminated, there is no closing `*/` to exclude.
                    let content_start = start + BytePos(if is_doc { 3 } else { 2 });
                    let content_end = self.pos - (terminated as u32) * 2;
                    let content = self.str_from_to(content_start, content_end);
                    self.cook_doc_comment(content_start, content, is_doc, CommentKind::Block)
                }
                RawTokenKind::Whitespace => {
                    continue;
                }
                RawTokenKind::Ident => {
                    let sym = self.symbol_from(start);
                    TokenKind::Ident(sym)
                }
                RawTokenKind::Literal { kind } => {
                    let (kind, symbol) = self.cook_literal(start, self.pos, kind);
                    TokenKind::Literal(kind, symbol)
                }

                // Expression-operator symbols.
                RawTokenKind::Eq => TokenKind::Eq,
                RawTokenKind::Lt => TokenKind::Lt,
                RawTokenKind::Le => TokenKind::Le,
                RawTokenKind::EqEq => TokenKind::EqEq,
                RawTokenKind::Ne => TokenKind::Ne,
                RawTokenKind::Ge => TokenKind::Ge,
                RawTokenKind::Gt => TokenKind::Gt,
                RawTokenKind::AndAnd => TokenKind::AndAnd,
                RawTokenKind::OrOr => TokenKind::OrOr,
                RawTokenKind::Not => TokenKind::Not,
                RawTokenKind::Tilde => TokenKind::Tilde,
                RawTokenKind::Walrus => TokenKind::Walrus,
                RawTokenKind::PlusPlus => TokenKind::PlusPlus,
                RawTokenKind::MinusMinus => TokenKind::MinusMinus,
                RawTokenKind::StarStar => TokenKind::StarStar,
                RawTokenKind::BinOp(binop) => TokenKind::BinOp(binop),
                RawTokenKind::BinOpEq(binop) => TokenKind::BinOpEq(binop),

                // Structural symbols.
                RawTokenKind::At => TokenKind::At,
                RawTokenKind::Dot => TokenKind::Dot,
                RawTokenKind::Comma => TokenKind::Comma,
                RawTokenKind::Semi => TokenKind::Semi,
                RawTokenKind::Colon => TokenKind::Colon,
                RawTokenKind::Arrow => TokenKind::Arrow,
                RawTokenKind::FatArrow => TokenKind::FatArrow,
                RawTokenKind::Question => TokenKind::Question,
                RawTokenKind::OpenDelim(delim) => TokenKind::OpenDelim(delim),
                RawTokenKind::CloseDelim(delim) => TokenKind::CloseDelim(delim),

                RawTokenKind::Unknown => {
                    // Either substitute a token for a confusable Unicode character,
                    // or skip the character entirely and lex the next token.
                    if let Some(token) = self.handle_unknown_token(start, &mut swallow_next_invalid)
                    {
                        token
                    } else {
                        continue;
                    }
                }

                RawTokenKind::Eof => TokenKind::Eof,
            };
            let span = self.new_span(start, self.pos);
            return Token::new(kind, span);
        }
    }

    /// Handles a [`RawTokenKind::Unknown`] character starting at `start`: emits an
    /// "unknown start of token" diagnostic, with a substitution suggestion when the
    /// character is a known confusable Unicode character.
    ///
    /// Returns the substituted token, or `None` when the character should simply be
    /// skipped (repeated invalid characters, or a non-breaking space that was
    /// already reported earlier in this file).
    #[cold]
    fn handle_unknown_token(
        &mut self,
        start: BytePos,
        swallow_next_invalid: &mut usize,
    ) -> Option<TokenKind> {
        // Don't emit diagnostics for sequences of the same invalid token
        if *swallow_next_invalid > 0 {
            *swallow_next_invalid -= 1;
            return None;
        }
        // An unknown token always contains at least one character, so `unwrap` is fine.
        let mut it = self.str_from_to_end(start).chars();
        let c = it.next().unwrap();
        if c == '\u{00a0}' {
            // If an error has already been reported on non-breaking
            // space characters earlier in the file, treat all
            // subsequent occurrences as whitespace.
            if self.nbsp_is_whitespace {
                return None;
            }
            self.nbsp_is_whitespace = true;
        }

        // Count identical characters that immediately follow, so the run is
        // reported once here and silently swallowed on subsequent calls.
        let repeats = it.take_while(|c1| *c1 == c).count();
        *swallow_next_invalid = repeats;

        let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats + 1);

        let span = self.new_span(start, self.pos + BytePos::from_usize(repeats * c.len_utf8()));
        let msg = format!("unknown start of token: {}", escaped_char(c));
        let mut err = self.dcx().err(msg).span(span);
        if let Some(sugg) = sugg {
            match sugg {
                unicode_chars::TokenSubstitution::DirectedQuotes {
                    span,
                    suggestion: _,
                    ascii_str,
                    ascii_name,
                } => {
                    let msg = format!(
                        "Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{ascii_str}' ({ascii_name}), but are not"
                    );
                    err = err.span_help(span, msg);
                }
                unicode_chars::TokenSubstitution::Other {
                    span,
                    suggestion: _,
                    ch,
                    u_name,
                    ascii_str,
                    ascii_name,
                } => {
                    let msg = format!(
                        "Unicode character '{ch}' ({u_name}) looks like '{ascii_str}' ({ascii_name}), but it is not"
                    );
                    err = err.span_help(span, msg);
                }
            }
        }
        if c == '\0' {
            let help = "source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used";
            err = err.help(help);
        }
        if repeats > 0 {
            let note = match repeats {
                1 => "once more".to_string(),
                _ => format!("{repeats} more times"),
            };
            err = err.note(format!("character repeats {note}"));
        }
        err.emit();

        token
    }

    /// Interns a comment's contents into a [`TokenKind::Comment`].
    fn cook_doc_comment(
        &self,
        _content_start: BytePos,
        content: &str,
        is_doc: bool,
        comment_kind: CommentKind,
    ) -> TokenKind {
        TokenKind::Comment(is_doc, comment_kind, Symbol::intern(content))
    }

    /// Validates a raw literal in `start..end` and interns its text, emitting
    /// diagnostics for unterminated strings, digit-less integers, unsupported
    /// integer bases, and empty exponents.
    fn cook_literal(
        &self,
        start: BytePos,
        end: BytePos,
        kind: RawLiteralKind,
    ) -> (TokenLitKind, Symbol) {
        match kind {
            RawLiteralKind::Str { kind, terminated } => {
                if !terminated {
                    cold_path();
                    let span = self.new_span(start, end);
                    let guar = self.dcx().err("unterminated string").span(span).emit();
                    (TokenLitKind::Err(guar), self.symbol_from_to(start, end))
                } else {
                    (kind.into(), self.cook_quoted(kind, start, end))
                }
            }
            RawLiteralKind::Int { base, empty_int } => {
                if empty_int {
                    cold_path();
                    let span = self.new_span(start, end);
                    self.dcx().err("no valid digits found for number").span(span).emit();
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                } else {
                    if matches!(base, Base::Binary | Base::Octal) {
                        cold_path();
                        // Skip the `0b`/`0o` prefix when reporting.
                        let start = start + 2;
                        // To uncomment if binary and octal literals are ever supported.
                        /*
                        let base = base as u32;
                        let s = self.str_from_to(start, end);
                        for (i, c) in s.char_indices() {
                            if c != '_' && c.to_digit(base).is_none() {
                                cold_path();
                                let msg = format!("invalid digit for a base {base} literal");
                                let lo = start + BytePos::from_usize(i);
                                let hi = lo + BytePos::from_usize(c.len_utf8());
                                let span = self.new_span(lo, hi);
                                self.dcx().err(msg).span(span).emit();
                            }
                        }
                        */
                        let msg = format!("integers in base {base} are not supported");
                        self.dcx().err(msg).span(self.new_span(start, end)).emit();
                    }
                    (TokenLitKind::Integer, self.symbol_from_to(start, end))
                }
            }
            RawLiteralKind::Rational { base, empty_exponent } => {
                if empty_exponent {
                    cold_path();
                    let span = self.new_span(start, self.pos);
                    self.dcx().err("expected at least one digit in exponent").span(span).emit();
                }

                // Only decimal rationals are supported.
                let unsupported_base =
                    matches!(base, Base::Binary | Base::Octal | Base::Hexadecimal);
                if unsupported_base {
                    cold_path();
                    let msg = format!("{base} rational numbers are not supported");
                    self.dcx().err(msg).span(self.new_span(start, end)).emit();
                }

                (TokenLitKind::Rational, self.symbol_from_to(start, end))
            }
        }
    }

    /// Interns the contents of a quoted string literal in `start..end`,
    /// excluding the surrounding quotes and the string prefix, if any.
    fn cook_quoted(&self, kind: StrKind, start: BytePos, end: BytePos) -> Symbol {
        // Account for quote (`"` or `'`) and prefix.
        let content_start = start + 1 + BytePos(kind.prefix().len() as u32);
        let content_end = end - 1;
        let lit_content = self.str_from_to(content_start, content_end);
        Symbol::intern(lit_content)
    }

    /// Creates a span from the given absolute positions.
    #[inline]
    fn new_span(&self, lo: BytePos, hi: BytePos) -> Span {
        Span::new_unchecked(lo, hi)
    }

    /// Converts an absolute position to an index into `self.src`.
    #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
        (pos - self.start_pos).to_usize()
    }

    /// Interns the source text from `start` up to but excluding `self.pos`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn symbol_from(&self, start: BytePos) -> Symbol {
        self.symbol_from_to(start, self.pos)
    }

    /// Slice of the source text from `start` up to but excluding `self.pos`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn str_from(&self, start: BytePos) -> &'src str {
        self.str_from_to(start, self.pos)
    }

    /// Same as `symbol_from`, with an explicit endpoint.
    #[cfg_attr(debug_assertions, track_caller)]
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        Symbol::intern(self.str_from_to(start, end))
    }

    /// Slice of the source text spanning from `start` until the end.
    #[cfg_attr(debug_assertions, track_caller)]
    fn str_from_to_end(&self, start: BytePos) -> &'src str {
        self.str_from_to(start, BytePos::from_usize(self.src.len()))
    }

    /// Slice of the source text spanning from `start` up to but excluding `end`.
    #[cfg_attr(debug_assertions, track_caller)]
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &'src str {
        let range = self.src_index(start)..self.src_index(end);
        if cfg!(debug_assertions) {
            &self.src[range]
        } else {
            // SAFETY: Should never be out of bounds: positions only come from
            // the cursor's token boundaries within `self.src`. The indexed
            // branch above verifies this under `debug_assertions`.
            unsafe { self.src.get_unchecked(range) }
        }
    }
}
410
411impl Iterator for Lexer<'_, '_> {
412    type Item = Token;
413
414    #[inline]
415    fn next(&mut self) -> Option<Token> {
416        let token = self.slop();
417        if token.is_eof() { None } else { Some(token) }
418    }
419}
420
421impl std::iter::FusedIterator for Lexer<'_, '_> {}
422
/// Renders a character for use in an error message, escaping anything outside
/// the printable ASCII range.
fn escaped_char(c: char) -> String {
    if matches!(c, '\u{20}'..='\u{7e}') {
        // Printable ASCII (including `\`, `'` and `"`) is shown verbatim
        // in user-facing messages.
        c.to_string()
    } else {
        c.escape_default().to_string()
    }
}
433
#[cfg(test)]
mod tests {
    use super::*;
    use TokenKind::*;
    use solar_ast::token::BinOpToken::*;
    use std::ops::Range;

    /// Expected lexer output: `(byte_range, kind)` for each non-comment token.
    type Expected<'a> = &'a [(Range<usize>, TokenKind)];

    /// Lexes `src` and asserts both the produced tokens (comments excluded)
    /// and whether any errors were emitted.
    fn check(src: &str, should_fail: bool, expected: Expected<'_>) {
        let sess = Session::builder().with_silent_emitter(None).build();
        let tokens: Vec<_> = Lexer::new(&sess, src)
            .filter(|t| !t.is_comment())
            .map(|t| (t.span.lo().to_usize()..t.span.hi().to_usize(), t.kind))
            .collect();
        assert_eq!(sess.dcx.has_errors().is_err(), should_fail, "{src:?}");
        assert_eq!(tokens, expected, "{src:?}");
    }

    /// Runs `check` on each case, expecting no errors.
    fn checks(tests: &[(&str, Expected<'_>)]) {
        for &(src, expected) in tests {
            check(src, false, expected);
        }
    }

    /// Runs `check` on each case with an explicit error expectation.
    fn checks_full(tests: &[(&str, bool, Expected<'_>)]) {
        for &(src, should_fail, expected) in tests {
            check(src, should_fail, expected);
        }
    }

    /// Shorthand for a literal token of the given kind and text.
    fn lit(kind: TokenLitKind, symbol: &str) -> TokenKind {
        Literal(kind, sym(symbol))
    }

    /// Shorthand for an identifier token.
    fn id(symbol: &str) -> TokenKind {
        Ident(sym(symbol))
    }

    /// Shorthand for interning a string.
    fn sym(s: &str) -> Symbol {
        Symbol::intern(s)
    }

    #[test]
    fn empty() {
        checks(&[
            ("", &[]),
            (" ", &[]),
            (" \n", &[]),
            ("\n", &[]),
            ("\n\t", &[]),
            ("\n \t", &[]),
            ("\n \t ", &[]),
            (" \n \t \t", &[]),
        ]);
    }

    #[test]
    fn literals() {
        use TokenLitKind::*;
        solar_interface::SessionGlobals::default().set(|| {
            checks(&[
                ("\"\"", &[(0..2, lit(Str, ""))]),
                ("\"\"\"\"", &[(0..2, lit(Str, "")), (2..4, lit(Str, ""))]),
                ("\"\" \"\"", &[(0..2, lit(Str, "")), (3..5, lit(Str, ""))]),
                ("\"\\\"\"", &[(0..4, lit(Str, "\\\""))]),
                ("unicode\"\"", &[(0..9, lit(UnicodeStr, ""))]),
                ("unicode \"\"", &[(0..7, id("unicode")), (8..10, lit(Str, ""))]),
                ("hex\"\"", &[(0..5, lit(HexStr, ""))]),
                ("hex \"\"", &[(0..3, id("hex")), (4..6, lit(Str, ""))]),
                //
                ("0", &[(0..1, lit(Integer, "0"))]),
                ("0a", &[(0..1, lit(Integer, "0")), (1..2, id("a"))]),
                ("0.e1", &[(0..1, lit(Integer, "0")), (1..2, Dot), (2..4, id("e1"))]),
                (
                    "0.e-1",
                    &[
                        (0..1, lit(Integer, "0")),
                        (1..2, Dot),
                        (2..3, id("e")),
                        (3..4, BinOp(Minus)),
                        (4..5, lit(Integer, "1")),
                    ],
                ),
                ("0.0", &[(0..3, lit(Rational, "0.0"))]),
                ("0.", &[(0..2, lit(Rational, "0."))]),
                (".0", &[(0..2, lit(Rational, ".0"))]),
                ("0.0e1", &[(0..5, lit(Rational, "0.0e1"))]),
                ("0.0e-1", &[(0..6, lit(Rational, "0.0e-1"))]),
                ("0e1", &[(0..3, lit(Rational, "0e1"))]),
                ("0e1.", &[(0..3, lit(Rational, "0e1")), (3..4, Dot)]),
            ]);

            // Binary and octal prefixes are lexed as integers but rejected
            // with an error; uppercase prefixes are not recognized at all.
            checks_full(&[
                ("0b0", true, &[(0..3, lit(Integer, "0b0"))]),
                ("0B0", false, &[(0..1, lit(Integer, "0")), (1..3, id("B0"))]),
                ("0o0", true, &[(0..3, lit(Integer, "0o0"))]),
                ("0O0", false, &[(0..1, lit(Integer, "0")), (1..3, id("O0"))]),
                ("0xa", false, &[(0..3, lit(Integer, "0xa"))]),
                ("0Xa", false, &[(0..1, lit(Integer, "0")), (1..3, id("Xa"))]),
            ]);
        });
    }

    #[test]
    fn idents() {
        solar_interface::SessionGlobals::default().set(|| {
            checks(&[
                ("$", &[(0..1, id("$"))]),
                ("a$", &[(0..2, id("a$"))]),
                ("a_$123_", &[(0..7, id("a_$123_"))]),
                ("   b", &[(3..4, id("b"))]),
                (" c\t ", &[(1..2, id("c"))]),
                (" \td ", &[(2..3, id("d"))]),
                (" \t\nef ", &[(3..5, id("ef"))]),
                (" \t\n\tghi ", &[(4..7, id("ghi"))]),
            ]);
        });
    }

    #[test]
    fn doc_comments() {
        use CommentKind::*;

        /// Shorthand for a doc-comment token of the given kind and contents.
        fn doc(kind: CommentKind, symbol: &str) -> TokenKind {
            Comment(true, kind, sym(symbol))
        }

        solar_interface::SessionGlobals::default().set(|| {
            checks(&[
                ("// line comment", &[]),
                ("// / line comment", &[]),
                ("// ! line comment", &[]),
                ("// /* line comment", &[]), // */ <-- aaron-bond.better-comments doesn't like this
                ("/// line doc-comment", &[(0..20, doc(Line, " line doc-comment"))]),
                ("//// invalid doc-comment", &[]),
                ("///// invalid doc-comment", &[]),
                //
                ("/**/", &[]),
                ("/***/", &[]),
                ("/****/", &[]),
                ("/*/*/", &[]),
                ("/* /*/", &[]),
                ("/*/**/", &[]),
                ("/* /**/", &[]),
                ("/* normal block comment */", &[]),
                ("/* /* normal block comment */", &[]),
                ("/** block doc-comment */", &[(0..24, doc(Block, " block doc-comment "))]),
                ("/** /* block doc-comment */", &[(0..27, doc(Block, " /* block doc-comment "))]),
                ("/** block doc-comment /*/", &[(0..25, doc(Block, " block doc-comment /"))]),
            ]);
        });
    }

    #[test]
    fn operators() {
        use solar_ast::token::Delimiter::*;
        // From Solc `TOKEN_LIST`: https://github.com/argotorg/solidity/blob/194b114664c7daebc2ff68af3c573272f5d28913/liblangutil/Token.h#L67
        checks(&[
            (")", &[(0..1, CloseDelim(Parenthesis))]),
            ("(", &[(0..1, OpenDelim(Parenthesis))]),
            ("[", &[(0..1, OpenDelim(Bracket))]),
            ("]", &[(0..1, CloseDelim(Bracket))]),
            ("{", &[(0..1, OpenDelim(Brace))]),
            ("}", &[(0..1, CloseDelim(Brace))]),
            (":", &[(0..1, Colon)]),
            (";", &[(0..1, Semi)]),
            (".", &[(0..1, Dot)]),
            ("?", &[(0..1, Question)]),
            ("=>", &[(0..2, FatArrow)]),
            ("->", &[(0..2, Arrow)]),
            ("=", &[(0..1, Eq)]),
            ("|=", &[(0..2, BinOpEq(Or))]),
            ("^=", &[(0..2, BinOpEq(Caret))]),
            ("&=", &[(0..2, BinOpEq(And))]),
            ("<<=", &[(0..3, BinOpEq(Shl))]),
            (">>=", &[(0..3, BinOpEq(Shr))]),
            (">>>=", &[(0..4, BinOpEq(Sar))]),
            ("+=", &[(0..2, BinOpEq(Plus))]),
            ("-=", &[(0..2, BinOpEq(Minus))]),
            ("*=", &[(0..2, BinOpEq(Star))]),
            ("/=", &[(0..2, BinOpEq(Slash))]),
            ("%=", &[(0..2, BinOpEq(Percent))]),
            (",", &[(0..1, Comma)]),
            ("||", &[(0..2, OrOr)]),
            ("&&", &[(0..2, AndAnd)]),
            ("|", &[(0..1, BinOp(Or))]),
            ("^", &[(0..1, BinOp(Caret))]),
            ("&", &[(0..1, BinOp(And))]),
            ("<<", &[(0..2, BinOp(Shl))]),
            (">>", &[(0..2, BinOp(Shr))]),
            (">>>", &[(0..3, BinOp(Sar))]),
            ("+", &[(0..1, BinOp(Plus))]),
            ("-", &[(0..1, BinOp(Minus))]),
            ("*", &[(0..1, BinOp(Star))]),
            ("/", &[(0..1, BinOp(Slash))]),
            ("%", &[(0..1, BinOp(Percent))]),
            ("**", &[(0..2, StarStar)]),
            ("==", &[(0..2, EqEq)]),
            ("!=", &[(0..2, Ne)]),
            ("<", &[(0..1, Lt)]),
            (">", &[(0..1, Gt)]),
            ("<=", &[(0..2, Le)]),
            (">=", &[(0..2, Ge)]),
            ("!", &[(0..1, Not)]),
            ("~", &[(0..1, Tilde)]),
            ("++", &[(0..2, PlusPlus)]),
            ("--", &[(0..2, MinusMinus)]),
            (":=", &[(0..2, Walrus)]),
        ]);
    }

    /// Checks that multi-character operators are split at the right boundaries.
    #[test]
    fn glueing() {
        checks(&[
            ("=", &[(0..1, Eq)]),
            ("==", &[(0..2, EqEq)]),
            ("= =", &[(0..1, Eq), (2..3, Eq)]),
            ("===", &[(0..2, EqEq), (2..3, Eq)]),
            ("== =", &[(0..2, EqEq), (3..4, Eq)]),
            ("= ==", &[(0..1, Eq), (2..4, EqEq)]),
            ("====", &[(0..2, EqEq), (2..4, EqEq)]),
            ("== ==", &[(0..2, EqEq), (3..5, EqEq)]),
            ("= ===", &[(0..1, Eq), (2..4, EqEq), (4..5, Eq)]),
            ("=====", &[(0..2, EqEq), (2..4, EqEq), (4..5, Eq)]),
            //
            (" <", &[(1..2, Lt)]),
            (" <=", &[(1..3, Le)]),
            (" < =", &[(1..2, Lt), (3..4, Eq)]),
            (" <<", &[(1..3, BinOp(Shl))]),
            (" <<=", &[(1..4, BinOpEq(Shl))]),
            //
            (" >", &[(1..2, Gt)]),
            (" >=", &[(1..3, Ge)]),
            (" > =", &[(1..2, Gt), (3..4, Eq)]),
            (" >>", &[(1..3, BinOp(Shr))]),
            (" >>>", &[(1..4, BinOp(Sar))]),
            (" >>>=", &[(1..5, BinOpEq(Sar))]),
            //
            ("+", &[(0..1, BinOp(Plus))]),
            ("++", &[(0..2, PlusPlus)]),
            ("+++", &[(0..2, PlusPlus), (2..3, BinOp(Plus))]),
            ("+ =", &[(0..1, BinOp(Plus)), (2..3, Eq)]),
            ("+ +=", &[(0..1, BinOp(Plus)), (2..4, BinOpEq(Plus))]),
            ("+++=", &[(0..2, PlusPlus), (2..4, BinOpEq(Plus))]),
            ("+ +", &[(0..1, BinOp(Plus)), (2..3, BinOp(Plus))]),
            //
            ("-", &[(0..1, BinOp(Minus))]),
            ("--", &[(0..2, MinusMinus)]),
            ("---", &[(0..2, MinusMinus), (2..3, BinOp(Minus))]),
            ("- =", &[(0..1, BinOp(Minus)), (2..3, Eq)]),
            ("- -=", &[(0..1, BinOp(Minus)), (2..4, BinOpEq(Minus))]),
            ("---=", &[(0..2, MinusMinus), (2..4, BinOpEq(Minus))]),
            ("- -", &[(0..1, BinOp(Minus)), (2..3, BinOp(Minus))]),
        ]);
    }
}