// zig_lexer/
// lib.rs

1#![no_std]
2
/// Radix of an integer literal, reported in [`Token::IntegerLiteral`].
///
/// The lexer selects the base from the literal's prefix: `0b` for binary,
/// `0o` for octal, `0x` for hexadecimal, and decimal when no prefix is present.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum IntegerBase {
    /// Literal introduced by `0b`.
    Binary,
    /// Literal introduced by `0o`.
    Octal,
    /// Literal without a base prefix.
    Decimal,
    /// Literal introduced by `0x`.
    Hexadecimal,
}
10
/// Radix of a floating-point literal, reported in [`Token::FloatLiteral`].
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum FloatBase {
    /// Float written with decimal digits (exponent marker `e` / `E`).
    Decimal,
    /// Float written with hexadecimal digits (exponent marker `p` / `P`).
    Hexadecimal,
}
16
17#[derive(Debug, Copy, Clone, Eq, PartialEq)]
18pub enum Token {
19    Invalid,
20    Whitespace,
21    Comment,
22    DocComment,
23    Builtin,
24    Identifier,
25    RawIdentifier {
26        is_unterminated: bool,
27        has_invalid_escape: bool,
28    },
29    IntegerLiteral {
30        base: IntegerBase,
31        is_unterminated: bool,
32        has_invalid_characters: bool,
33        has_duplicate_underscore: bool,
34    },
35    FloatLiteral {
36        base: FloatBase,
37        is_unterminated: bool,
38        has_invalid_characters: bool,
39        has_duplicate_underscore: bool,
40    },
41    StringLiteral {
42        is_unterminated: bool,
43        has_invalid_escape: bool,
44    },
45    CharacterLiteral {
46        is_empty: bool,
47        is_unterminated: bool,
48        has_invalid_escape: bool,
49    },
50    MultilineStringLiteralLine,
51    Bang,              // !
52    BangEqual,         // !=
53    Percent,           // %
54    PercentEqual,      // %=
55    And,               // &
56    And2,              // &&
57    AndEqual,          // &=
58    LParen,            // (
59    RParen,            // )
60    Star,              // *
61    Star2,             // **
62    StarEqual,         // *=
63    StarPercent,       // *%
64    StarPercentEqual,  // *%=
65    Plus,              // +
66    Plus2,             // ++
67    PlusEqual,         // +=
68    PlusPercent,       // +%
69    PlusPercentEqual,  // +%=
70    Comma,             // ,
71    Minus,             // -
72    MinusEqual,        // -=
73    MinusPercent,      // -%
74    MinusPercentEqual, // -%=
75    Dot,               // .
76    Dot2,              // ..
77    Dot3,              // ...
78    DotStar,           // .*
79    Slash,             // /
80    SlashEqual,        // /=
81    Colon,             // :
82    Semicolon,         // ;
83    LAngle,            // <
84    LAngleEqual,       // <=
85    LAngle2,           // <<
86    LAngle2Equal,      // <<=
87    Equal,             // =
88    Equal2,            // ==
89    EqualRAngle,       // =>
90    RAngle,            // >
91    RAngleEqual,       // >=
92    RAngle2,           // >>
93    RAngle2Equal,      // >>=
94    Question,          // ?
95    At,                // @
96    LBracket,          // [
97    RBracket,          // ]
98    Caret,             // ^
99    CaretEqual,        // ^=
100    LBrace,            // {
101    Or,                // |
102    Or2,               // ||
103    OrEqual,           // |=
104    RBrace,            // }
105}
106
/// Which quoted construct the shared string/escape states are currently
/// lexing. It decides the closing delimiter (`"` or `'`) and the token kind
/// that is reported when the construct ends.
#[derive(PartialEq)]
enum EscapeKind {
    /// Inside `@"..."`; ends as `Token::RawIdentifier`.
    RawIdentifier,
    /// Inside `"..."`; ends as `Token::StringLiteral`.
    StringLiteral,
    /// Inside `'...'`; ends as `Token::CharacterLiteral`.
    CharacterLiteral,
}
113
114enum State {
115    Start,
116    Invalid,
117    Whitespace,
118    Bang,
119    Percent,
120    And,
121    Star,
122    StarPercent,
123    Plus,
124    PlusPercent,
125    Minus,
126    MinusPercent,
127    Dot,
128    Dot2,
129    Slash,
130    Slash2,
131    LAngle,
132    LAngle2,
133    Equal,
134    RAngle,
135    RAngle2,
136    At,
137    Caret,
138    Or,
139    Comment,
140    DocComment,
141    MultilineStringLiteralLine,
142    Identifier,
143    Builtin,
144    StringLiteral {
145        is_empty: bool,
146        escape_kind: EscapeKind,
147        has_invalid_escape: bool,
148    },
149    StringEscape {
150        is_empty: bool,
151        escape_kind: EscapeKind,
152        has_invalid_escape: bool,
153    },
154    StringEscapeHex1 {
155        escape_kind: EscapeKind,
156        has_invalid_escape: bool,
157    },
158    StringEscapeHex2 {
159        escape_kind: EscapeKind,
160        has_invalid_escape: bool,
161    },
162    StringEscapeUnicode1 {
163        escape_kind: EscapeKind,
164        has_invalid_escape: bool,
165    },
166    StringEscapeUnicode2 {
167        escape_kind: EscapeKind,
168        has_invalid_escape: bool,
169    },
170    StringEscapeUnicode3 {
171        escape_kind: EscapeKind,
172        has_invalid_escape: bool,
173    },
174    Zero,
175    Number {
176        has_invalid_characters: bool,
177        has_duplicate_underscore: bool,
178    },
179    NumberUnderscore {
180        has_invalid_characters: bool,
181        has_duplicate_underscore: bool,
182    },
183    NumberExponent {
184        has_invalid_characters: bool,
185        has_duplicate_underscore: bool,
186    },
187    NumberExponentSign {
188        is_unterminated: bool,
189        has_invalid_characters: bool,
190        has_duplicate_underscore: bool,
191    },
192    NumberExponentSignUnderscore {
193        has_invalid_characters: bool,
194        has_duplicate_underscore: bool,
195    },
196    NumberBinary {
197        is_unterminated: bool,
198        has_invalid_characters: bool,
199        has_duplicate_underscore: bool,
200    },
201    NumberBinaryUnderscore {
202        has_invalid_characters: bool,
203        has_duplicate_underscore: bool,
204    },
205    NumberOctal {
206        is_unterminated: bool,
207        has_invalid_characters: bool,
208        has_duplicate_underscore: bool,
209    },
210    NumberOctalUnderscore {
211        has_invalid_characters: bool,
212        has_duplicate_underscore: bool,
213    },
214    NumberHex {
215        is_unterminated: bool,
216        has_invalid_characters: bool,
217        has_duplicate_underscore: bool,
218    },
219    NumberHexUnderscore {
220        has_invalid_characters: bool,
221        has_duplicate_underscore: bool,
222    },
223    NumberHexDot {
224        is_unterminated: bool,
225        has_invalid_characters: bool,
226        has_duplicate_underscore: bool,
227    },
228    NumberHexDotUnderscore {
229        has_invalid_characters: bool,
230        has_duplicate_underscore: bool,
231    },
232    NumberHexExponent {
233        has_invalid_characters: bool,
234        has_duplicate_underscore: bool,
235    },
236    NumberHexExponentSign {
237        is_unterminated: bool,
238        has_invalid_characters: bool,
239        has_duplicate_underscore: bool,
240    },
241    NumberHexExponentSignUnderscore {
242        has_invalid_characters: bool,
243        has_duplicate_underscore: bool,
244    },
245    NumberDot {
246        is_unterminated: bool,
247        has_invalid_characters: bool,
248        has_duplicate_underscore: bool,
249    },
250    NumberDotUnderscore {
251        has_invalid_characters: bool,
252        has_duplicate_underscore: bool,
253    },
254}
255
256enum Step {
257    Continue(State),
258    Reprocess(State),
259    Backtrack,
260    Abort(Token),
261    End(Token),
262}
263
264fn step(state: State, c: Option<char>) -> Step {
265    match state {
266        State::Start => match c {
267            Some(' ') | Some('\t') | Some('\r') | Some('\n') => Step::Continue(State::Whitespace),
268            Some('!') => Step::Continue(State::Bang),
269            Some('"') => Step::Continue(State::StringLiteral {
270                is_empty: true,
271                escape_kind: EscapeKind::StringLiteral,
272                has_invalid_escape: false,
273            }),
274            Some('%') => Step::Continue(State::Percent),
275            Some('&') => Step::Continue(State::And),
276            Some('\'') => Step::Continue(State::StringLiteral {
277                is_empty: true,
278                escape_kind: EscapeKind::CharacterLiteral,
279                has_invalid_escape: false,
280            }),
281            Some('(') => Step::End(Token::LParen),
282            Some(')') => Step::End(Token::RParen),
283            Some('*') => Step::Continue(State::Star),
284            Some('+') => Step::Continue(State::Plus),
285            Some(',') => Step::End(Token::Comma),
286            Some('-') => Step::Continue(State::Minus),
287            Some('.') => Step::Continue(State::Dot),
288            Some('/') => Step::Continue(State::Slash),
289            Some('0') => Step::Continue(State::Zero),
290            Some('1'..='9') => Step::Continue(State::Number {
291                has_invalid_characters: false,
292                has_duplicate_underscore: false,
293            }),
294            Some(':') => Step::End(Token::Colon),
295            Some(';') => Step::End(Token::Semicolon),
296            Some('<') => Step::Continue(State::LAngle),
297            Some('=') => Step::Continue(State::Equal),
298            Some('>') => Step::Continue(State::RAngle),
299            Some('?') => Step::End(Token::Question),
300            Some('@') => Step::Continue(State::At),
301            Some('A'..='Z') | Some('a'..='z') | Some('_') => Step::Continue(State::Identifier),
302            Some('[') => Step::End(Token::LBracket),
303            Some('\\') => Step::Continue(State::MultilineStringLiteralLine),
304            Some(']') => Step::End(Token::RBracket),
305            Some('^') => Step::Continue(State::Caret),
306            Some('{') => Step::End(Token::LBrace),
307            Some('|') => Step::Continue(State::Or),
308            Some('}') => Step::End(Token::RBrace),
309            Some(_) => Step::Continue(State::Invalid),
310            None => Step::Abort(Token::Invalid),
311        },
312        State::Invalid => match step(State::Start, c) {
313            Step::Continue(State::Invalid) | Step::Abort(Token::Invalid) => match c {
314                Some(_) => Step::Continue(State::Invalid),
315                None => Step::Abort(Token::Invalid),
316            },
317            _ => Step::Abort(Token::Invalid),
318        },
319        State::Whitespace => match c {
320            Some(' ') | Some('\t') | Some('\r') | Some('\n') => Step::Continue(State::Whitespace),
321            Some(_) | None => Step::Abort(Token::Whitespace),
322        },
323        State::Bang => match c {
324            Some('=') => Step::End(Token::BangEqual),
325            Some(_) | None => Step::Abort(Token::Bang),
326        },
327        State::Percent => match c {
328            Some('=') => Step::End(Token::PercentEqual),
329            Some(_) | None => Step::Abort(Token::Percent),
330        },
331        State::And => match c {
332            Some('&') => Step::End(Token::And2),
333            Some('=') => Step::End(Token::AndEqual),
334            Some(_) | None => Step::Abort(Token::And),
335        },
336        State::Star => match c {
337            Some('*') => Step::End(Token::Star2),
338            Some('=') => Step::End(Token::StarEqual),
339            Some('%') => Step::Continue(State::StarPercent),
340            Some(_) | None => Step::Abort(Token::Star),
341        },
342        State::StarPercent => match c {
343            Some('=') => Step::End(Token::StarPercentEqual),
344            Some(_) | None => Step::Abort(Token::StarPercent),
345        },
346        State::Plus => match c {
347            Some('+') => Step::End(Token::Plus2),
348            Some('=') => Step::End(Token::PlusEqual),
349            Some('%') => Step::Continue(State::PlusPercent),
350            Some(_) | None => Step::Abort(Token::Plus),
351        },
352        State::PlusPercent => match c {
353            Some('=') => Step::End(Token::PlusPercentEqual),
354            Some(_) | None => Step::Abort(Token::PlusPercent),
355        },
356        State::Minus => match c {
357            Some('=') => Step::End(Token::MinusEqual),
358            Some('%') => Step::Continue(State::MinusPercent),
359            Some(_) | None => Step::Abort(Token::Minus),
360        },
361        State::MinusPercent => match c {
362            Some('=') => Step::End(Token::MinusPercentEqual),
363            Some(_) | None => Step::Abort(Token::MinusPercent),
364        },
365        State::Dot => match c {
366            Some('.') => Step::Continue(State::Dot2),
367            Some('*') => Step::End(Token::DotStar),
368            Some(_) | None => Step::Abort(Token::Dot),
369        },
370        State::Dot2 => match c {
371            Some('.') => Step::End(Token::Dot3),
372            Some(_) | None => Step::Abort(Token::Dot2),
373        },
374        State::Slash => match c {
375            Some('/') => Step::Continue(State::Slash2),
376            Some('=') => Step::End(Token::SlashEqual),
377            Some(_) | None => Step::Abort(Token::Slash),
378        },
379        State::Slash2 => match c {
380            Some('\n') | None => Step::Abort(Token::Comment),
381            Some('/') => Step::Continue(State::DocComment),
382            Some(_) => Step::Continue(State::Comment),
383        },
384        State::LAngle => match c {
385            Some('=') => Step::End(Token::LAngleEqual),
386            Some('<') => Step::Continue(State::LAngle2),
387            Some(_) | None => Step::Abort(Token::LAngle),
388        },
389        State::LAngle2 => match c {
390            Some('=') => Step::End(Token::LAngle2Equal),
391            Some(_) | None => Step::Abort(Token::LAngle2),
392        },
393        State::Equal => match c {
394            Some('=') => Step::End(Token::Equal2),
395            Some('>') => Step::End(Token::EqualRAngle),
396            Some(_) | None => Step::Abort(Token::Equal),
397        },
398        State::RAngle => match c {
399            Some('=') => Step::End(Token::RAngleEqual),
400            Some('>') => Step::Continue(State::RAngle2),
401            Some(_) | None => Step::Abort(Token::RAngle),
402        },
403        State::RAngle2 => match c {
404            Some('=') => Step::End(Token::RAngle2Equal),
405            Some(_) | None => Step::Abort(Token::RAngle2),
406        },
407        State::At => match c {
408            Some('"') => Step::Continue(State::StringLiteral {
409                escape_kind: EscapeKind::RawIdentifier,
410                has_invalid_escape: false,
411                is_empty: true,
412            }),
413            Some('A'..='Z') | Some('a'..='z') | Some('_') => Step::Continue(State::Builtin),
414            Some(_) | None => Step::Abort(Token::At),
415        },
416        State::Caret => match c {
417            Some('=') => Step::End(Token::CaretEqual),
418            Some(_) | None => Step::Abort(Token::Caret),
419        },
420        State::Or => match c {
421            Some('|') => Step::End(Token::Or2),
422            Some('=') => Step::End(Token::OrEqual),
423            Some(_) | None => Step::Abort(Token::Or),
424        },
425        State::Comment => match c {
426            Some('\n') | None => Step::Abort(Token::Comment),
427            Some(_) => Step::Continue(State::Comment),
428        },
429        State::DocComment => match c {
430            Some('\n') | None => Step::Abort(Token::DocComment),
431            Some(_) => Step::Continue(State::DocComment),
432        },
433        State::MultilineStringLiteralLine => match c {
434            Some('\n') | None => Step::Abort(Token::MultilineStringLiteralLine),
435            Some(_) => Step::Continue(State::MultilineStringLiteralLine),
436        },
437        State::Identifier => match c {
438            Some('0'..='9') | Some('A'..='Z') | Some('a'..='z') | Some('_') => {
439                Step::Continue(State::Identifier)
440            }
441            Some(_) | None => Step::Abort(Token::Identifier),
442        },
443        State::Builtin => match c {
444            Some('0'..='9') | Some('A'..='Z') | Some('a'..='z') | Some('_') => {
445                Step::Continue(State::Builtin)
446            }
447            Some(_) | None => Step::Abort(Token::Builtin),
448        },
449        State::StringLiteral {
450            is_empty,
451            escape_kind,
452            has_invalid_escape,
453        } => match c {
454            Some('\\') => Step::Continue(State::StringEscape {
455                is_empty,
456                escape_kind,
457                has_invalid_escape,
458            }),
459            Some('"') if escape_kind != EscapeKind::CharacterLiteral => match escape_kind {
460                EscapeKind::RawIdentifier => Step::End(Token::RawIdentifier {
461                    is_unterminated: false,
462                    has_invalid_escape,
463                }),
464                EscapeKind::StringLiteral => Step::End(Token::StringLiteral {
465                    is_unterminated: false,
466                    has_invalid_escape,
467                }),
468                EscapeKind::CharacterLiteral => unreachable!(),
469            },
470            Some('\'') if escape_kind == EscapeKind::CharacterLiteral => match escape_kind {
471                EscapeKind::CharacterLiteral => Step::End(Token::CharacterLiteral {
472                    is_empty,
473                    is_unterminated: false,
474                    has_invalid_escape,
475                }),
476                EscapeKind::RawIdentifier | EscapeKind::StringLiteral => unreachable!(),
477            },
478            Some('\n') | None => match escape_kind {
479                EscapeKind::RawIdentifier => Step::Abort(Token::RawIdentifier {
480                    is_unterminated: true,
481                    has_invalid_escape,
482                }),
483                EscapeKind::StringLiteral => Step::Abort(Token::StringLiteral {
484                    is_unterminated: true,
485                    has_invalid_escape,
486                }),
487                EscapeKind::CharacterLiteral => Step::Abort(Token::CharacterLiteral {
488                    is_empty,
489                    is_unterminated: true,
490                    has_invalid_escape,
491                }),
492            },
493            Some(_) => Step::Continue(State::StringLiteral {
494                is_empty: false,
495                escape_kind,
496                has_invalid_escape,
497            }),
498        },
499        State::StringEscape {
500            is_empty,
501            escape_kind,
502            has_invalid_escape,
503        } => match c {
504            Some('n') | Some('r') | Some('\\') | Some('t') | Some('\'') | Some('"') => {
505                Step::Continue(State::StringLiteral {
506                    is_empty: false,
507                    escape_kind,
508                    has_invalid_escape,
509                })
510            }
511            Some('x') => Step::Continue(State::StringEscapeHex1 {
512                escape_kind,
513                has_invalid_escape,
514            }),
515            Some('u') => Step::Continue(State::StringEscapeUnicode1 {
516                escape_kind,
517                has_invalid_escape,
518            }),
519            Some(_) | None => Step::Reprocess(State::StringLiteral {
520                is_empty,
521                escape_kind,
522                has_invalid_escape: true,
523            }),
524        },
525        State::StringEscapeHex1 {
526            escape_kind,
527            has_invalid_escape,
528        } => match c {
529            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
530                Step::Continue(State::StringEscapeHex2 {
531                    escape_kind,
532                    has_invalid_escape,
533                })
534            }
535            Some('G'..='Z') | Some('g'..='z') => Step::Continue(State::StringEscapeHex2 {
536                escape_kind,
537                has_invalid_escape: true,
538            }),
539            Some(_) | None => Step::Reprocess(State::StringLiteral {
540                is_empty: false,
541                escape_kind,
542                has_invalid_escape: true,
543            }),
544        },
545        State::StringEscapeHex2 {
546            escape_kind,
547            has_invalid_escape,
548        } => match c {
549            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
550                Step::Continue(State::StringLiteral {
551                    is_empty: false,
552                    escape_kind,
553                    has_invalid_escape,
554                })
555            }
556            Some('G'..='Z') | Some('g'..='z') => Step::Continue(State::StringLiteral {
557                is_empty: false,
558                escape_kind,
559                has_invalid_escape: true,
560            }),
561            Some(_) | None => Step::Reprocess(State::StringLiteral {
562                is_empty: false,
563                escape_kind,
564                has_invalid_escape: true,
565            }),
566        },
567        State::StringEscapeUnicode1 {
568            escape_kind,
569            has_invalid_escape,
570        } => match c {
571            Some('{') => Step::Continue(State::StringEscapeUnicode2 {
572                escape_kind,
573                has_invalid_escape,
574            }),
575            Some(_) | None => Step::Reprocess(State::StringLiteral {
576                is_empty: false,
577                escape_kind,
578                has_invalid_escape: true,
579            }),
580        },
581        State::StringEscapeUnicode2 {
582            escape_kind,
583            has_invalid_escape,
584        } => match c {
585            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
586                Step::Continue(State::StringEscapeUnicode3 {
587                    escape_kind,
588                    has_invalid_escape,
589                })
590            }
591            Some(_) | None => Step::Reprocess(State::StringLiteral {
592                is_empty: false,
593                escape_kind,
594                has_invalid_escape: true,
595            }),
596        },
597        State::StringEscapeUnicode3 {
598            escape_kind,
599            has_invalid_escape,
600        } => match c {
601            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
602                Step::Continue(State::StringEscapeUnicode3 {
603                    escape_kind,
604                    has_invalid_escape,
605                })
606            }
607            Some('}') => Step::Continue(State::StringLiteral {
608                is_empty: false,
609                escape_kind,
610                has_invalid_escape,
611            }),
612            Some(_) | None => Step::Reprocess(State::StringLiteral {
613                is_empty: false,
614                escape_kind,
615                has_invalid_escape: true,
616            }),
617        },
618        State::Zero => match c {
619            Some('b') => Step::Continue(State::NumberBinary {
620                is_unterminated: true,
621                has_invalid_characters: false,
622                has_duplicate_underscore: false,
623            }),
624            Some('o') => Step::Continue(State::NumberOctal {
625                is_unterminated: true,
626                has_invalid_characters: false,
627                has_duplicate_underscore: false,
628            }),
629            Some('x') => Step::Continue(State::NumberHex {
630                is_unterminated: true,
631                has_invalid_characters: false,
632                has_duplicate_underscore: false,
633            }),
634            Some(_) | None => Step::Reprocess(State::Number {
635                has_invalid_characters: false,
636                has_duplicate_underscore: false,
637            }),
638        },
639        State::Number {
640            has_invalid_characters,
641            has_duplicate_underscore,
642        } => match c {
643            Some('0'..='9') => Step::Continue(State::Number {
644                has_invalid_characters,
645                has_duplicate_underscore,
646            }),
647            Some('A'..='D') | Some('a'..='d') | Some('F'..='O') | Some('f'..='o')
648            | Some('Q'..='Z') | Some('q'..='z') => Step::Continue(State::Number {
649                has_invalid_characters: true,
650                has_duplicate_underscore,
651            }),
652            Some('_') => Step::Continue(State::NumberUnderscore {
653                has_invalid_characters,
654                has_duplicate_underscore,
655            }),
656            Some('.') => Step::Continue(State::NumberDot {
657                is_unterminated: true,
658                has_invalid_characters,
659                has_duplicate_underscore,
660            }),
661            Some('e') | Some('E') => Step::Continue(State::NumberExponent {
662                has_invalid_characters,
663                has_duplicate_underscore,
664            }),
665            Some('p') | Some('P') => Step::Continue(State::NumberHexExponent {
666                has_invalid_characters,
667                has_duplicate_underscore,
668            }),
669            Some(_) | None => Step::Abort(Token::IntegerLiteral {
670                base: IntegerBase::Decimal,
671                is_unterminated: false,
672                has_invalid_characters,
673                has_duplicate_underscore,
674            }),
675        },
676        State::NumberUnderscore {
677            has_invalid_characters,
678            has_duplicate_underscore,
679        } => match c {
680            Some('0'..='9') => Step::Continue(State::Number {
681                has_invalid_characters,
682                has_duplicate_underscore,
683            }),
684            Some('A'..='Z') | Some('a'..='z') => Step::Continue(State::Number {
685                has_invalid_characters: true,
686                has_duplicate_underscore,
687            }),
688            Some('_') => Step::Continue(State::Number {
689                has_invalid_characters,
690                has_duplicate_underscore: true,
691            }),
692            Some(_) | None => Step::Abort(Token::IntegerLiteral {
693                base: IntegerBase::Decimal,
694                is_unterminated: true,
695                has_invalid_characters,
696                has_duplicate_underscore,
697            }),
698        },
699        State::NumberExponent {
700            has_invalid_characters,
701            has_duplicate_underscore,
702        } => match c {
703            Some('+') | Some('-') => Step::Continue(State::NumberExponentSign {
704                is_unterminated: true,
705                has_invalid_characters,
706                has_duplicate_underscore,
707            }),
708            Some(_) | None => Step::Reprocess(State::NumberExponentSign {
709                is_unterminated: true,
710                has_invalid_characters,
711                has_duplicate_underscore,
712            }),
713        },
714        State::NumberExponentSign {
715            is_unterminated,
716            has_invalid_characters,
717            has_duplicate_underscore,
718        } => match c {
719            Some('0'..='9') => Step::Continue(State::NumberExponentSign {
720                is_unterminated: false,
721                has_invalid_characters,
722                has_duplicate_underscore,
723            }),
724            Some('A'..='Z') | Some('a'..='z') => Step::Continue(State::NumberExponentSign {
725                is_unterminated: false,
726                has_invalid_characters: true,
727                has_duplicate_underscore,
728            }),
729            Some('_') => Step::Continue(State::NumberExponentSignUnderscore {
730                has_invalid_characters,
731                has_duplicate_underscore,
732            }),
733            Some(_) | None => Step::Abort(Token::FloatLiteral {
734                base: FloatBase::Decimal,
735                is_unterminated,
736                has_invalid_characters,
737                has_duplicate_underscore,
738            }),
739        },
740        State::NumberExponentSignUnderscore {
741            has_invalid_characters,
742            has_duplicate_underscore,
743        } => match c {
744            Some('0'..='9') => Step::Continue(State::NumberExponentSign {
745                is_unterminated: false,
746                has_invalid_characters,
747                has_duplicate_underscore,
748            }),
749            Some('A'..='Z') | Some('a'..='z') => Step::Continue(State::NumberExponentSign {
750                is_unterminated: false,
751                has_invalid_characters: true,
752                has_duplicate_underscore,
753            }),
754            Some('_') => Step::Continue(State::NumberExponentSign {
755                is_unterminated: false,
756                has_invalid_characters,
757                has_duplicate_underscore: true,
758            }),
759            Some(_) | None => Step::Abort(Token::FloatLiteral {
760                base: FloatBase::Decimal,
761                is_unterminated: true,
762                has_invalid_characters,
763                has_duplicate_underscore,
764            }),
765        },
766        State::NumberBinary {
767            is_unterminated,
768            has_invalid_characters,
769            has_duplicate_underscore,
770        } => match c {
771            Some('0'..='1') => Step::Continue(State::NumberBinary {
772                is_unterminated: false,
773                has_invalid_characters,
774                has_duplicate_underscore,
775            }),
776            Some('2'..='9') | Some('A'..='Z') | Some('a'..='z') => {
777                Step::Continue(State::NumberBinary {
778                    is_unterminated: false,
779                    has_invalid_characters: true,
780                    has_duplicate_underscore,
781                })
782            }
783            Some('_') => Step::Continue(State::NumberBinaryUnderscore {
784                has_invalid_characters,
785                has_duplicate_underscore,
786            }),
787            Some(_) | None => Step::Abort(Token::IntegerLiteral {
788                base: IntegerBase::Binary,
789                is_unterminated,
790                has_invalid_characters,
791                has_duplicate_underscore,
792            }),
793        },
794        State::NumberBinaryUnderscore {
795            has_invalid_characters,
796            has_duplicate_underscore,
797        } => match c {
798            Some('0'..='1') => Step::Continue(State::NumberBinary {
799                is_unterminated: false,
800                has_invalid_characters,
801                has_duplicate_underscore,
802            }),
803            Some('2'..='9') | Some('A'..='Z') | Some('a'..='z') => {
804                Step::Continue(State::NumberBinary {
805                    is_unterminated: false,
806                    has_invalid_characters: true,
807                    has_duplicate_underscore,
808                })
809            }
810            Some('_') => Step::Continue(State::NumberBinary {
811                is_unterminated: true,
812                has_invalid_characters,
813                has_duplicate_underscore: true,
814            }),
815            Some(_) | None => Step::Abort(Token::IntegerLiteral {
816                base: IntegerBase::Binary,
817                is_unterminated: true,
818                has_invalid_characters,
819                has_duplicate_underscore,
820            }),
821        },
822        State::NumberOctal {
823            is_unterminated,
824            has_invalid_characters,
825            has_duplicate_underscore,
826        } => match c {
827            Some('0'..='7') => Step::Continue(State::NumberOctal {
828                is_unterminated: false,
829                has_invalid_characters,
830                has_duplicate_underscore,
831            }),
832            Some('2'..='9') | Some('A'..='Z') | Some('a'..='z') => {
833                Step::Continue(State::NumberOctal {
834                    is_unterminated: false,
835                    has_invalid_characters: true,
836                    has_duplicate_underscore,
837                })
838            }
839            Some('_') => Step::Continue(State::NumberOctalUnderscore {
840                has_invalid_characters,
841                has_duplicate_underscore,
842            }),
843            Some(_) | None => Step::Abort(Token::IntegerLiteral {
844                base: IntegerBase::Octal,
845                is_unterminated,
846                has_invalid_characters,
847                has_duplicate_underscore,
848            }),
849        },
850        State::NumberOctalUnderscore {
851            has_invalid_characters,
852            has_duplicate_underscore,
853        } => match c {
854            Some('0'..='7') => Step::Continue(State::NumberOctal {
855                is_unterminated: false,
856                has_invalid_characters,
857                has_duplicate_underscore,
858            }),
859            Some('2'..='9') | Some('A'..='Z') | Some('a'..='z') => {
860                Step::Continue(State::NumberOctal {
861                    is_unterminated: false,
862                    has_invalid_characters: true,
863                    has_duplicate_underscore,
864                })
865            }
866            Some('_') => Step::Continue(State::NumberOctal {
867                is_unterminated: true,
868                has_invalid_characters,
869                has_duplicate_underscore: true,
870            }),
871            Some(_) | None => Step::Abort(Token::IntegerLiteral {
872                base: IntegerBase::Octal,
873                is_unterminated: true,
874                has_invalid_characters,
875                has_duplicate_underscore,
876            }),
877        },
878        State::NumberHex {
879            is_unterminated,
880            has_invalid_characters,
881            has_duplicate_underscore,
882        } => match c {
883            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
884                Step::Continue(State::NumberHex {
885                    is_unterminated: false,
886                    has_invalid_characters,
887                    has_duplicate_underscore,
888                })
889            }
890            Some('G'..='O') | Some('g'..='o') | Some('Q'..='Z') | Some('q'..='z') => {
891                Step::Continue(State::NumberHex {
892                    is_unterminated: false,
893                    has_invalid_characters: true,
894                    has_duplicate_underscore,
895                })
896            }
897            Some('_') => Step::Continue(State::NumberHexUnderscore {
898                has_invalid_characters,
899                has_duplicate_underscore,
900            }),
901            Some('.') => Step::Continue(State::NumberHexDot {
902                is_unterminated: true,
903                has_invalid_characters,
904                has_duplicate_underscore,
905            }),
906            Some('p') | Some('P') => Step::Continue(State::NumberHexExponent {
907                has_invalid_characters,
908                has_duplicate_underscore,
909            }),
910            Some(_) | None => Step::Abort(Token::IntegerLiteral {
911                base: IntegerBase::Hexadecimal,
912                is_unterminated,
913                has_invalid_characters,
914                has_duplicate_underscore,
915            }),
916        },
917        State::NumberHexUnderscore {
918            has_invalid_characters,
919            has_duplicate_underscore,
920        } => match c {
921            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
922                Step::Continue(State::NumberHex {
923                    is_unterminated: false,
924                    has_invalid_characters,
925                    has_duplicate_underscore,
926                })
927            }
928            Some('G'..='Z') | Some('g'..='z') => Step::Continue(State::NumberHex {
929                is_unterminated: false,
930                has_invalid_characters: true,
931                has_duplicate_underscore,
932            }),
933            Some('_') => Step::Continue(State::NumberHex {
934                is_unterminated: false,
935                has_invalid_characters,
936                has_duplicate_underscore: false,
937            }),
938            Some(_) | None => Step::Abort(Token::IntegerLiteral {
939                base: IntegerBase::Hexadecimal,
940                is_unterminated: true,
941                has_invalid_characters,
942                has_duplicate_underscore,
943            }),
944        },
945        State::NumberHexDot {
946            is_unterminated,
947            has_invalid_characters,
948            has_duplicate_underscore,
949        } => match c {
950            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
951                Step::Continue(State::NumberHexDot {
952                    is_unterminated: false,
953                    has_invalid_characters,
954                    has_duplicate_underscore,
955                })
956            }
957            Some('_') => Step::Continue(State::NumberHexDotUnderscore {
958                has_invalid_characters,
959                has_duplicate_underscore,
960            }),
961            Some('.') => Step::Backtrack,
962            Some('p') | Some('P') => Step::Continue(State::NumberHexExponent {
963                has_invalid_characters,
964                has_duplicate_underscore,
965            }),
966            Some(_) | None => Step::Abort(Token::FloatLiteral {
967                base: FloatBase::Hexadecimal,
968                is_unterminated,
969                has_invalid_characters,
970                has_duplicate_underscore,
971            }),
972        },
973        State::NumberHexDotUnderscore {
974            has_invalid_characters,
975            has_duplicate_underscore,
976        } => match c {
977            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
978                Step::Continue(State::NumberHexDot {
979                    is_unterminated: false,
980                    has_invalid_characters,
981                    has_duplicate_underscore,
982                })
983            }
984            Some('_') => Step::Continue(State::NumberHexDot {
985                is_unterminated: false,
986                has_invalid_characters,
987                has_duplicate_underscore: true,
988            }),
989            Some(_) | None => Step::Abort(Token::FloatLiteral {
990                base: FloatBase::Hexadecimal,
991                is_unterminated: true,
992                has_invalid_characters,
993                has_duplicate_underscore,
994            }),
995        },
996        State::NumberHexExponent {
997            has_invalid_characters,
998            has_duplicate_underscore,
999        } => match c {
1000            Some('+') | Some('-') => Step::Continue(State::NumberHexExponentSign {
1001                is_unterminated: true,
1002                has_invalid_characters,
1003                has_duplicate_underscore,
1004            }),
1005            Some(_) | None => Step::Reprocess(State::NumberHexExponentSign {
1006                is_unterminated: true,
1007                has_invalid_characters,
1008                has_duplicate_underscore,
1009            }),
1010        },
1011        State::NumberHexExponentSign {
1012            is_unterminated,
1013            has_invalid_characters,
1014            has_duplicate_underscore,
1015        } => match c {
1016            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
1017                Step::Continue(State::NumberHexExponentSign {
1018                    is_unterminated: false,
1019                    has_invalid_characters,
1020                    has_duplicate_underscore,
1021                })
1022            }
1023            Some('_') => Step::Continue(State::NumberHexExponentSignUnderscore {
1024                has_invalid_characters,
1025                has_duplicate_underscore,
1026            }),
1027            Some(_) | None => Step::Abort(Token::FloatLiteral {
1028                base: FloatBase::Hexadecimal,
1029                is_unterminated,
1030                has_invalid_characters,
1031                has_duplicate_underscore,
1032            }),
1033        },
1034        State::NumberHexExponentSignUnderscore {
1035            has_invalid_characters,
1036            has_duplicate_underscore,
1037        } => match c {
1038            Some('0'..='9') | Some('A'..='F') | Some('a'..='f') => {
1039                Step::Continue(State::NumberHexExponentSign {
1040                    is_unterminated: false,
1041                    has_invalid_characters,
1042                    has_duplicate_underscore,
1043                })
1044            }
1045            Some('_') => Step::Continue(State::NumberHexExponentSign {
1046                is_unterminated: false,
1047                has_invalid_characters,
1048                has_duplicate_underscore: true,
1049            }),
1050            Some(_) | None => Step::Abort(Token::FloatLiteral {
1051                base: FloatBase::Hexadecimal,
1052                is_unterminated: true,
1053                has_invalid_characters,
1054                has_duplicate_underscore,
1055            }),
1056        },
1057        State::NumberDot {
1058            is_unterminated,
1059            has_invalid_characters,
1060            has_duplicate_underscore,
1061        } => match c {
1062            Some('0'..='9') => Step::Continue(State::NumberDot {
1063                is_unterminated: false,
1064                has_invalid_characters,
1065                has_duplicate_underscore,
1066            }),
1067            Some('_') => Step::Continue(State::NumberDotUnderscore {
1068                has_invalid_characters,
1069                has_duplicate_underscore,
1070            }),
1071            Some('.') => Step::Backtrack,
1072            Some('e') | Some('E') => Step::Continue(State::NumberExponent {
1073                has_invalid_characters,
1074                has_duplicate_underscore,
1075            }),
1076            Some(_) | None => Step::Abort(Token::FloatLiteral {
1077                base: FloatBase::Decimal,
1078                is_unterminated,
1079                has_invalid_characters,
1080                has_duplicate_underscore,
1081            }),
1082        },
1083        State::NumberDotUnderscore {
1084            has_invalid_characters,
1085            has_duplicate_underscore,
1086        } => match c {
1087            Some('0'..='9') => Step::Continue(State::NumberDot {
1088                is_unterminated: false,
1089                has_invalid_characters,
1090                has_duplicate_underscore,
1091            }),
1092            Some('A'..='Z') | Some('a'..='z') => Step::Continue(State::NumberDot {
1093                is_unterminated: false,
1094                has_invalid_characters: true,
1095                has_duplicate_underscore,
1096            }),
1097            Some('_') => Step::Continue(State::NumberDot {
1098                is_unterminated: true,
1099                has_invalid_characters,
1100                has_duplicate_underscore: true,
1101            }),
1102            Some(_) | None => Step::Abort(Token::FloatLiteral {
1103                base: FloatBase::Decimal,
1104                is_unterminated: true,
1105                has_invalid_characters,
1106                has_duplicate_underscore,
1107            }),
1108        },
1109    }
1110}
1111
1112pub fn lex(s: &str) -> (Token, usize) {
1113    let mut state = State::Start;
1114    let mut iter = s.chars();
1115    let (mut cur_pos, mut last_pos) = (0, None);
1116    'outer: while let Some(c) = iter.next() {
1117        'inner: loop {
1118            match step(state, Some(c)) {
1119                Step::Continue(new_state) => {
1120                    last_pos = Some(cur_pos);
1121                    cur_pos += c.len_utf8();
1122                    state = new_state;
1123                    continue 'outer;
1124                }
1125                Step::Reprocess(new_state) => {
1126                    state = new_state;
1127                    continue 'inner;
1128                }
1129                Step::Backtrack => {
1130                    return lex(&s[..last_pos.unwrap().into()]);
1131                }
1132                Step::Abort(kind) => {
1133                    return (kind, cur_pos);
1134                }
1135                Step::End(kind) => {
1136                    cur_pos += c.len_utf8();
1137                    return (kind, cur_pos);
1138                }
1139            }
1140        }
1141    }
1142    loop {
1143        match step(state, None) {
1144            Step::Reprocess(new_state) => {
1145                state = new_state;
1146                continue;
1147            }
1148            Step::Backtrack => {
1149                return lex(&s[..last_pos.unwrap().into()]);
1150            }
1151            Step::Abort(kind) => {
1152                return (kind, cur_pos);
1153            }
1154            Step::Continue(_) | Step::End(_) => unreachable!(),
1155        }
1156    }
1157}
1158
#[cfg(test)]
mod test {
    use super::*;

    // Asserts that lexing `$input` yields `$token` spanning `$len` bytes.
    // Kept as a macro so that every case stays a single-line assertion.
    macro_rules! assert_lex {
        ($input:expr => $token:expr, $len:expr) => {
            assert_eq!(lex($input), ($token, $len))
        };
    }

    /// Expected `Token::RawIdentifier` with the given flags.
    fn raw_identifier(unterminated: bool, invalid_escape: bool) -> Token {
        Token::RawIdentifier {
            is_unterminated: unterminated,
            has_invalid_escape: invalid_escape,
        }
    }

    /// Expected `Token::IntegerLiteral` with the given base and flags.
    fn integer(
        base: IntegerBase,
        unterminated: bool,
        invalid_characters: bool,
        duplicate_underscore: bool,
    ) -> Token {
        Token::IntegerLiteral {
            base,
            is_unterminated: unterminated,
            has_invalid_characters: invalid_characters,
            has_duplicate_underscore: duplicate_underscore,
        }
    }

    /// Expected `Token::FloatLiteral` with the given base and flags.
    fn float(
        base: FloatBase,
        unterminated: bool,
        invalid_characters: bool,
        duplicate_underscore: bool,
    ) -> Token {
        Token::FloatLiteral {
            base,
            is_unterminated: unterminated,
            has_invalid_characters: invalid_characters,
            has_duplicate_underscore: duplicate_underscore,
        }
    }

    /// Expected `Token::StringLiteral` with the given flags.
    fn string(unterminated: bool, invalid_escape: bool) -> Token {
        Token::StringLiteral {
            is_unterminated: unterminated,
            has_invalid_escape: invalid_escape,
        }
    }

    /// Expected `Token::CharacterLiteral` with the given flags.
    fn character(empty: bool, unterminated: bool, invalid_escape: bool) -> Token {
        Token::CharacterLiteral {
            is_empty: empty,
            is_unterminated: unterminated,
            has_invalid_escape: invalid_escape,
        }
    }

    #[test]
    fn test_invalid() {
        assert_lex!("" => Token::Invalid, 0);
        assert_lex!("$" => Token::Invalid, 1);
        assert_lex!("$$" => Token::Invalid, 2);
        assert_lex!("$0" => Token::Invalid, 1);
    }

    #[test]
    fn test_whitespace() {
        assert_lex!(" " => Token::Whitespace, 1);
        assert_lex!(" \t\r\n" => Token::Whitespace, 4);
    }

    #[test]
    fn test_comment() {
        assert_lex!("// \n" => Token::Comment, 3);
        assert_lex!("//hello\n" => Token::Comment, 7);
        assert_lex!("// hello\n" => Token::Comment, 8);
    }

    #[test]
    fn test_doc_comment() {
        assert_lex!("/// \n" => Token::DocComment, 4);
        assert_lex!("///hello\n" => Token::DocComment, 8);
        assert_lex!("/// hello\n" => Token::DocComment, 9);
    }

    #[test]
    fn test_builtin() {
        assert_lex!("@hello" => Token::Builtin, 6);
        assert_lex!("@hello " => Token::Builtin, 6);
    }

    #[test]
    fn test_identifier() {
        // Plain identifiers.
        assert_lex!("foo" => Token::Identifier, 3);
        assert_lex!("foo1" => Token::Identifier, 4);
        assert_lex!("_foo1" => Token::Identifier, 5);
        // Raw (`@"..."`) identifiers.
        assert_lex!(r#"@"foo bar""# => raw_identifier(false, false), 10);
        assert_lex!(r#"@"foo bar\x00""# => raw_identifier(false, false), 14);
        assert_lex!(r#"@"foo bar\x0""# => raw_identifier(false, true), 13);
        assert_lex!(r#"@"foo bar"# => raw_identifier(true, false), 9);
    }

    #[test]
    fn test_integer_literal() {
        use IntegerBase::{Binary as Bin, Decimal as Dec, Hexadecimal as Hex, Octal as Oct};

        assert_lex!("100" => integer(Dec, false, false, false), 3);
        assert_lex!("001" => integer(Dec, false, false, false), 3);
        assert_lex!("100_000" => integer(Dec, false, false, false), 7);
        assert_lex!("0b" => integer(Bin, true, false, false), 2);
        assert_lex!("0b1" => integer(Bin, false, false, false), 3);
        assert_lex!("0b1_" => integer(Bin, true, false, false), 4);
        assert_lex!("0b1_0" => integer(Bin, false, false, false), 5);
        assert_lex!("0b12" => integer(Bin, false, true, false), 4);
        assert_lex!("0o" => integer(Oct, true, false, false), 2);
        assert_lex!("0x" => integer(Hex, true, false, false), 2);
        assert_lex!("1_" => integer(Dec, true, false, false), 2);
    }

    #[test]
    fn test_float_literal() {
        use FloatBase::{Decimal as Dec, Hexadecimal as Hex};

        assert_lex!("1." => float(Dec, true, false, false), 2);
        assert_lex!("1._" => float(Dec, true, false, false), 3);
        assert_lex!("1.__" => float(Dec, true, false, true), 4);
        assert_lex!("1.5" => float(Dec, false, false, false), 3);
        assert_lex!("1.5f" => float(Dec, false, false, false), 3);
        assert_lex!("1.5_" => float(Dec, true, false, false), 4);
        assert_lex!("0x1.f" => float(Hex, false, false, false), 5);
        assert_lex!("0x1f.5f" => float(Hex, false, false, false), 7);
        assert_lex!("1p1" => float(Hex, false, false, false), 3);
        assert_lex!("1P1" => float(Hex, false, false, false), 3);
        assert_lex!("1p+1" => float(Hex, false, false, false), 4);
        assert_lex!("1p-1" => float(Hex, false, false, false), 4);
        assert_lex!("1p1_2" => float(Hex, false, false, false), 5);
        assert_lex!("1p+1_2" => float(Hex, false, false, false), 6);
        assert_lex!("1p-1_2" => float(Hex, false, false, false), 6);
        assert_lex!("1p1_2_" => float(Hex, true, false, false), 6);
        assert_lex!("1p+1_2_" => float(Hex, true, false, false), 7);
        assert_lex!("1p-1_2_" => float(Hex, true, false, false), 7);
        assert_lex!("1E1" => float(Dec, false, false, false), 3);
        assert_lex!("1e1" => float(Dec, false, false, false), 3);
        assert_lex!("1e+1" => float(Dec, false, false, false), 4);
        assert_lex!("1e-1" => float(Dec, false, false, false), 4);
        assert_lex!("0x1p27" => float(Hex, false, false, false), 6);
        assert_lex!("0x1.p+64" => float(Hex, false, false, false), 8);
        assert_lex!("1e-7" => float(Dec, false, false, false), 4);
    }

    #[test]
    fn test_string_literal() {
        assert_lex!(r#""foo bar""# => string(false, false), 9);
        assert_lex!(r#""foo bar\x00""# => string(false, false), 13);
        assert_lex!(r#""foo bar\x0""# => string(false, true), 12);
        assert_lex!(r#""foo bar"# => string(true, false), 8);
        assert_lex!(r#""foo bar\\\"\'\r\n\t""# => string(false, false), 21);
        assert_lex!(r#""foo bar\xFF""# => string(false, false), 13);
        assert_lex!(r#""foo bar\xFG""# => string(false, true), 13);
        assert_lex!(r#""foo bar\xGF""# => string(false, true), 13);
        assert_lex!(r#""foo bar\xGG""# => string(false, true), 13);
        assert_lex!(r#""foo bar\u{""# => string(false, true), 12);
        assert_lex!(r#""foo bar\u{1""# => string(false, true), 13);
        assert_lex!(r#""foo bar\u{1F""# => string(false, true), 14);
        assert_lex!(r#""foo bar\u{1G""# => string(false, true), 14);
        assert_lex!(r#""foo bar\u{1G}""# => string(false, true), 15);
        assert_lex!(r#""foo bar\u{1F}""# => string(false, false), 15);
    }

    #[test]
    fn test_character_literal() {
        assert_lex!("''" => character(true, false, false), 2);
        assert_lex!("'a" => character(false, true, false), 2);
        assert_lex!("'\\x0'" => character(false, false, true), 5);
        assert_lex!("'\\xzz'" => character(false, false, true), 6);
        assert_lex!("'a'" => character(false, false, false), 3);
    }

    #[test]
    fn test_multiline_string_literal() {
        assert_lex!("\\\\" => Token::MultilineStringLiteralLine, 2);
        assert_lex!("\\\\hello" => Token::MultilineStringLiteralLine, 7);
        assert_lex!("\\\\ hello\n" => Token::MultilineStringLiteralLine, 8);
    }

    #[test]
    fn test_punctuation_1() {
        assert_lex!("!" => Token::Bang, 1);
        assert_lex!("%" => Token::Percent, 1);
        assert_lex!("&" => Token::And, 1);
        assert_lex!("(" => Token::LParen, 1);
        assert_lex!(")" => Token::RParen, 1);
        assert_lex!("*" => Token::Star, 1);
        assert_lex!("+" => Token::Plus, 1);
        assert_lex!("," => Token::Comma, 1);
        assert_lex!("-" => Token::Minus, 1);
        assert_lex!("." => Token::Dot, 1);
        assert_lex!("/" => Token::Slash, 1);
        assert_lex!(":" => Token::Colon, 1);
        assert_lex!(";" => Token::Semicolon, 1);
        assert_lex!("<" => Token::LAngle, 1);
        assert_lex!("=" => Token::Equal, 1);
        assert_lex!(">" => Token::RAngle, 1);
        assert_lex!("?" => Token::Question, 1);
        assert_lex!("@" => Token::At, 1);
        assert_lex!("[" => Token::LBracket, 1);
        assert_lex!("]" => Token::RBracket, 1);
        assert_lex!("^" => Token::Caret, 1);
        assert_lex!("{" => Token::LBrace, 1);
        assert_lex!("|" => Token::Or, 1);
        assert_lex!("}" => Token::RBrace, 1);
    }

    #[test]
    fn test_punctuation_2() {
        assert_lex!("!=" => Token::BangEqual, 2);
        assert_lex!("%=" => Token::PercentEqual, 2);
        assert_lex!("&&" => Token::And2, 2);
        assert_lex!("&=" => Token::AndEqual, 2);
        assert_lex!("**" => Token::Star2, 2);
        assert_lex!("*=" => Token::StarEqual, 2);
        assert_lex!("*%" => Token::StarPercent, 2);
        assert_lex!("++" => Token::Plus2, 2);
        assert_lex!("+=" => Token::PlusEqual, 2);
        assert_lex!("+%" => Token::PlusPercent, 2);
        assert_lex!("-=" => Token::MinusEqual, 2);
        assert_lex!("-%" => Token::MinusPercent, 2);
        assert_lex!(".." => Token::Dot2, 2);
        assert_lex!(".*" => Token::DotStar, 2);
        assert_lex!("/=" => Token::SlashEqual, 2);
        assert_lex!("<=" => Token::LAngleEqual, 2);
        assert_lex!("<<" => Token::LAngle2, 2);
        assert_lex!("==" => Token::Equal2, 2);
        assert_lex!("=>" => Token::EqualRAngle, 2);
        assert_lex!(">=" => Token::RAngleEqual, 2);
        assert_lex!(">>" => Token::RAngle2, 2);
        assert_lex!("^=" => Token::CaretEqual, 2);
        assert_lex!("||" => Token::Or2, 2);
        assert_lex!("|=" => Token::OrEqual, 2);
    }

    #[test]
    fn test_punctuation_3() {
        assert_lex!("*%=" => Token::StarPercentEqual, 3);
        assert_lex!("+%=" => Token::PlusPercentEqual, 3);
        assert_lex!("-%=" => Token::MinusPercentEqual, 3);
        assert_lex!("..." => Token::Dot3, 3);
        assert_lex!("<<=" => Token::LAngle2Equal, 3);
        assert_lex!(">>=" => Token::RAngle2Equal, 3);
    }
}