mxmlextrema_as3parser/parser/
tokenizer.rs

1use crate::ns::*;
2
3pub struct Tokenizer<'input> {
4    compilation_unit: Rc<CompilationUnit>,
5    characters: CharacterReader<'input>,
6}
7
8impl<'input> Tokenizer<'input> {
9    /// Constructs a tokenizer.
10    pub fn new(compilation_unit: &'input Rc<CompilationUnit>, options: &ParserOptions) -> Self {
11        let text: &'input str = compilation_unit.text();
12        let compilation_unit = compilation_unit.clone();
13        let characters: CharacterReader<'input>;
14        if let Some(range) = options.byte_range {
15            characters = CharacterReader::from_offset(&text[0..range.1], range.0);
16        } else {
17            characters = CharacterReader::from(text);
18        }
19        Self {
20            compilation_unit,
21            characters,
22        }
23    }
24
25    pub fn compilation_unit(&self) -> &Rc<CompilationUnit> {
26        &self.compilation_unit
27    }
28
29    pub fn characters(&self) -> &CharacterReader<'input> {
30        &self.characters
31    }
32
33    fn add_syntax_error(&self, location: &Location, kind: DiagnosticKind, arguments: Vec<Rc<dyn DiagnosticArgument>>) {
34        if self.compilation_unit.prevent_equal_offset_error(location) {
35            return;
36        }
37        self.compilation_unit.add_diagnostic(Diagnostic::new_syntax_error(location, kind, arguments));
38    }
39
40    /// Scans for an *InputElementDiv* token.
41    pub fn scan_ie_div(&mut self) -> (Token, Location) {
42        loop {
43            let ch = self.characters.peek_or_zero();
44            if CharacterValidator::is_whitespace(ch) {
45                self.characters.next();
46            } else if self.consume_line_terminator() || self.consume_comment() {
47                // Consumed line terminator or comment
48            } else {
49                break;
50            }
51        }
52        if let Some(result) = self.scan_identifier() {
53            return result;
54        }
55        if let Some(result) = self.scan_dot_or_numeric_literal() {
56            return result;
57        }
58        if let Some(result) = self.scan_string_literal(false) {
59            return result;
60        }
61        let start = self.cursor_location();
62        match self.characters.peek_or_zero() {
63            ',' => {
64                // Comma
65                self.characters.next();
66                let location = start.combine_with(self.cursor_location());
67                return (Token::Comma, location);
68            },
69            '(' => {
70                // ParenOpen
71                self.characters.next();
72                let location = start.combine_with(self.cursor_location());
73                return (Token::ParenOpen, location);
74            },
75            ')' => {
76                // ParenClose
77                self.characters.next();
78                let location = start.combine_with(self.cursor_location());
79                return (Token::ParenClose, location);
80            },
81            '[' => {
82                // SquareOpen
83                self.characters.next();
84                let location = start.combine_with(self.cursor_location());
85                return (Token::SquareOpen, location);
86            },
87            ']' => {
88                // SquareClose
89                self.characters.next();
90                let location = start.combine_with(self.cursor_location());
91                return (Token::SquareClose, location);
92            },
93            '{' => {
94                // BlockOpen
95                self.characters.next();
96                let location = start.combine_with(self.cursor_location());
97                return (Token::BlockOpen, location);
98            },
99            '}' => {
100                // BlockClose
101                self.characters.next();
102                let location = start.combine_with(self.cursor_location());
103                return (Token::BlockClose, location);
104            },
105            ':' => {
106                self.characters.next();
107                // ColonColon
108                if self.characters.peek_or_zero() == ':' {
109                    self.characters.next();
110                    let location = start.combine_with(self.cursor_location());
111                    return (Token::ColonColon, location);
112                }
113                // Colon
114                let location = start.combine_with(self.cursor_location());
115                return (Token::Colon, location);
116            },
117            '=' => {
118                self.characters.next();
119                let ch = self.characters.peek_or_zero();
120                // StrictEquals
121                if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
122                    self.characters.skip_count_in_place(2);
123                    let location = start.combine_with(self.cursor_location());
124                    return (Token::StrictEquals, location);
125                }
126                // Equals
127                if ch == '=' {
128                    self.characters.next();
129                    let location = start.combine_with(self.cursor_location());
130                    return (Token::Equals, location);
131                }
132                // Assign
133                let location = start.combine_with(self.cursor_location());
134                return (Token::Assign, location);
135            },
136            '!' => {
137                self.characters.next();
138                let ch = self.characters.peek_or_zero();
139                // StrictNotEquals
140                if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
141                    self.characters.skip_count_in_place(2);
142                    let location = start.combine_with(self.cursor_location());
143                    return (Token::StrictNotEquals, location);
144                }
145                // NotEquals
146                if ch == '=' {
147                    self.characters.next();
148                    let location = start.combine_with(self.cursor_location());
149                    return (Token::NotEquals, location);
150                }
151                // Exclamation
152                let location = start.combine_with(self.cursor_location());
153                return (Token::Exclamation, location);
154            },
155            '?' => {
156                self.characters.next();
157                let ch = self.characters.peek_or_zero();
158                // OptionalChaining
159                if ch == '.' {
160                    self.characters.next();
161                    let location = start.combine_with(self.cursor_location());
162                    return (Token::OptionalChaining, location);
163                }
164                // NullCoalescingAssign
165                if ch == '?' && self.characters.peek_at_or_zero(1) == '=' {
166                    self.characters.skip_count_in_place(2);
167                    let location = start.combine_with(self.cursor_location());
168                    return (Token::NullCoalescingAssign, location);
169                }
170                // NullCoalescing
171                if ch == '?' {
172                    self.characters.next();
173                    let location = start.combine_with(self.cursor_location());
174                    return (Token::NullCoalescing, location);
175                }
176                // Question
177                let location = start.combine_with(self.cursor_location());
178                return (Token::Question, location);
179            },
180            ';' => {
181                // Semicolon
182                self.characters.next();
183                let location = start.combine_with(self.cursor_location());
184                return (Token::Semicolon, location);
185            },
186            '<' => {
187                self.characters.next();
188                let ch = self.characters.peek_or_zero();
189                // Le
190                if ch == '=' {
191                    self.characters.next();
192                    let location = start.combine_with(self.cursor_location());
193                    return (Token::Le, location);
194                }
195                // LeftShiftAssign
196                if ch == '<' && self.characters.peek_at_or_zero(1) == '=' {
197                    self.characters.skip_count_in_place(2);
198                    let location = start.combine_with(self.cursor_location());
199                    return (Token::LeftShiftAssign, location);
200                }
201                // LeftShift
202                if ch == '<' {
203                    self.characters.next();
204                    let location = start.combine_with(self.cursor_location());
205                    return (Token::LeftShift, location);
206                }
207                // Lt
208                let location = start.combine_with(self.cursor_location());
209                return (Token::Lt, location);
210            },
211            '>' => {
212                self.characters.next();
213                let ch = self.characters.peek_or_zero();
214                // Ge
215                if ch == '=' {
216                    self.characters.next();
217                    let location = start.combine_with(self.cursor_location());
218                    return (Token::Ge, location);
219                }
220                // RightShiftAssign
221                if ch == '>' && self.characters.peek_at_or_zero(1) == '=' {
222                    self.characters.skip_count_in_place(2);
223                    let location = start.combine_with(self.cursor_location());
224                    return (Token::RightShiftAssign, location);
225                }
226                // UnsignedRightShiftAssign
227                if ch == '>' && self.characters.peek_seq(3) == ">>=" {
228                    self.characters.skip_count_in_place(3);
229                    let location = start.combine_with(self.cursor_location());
230                    return (Token::UnsignedRightShiftAssign, location);
231                }
232                // UnsignedRightShift
233                if ch == '>' && self.characters.peek_at_or_zero(1) == '>' {
234                    self.characters.skip_count_in_place(2);
235                    let location = start.combine_with(self.cursor_location());
236                    return (Token::UnsignedRightShift, location);
237                }
238                // RightShift
239                if ch == '>' {
240                    self.characters.next();
241                    let location = start.combine_with(self.cursor_location());
242                    return (Token::RightShift, location);
243                }
244                // Gt
245                let location = start.combine_with(self.cursor_location());
246                return (Token::Gt, location);
247            },
248            '@' => {
249                // Attribute
250                self.characters.next();
251                if let Some(token) = self.scan_string_literal(true) {
252                    return token;
253                }
254                let location = start.combine_with(self.cursor_location());
255                return (Token::Attribute, location);
256            },
257            '+' => {
258                self.characters.next();
259                let ch = self.characters.peek_or_zero();
260                // Increment
261                if ch == '+' {
262                    self.characters.next();
263                    let location = start.combine_with(self.cursor_location());
264                    return (Token::Increment, location);
265                }
266                // AddAssign
267                if ch == '=' {
268                    self.characters.next();
269                    let location = start.combine_with(self.cursor_location());
270                    return (Token::AddAssign, location);
271                }
272                // Plus
273                let location = start.combine_with(self.cursor_location());
274                return (Token::Plus, location);
275            },
276            '-' => {
277                self.characters.next();
278                let ch = self.characters.peek_or_zero();
279                // Decrement
280                if ch == '-' {
281                    self.characters.next();
282                    let location = start.combine_with(self.cursor_location());
283                    return (Token::Decrement, location);
284                }
285                // SubtractAssign
286                if ch == '=' {
287                    self.characters.next();
288                    let location = start.combine_with(self.cursor_location());
289                    return (Token::SubtractAssign, location);
290                }
291                // Minus
292                let location = start.combine_with(self.cursor_location());
293                return (Token::Minus, location);
294            },
295            '*' => {
296                self.characters.next();
297                let ch = self.characters.peek_or_zero();
298                // PowerAssign
299                if ch == '*' && self.characters.peek_at_or_zero(1) == '=' {
300                    self.characters.skip_count_in_place(2);
301                    let location = start.combine_with(self.cursor_location());
302                    return (Token::PowerAssign, location);
303                }
304                // Power
305                if ch == '*' {
306                    self.characters.next();
307                    let location = start.combine_with(self.cursor_location());
308                    return (Token::Power, location);
309                }
310                // MultiplyAssign
311                if ch == '=' {
312                    self.characters.next();
313                    let location = start.combine_with(self.cursor_location());
314                    return (Token::MultiplyAssign, location);
315                }
316                // Times
317                let location = start.combine_with(self.cursor_location());
318                return (Token::Times, location);
319            },
320            '/' => {
321                self.characters.next();
322                let ch = self.characters.peek_or_zero();
323                // DivideAssign
324                if ch == '=' {
325                    self.characters.next();
326                    let location = start.combine_with(self.cursor_location());
327                    return (Token::DivideAssign, location);
328                }
329                // Div
330                let location = start.combine_with(self.cursor_location());
331                return (Token::Div, location);
332            },
333            '%' => {
334                self.characters.next();
335                let ch = self.characters.peek_or_zero();
336                // RemainderAssign
337                if ch == '=' {
338                    self.characters.next();
339                    let location = start.combine_with(self.cursor_location());
340                    return (Token::RemainderAssign, location);
341                }
342                // Percent
343                let location = start.combine_with(self.cursor_location());
344                return (Token::Percent, location);
345            },
346            '&' => {
347                self.characters.next();
348                let ch = self.characters.peek_or_zero();
349                // LogicalAndAssign
350                if ch == '&' && self.characters.peek_at_or_zero(1) == '=' {
351                    self.characters.skip_count_in_place(2);
352                    let location = start.combine_with(self.cursor_location());
353                    return (Token::LogicalAndAssign, location);
354                }
355                // LogicalAnd
356                if ch == '&' {
357                    self.characters.next();
358                    let location = start.combine_with(self.cursor_location());
359                    return (Token::LogicalAnd, location);
360                }
361                // BitwiseAndAssign
362                if ch == '=' {
363                    self.characters.next();
364                    let location = start.combine_with(self.cursor_location());
365                    return (Token::BitwiseAndAssign, location);
366                }
367                // BitwiseAnd
368                let location = start.combine_with(self.cursor_location());
369                return (Token::Ampersand, location);
370            },
371            '^' => {
372                self.characters.next();
373                let ch = self.characters.peek_or_zero();
374                // LogicalXorAssign
375                if ch == '^' && self.characters.peek_at_or_zero(1) == '=' {
376                    self.characters.skip_count_in_place(2);
377                    let location = start.combine_with(self.cursor_location());
378                    return (Token::LogicalXorAssign, location);
379                }
380                // LogicalXor
381                if ch == '^' {
382                    self.characters.next();
383                    let location = start.combine_with(self.cursor_location());
384                    return (Token::LogicalXor, location);
385                }
386                // BitwiseXorAssign
387                if ch == '=' {
388                    self.characters.next();
389                    let location = start.combine_with(self.cursor_location());
390                    return (Token::BitwiseXorAssign, location);
391                }
392                // BitwiseXor
393                let location = start.combine_with(self.cursor_location());
394                return (Token::Hat, location);
395            },
396            '|' => {
397                self.characters.next();
398                let ch = self.characters.peek_or_zero();
399                // LogicalOrAssign
400                if ch == '|' && self.characters.peek_at_or_zero(1) == '=' {
401                    self.characters.skip_count_in_place(2);
402                    let location = start.combine_with(self.cursor_location());
403                    return (Token::LogicalOrAssign, location);
404                }
405                // LogicalOr
406                if ch == '|' {
407                    self.characters.next();
408                    let location = start.combine_with(self.cursor_location());
409                    return (Token::LogicalOr, location);
410                }
411                // BitwiseOrAssign
412                if ch == '=' {
413                    self.characters.next();
414                    let location = start.combine_with(self.cursor_location());
415                    return (Token::BitwiseOrAssign, location);
416                }
417                // BitwiseOr
418                let location = start.combine_with(self.cursor_location());
419                return (Token::Pipe, location);
420            },
421            '~' => {
422                // BitwiseNot
423                self.characters.next();
424                let location = start.combine_with(self.cursor_location());
425                return (Token::Tilde, location);
426            },
427            _ => {
428                if self.characters.has_remaining() {
429                    self.add_unexpected_error();
430                    self.characters.next();
431                    return self.scan_ie_div();
432                // Eof
433                } else {
434                    return (Token::Eof, start)
435                }
436            },
437        }
438    }
439
440    /// Scans regular expression after a `/` or `/=` token has been scanned by
441    /// `scan_ie_div`.
442    pub fn scan_regexp_literal(&mut self, start: Location, mut body: String) -> (Token, Location) {
443        loop {
444            let ch = self.characters.peek_or_zero();
445            if ch == '/' {
446                self.characters.next();
447                break;
448            } else if ch == '\\' {
449                self.characters.next();
450                body.push('\\');
451                let ch = self.characters.peek_or_zero();
452                if self.characters.reached_end() {
453                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
454                    break;
455                } else if CharacterValidator::is_line_terminator(ch) {
456                    self.add_unexpected_error();
457                    self.consume_line_terminator();
458                } else {
459                    self.characters.next();
460                    body.push(ch);
461                }
462            } else if CharacterValidator::is_line_terminator(ch) {
463                body.push('\n');
464                self.consume_line_terminator();
465            } else if self.characters.reached_end() {
466                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
467                break;
468            } else {
469                body.push(ch);
470                self.characters.next();
471            }
472        }
473
474        let mut flags = String::new();
475        while let Some((ch, _)) = self.consume_identifier_part() {
476            flags.push(ch);
477        }
478        
479        let location = start.combine_with(self.cursor_location());
480        (Token::RegExp { body, flags }, location)
481    }
482
483    fn character_ahead_location(&self) -> Location {
484        if self.characters.reached_end() {
485            return self.cursor_location();
486        }
487        let offset = self.characters.index();
488        let mut next_characters = self.characters.clone();
489        next_characters.next().unwrap();
490        Location::with_offsets(&self.compilation_unit, offset, next_characters.index())
491    }
492
493    pub fn cursor_location(&self) -> Location {
494        let offset = self.characters.index();
495        Location::with_offset(&self.compilation_unit, offset)
496    }
497
498    fn add_unexpected_error(&self) {
499        if self.characters.has_remaining() {
500            self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::UnexpectedCharacter, diagarg![self.characters.peek_or_zero().to_string()])
501        } else {
502            self.add_syntax_error(&self.cursor_location(), DiagnosticKind::UnexpectedEnd, vec![])
503        }
504    }
505
506    fn add_unexpected_eof_error(&self, kind: DiagnosticKind) {
507        self.add_syntax_error(&self.cursor_location(), kind, vec![]);
508    }
509
510    // LineTerminator
511    fn consume_line_terminator(&mut self) -> bool {
512        let ch = self.characters.peek_or_zero();
513        if ch == '\x0D' && self.characters.peek_at_or_zero(1) == '\x0A' {
514            self.characters.skip_count_in_place(2);
515            // self.line_number += 1;
516            return true;
517        }
518        if CharacterValidator::is_line_terminator(ch) {
519            self.characters.next();
520            // self.line_number += 1;
521            return true;
522        }
523        false
524    }
525
526    fn consume_comment(&mut self) -> bool {
527        let ch = self.characters.peek_or_zero();
528        if ch != '/' {
529            return false;
530        }
531        let ch2 = self.characters.peek_at_or_zero(1);
532        if ch2 == '/' {
533            let start = self.cursor_location();
534            self.characters.skip_count_in_place(2);
535            while !CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) && self.characters.has_remaining() {
536                self.characters.skip_in_place();
537            }
538            let location = start.combine_with(self.cursor_location());
539            self.consume_line_terminator();
540
541            self.compilation_unit.add_comment(Rc::new(Comment {
542                multiline: false,
543                content: RefCell::new(self.compilation_unit.text()[(location.first_offset() + 2)..location.last_offset()].to_owned()),
544                location: RefCell::new(location),
545            }));
546
547            return true;
548        }
549        if ch2 == '*' {
550            let start = self.cursor_location();
551            self.characters.skip_count_in_place(2);
552
553            loop {
554                if self.characters.peek_or_zero() == '*' && self.characters.peek_at_or_zero(1) == '/' {
555                    self.characters.skip_count_in_place(2);
556                    break;
557                } else if self.consume_line_terminator() {
558                    // Consumed LineTerminator
559                } else if self.characters.has_remaining() {
560                    self.characters.skip_in_place();
561                } else {
562                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForMultiLineComment);
563                    break;
564                }
565            }
566
567            let location = start.combine_with(self.cursor_location());
568
569            let i = location.first_offset() + 2;
570            let j = decrease_last_offset(i, location.last_offset(), 2);
571
572            self.compilation_unit.add_comment(Rc::new(Comment {
573                multiline: true,
574                content: RefCell::new(self.compilation_unit.text()[i..j].to_owned()),
575                location: RefCell::new(location),
576            }));
577
578            return true;
579        }
580        false
581    }
582
583    fn scan_identifier(&mut self) -> Option<(Token, Location)> {
584        let start = self.cursor_location();
585        let mut escaped = false;
586        let Some((ch, escaped_2)) = self.consume_identifier_start() else {
587            return None;
588        };
589        escaped = escaped || escaped_2;
590        let mut name = String::new();
591        name.push(ch);
592        while let Some((ch, escaped_2)) = self.consume_identifier_part() {
593            escaped = escaped || escaped_2;
594            name.push(ch);
595        }
596
597        let location = start.combine_with(self.cursor_location());
598        if !escaped {
599            if let Some(token) = As3ReservedWord::token(name.as_ref()) {
600                return Some((token, location));
601            }
602        }
603        Some((Token::Identifier(name), location))
604    }
605
606    /// Returns a tuple in the form (*character*, *escaped*).
607    fn consume_identifier_start(&mut self) -> Option<(char, bool)> {
608        let ch = self.characters.peek_or_zero();
609        if CharacterValidator::is_identifier_start(ch) {
610            self.characters.next();
611            return Some((ch, false));
612        }
613        if self.characters.peek_or_zero() == '\\' {
614            self.characters.next();
615            return Some((self.expect_unicode_escape_sequence(), true));
616        }
617        None
618    }
619
620    /// Returns a tuple in the form (*character*, *escaped*).
621    fn consume_identifier_part(&mut self) -> Option<(char, bool)> {
622        let ch = self.characters.peek_or_zero();
623        if CharacterValidator::is_identifier_part(ch) {
624            self.characters.next();
625            return Some((ch, false));
626        }
627        if self.characters.peek_or_zero() == '\\' {
628            self.characters.next();
629            return Some((self.expect_unicode_escape_sequence(), true));
630        }
631        None
632    }
633
634    /// Expects UnicodeEscapeSequence starting from `u`.
635    fn expect_unicode_escape_sequence(&mut self) -> char {
636        let start = self.cursor_location();
637        if self.characters.peek_or_zero() != 'u' {
638            self.add_unexpected_error();
639            return '\x5F';
640        }
641        self.characters.next();
642
643        // Scan \uXXXX
644        if CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
645            let r = char::from_u32(self.expect_hex_digit() << 12
646                | (self.expect_hex_digit() << 8)
647                | (self.expect_hex_digit() << 4)
648                | self.expect_hex_digit());
649            let Some(r) = r else {
650                self.add_syntax_error(&start.combine_with(self.cursor_location()), DiagnosticKind::InvalidEscapeValue, vec![]);
651                return '\x5F';
652            };
653            return r;
654        }
655
656        // Scan \u{}
657        if self.characters.peek_or_zero() != '{' {
658            self.add_unexpected_error();
659            return '\x5F';
660        }
661        self.characters.next();
662        while CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
663            self.characters.next();
664        }
665        if self.characters.peek_or_zero() != '}' {
666            self.add_unexpected_error();
667            return '\x5F';
668        }
669        self.characters.next();
670        let location = start.combine_with(self.cursor_location());
671        let r = u32::from_str_radix(&self.compilation_unit.text()[(start.first_offset + 2)..(location.last_offset - 1)], 16);
672        let Ok(r) = r else {
673            self.add_syntax_error(&location, DiagnosticKind::InvalidEscapeValue, vec![]);
674            return '\x5F';
675        };
676        let r = char::from_u32(r);
677        let Some(r) = r else {
678            self.add_syntax_error(&location, DiagnosticKind::InvalidEscapeValue, vec![]);
679            return '\x5F';
680        };
681        r
682    }
683
684    fn expect_hex_digit(&mut self) -> u32 {
685        let ch = self.characters.peek_or_zero();
686        let mv = CharacterValidator::hex_digit_mv(ch);
687        if mv.is_none() {
688            self.add_unexpected_error();
689            return 0x5F;
690        }
691        self.characters.next();
692        mv.unwrap()
693    }
694
695    fn scan_dot_or_numeric_literal(&mut self) -> Option<(Token, Location)> {
696        let start = self.cursor_location();
697        let ch = self.characters.peek_or_zero();
698        let mut initial_dot = false;
699        if ch == '.' {
700            initial_dot = true;
701            self.characters.next();
702
703            let seq = self.characters.peek_seq(2);
704            // Ellipsis
705            if seq == ".." {
706                self.characters.skip_count_in_place(2);
707                return Some((Token::Ellipsis, start.combine_with(self.cursor_location())));
708            }
709            let ch = seq.get(..1).map(|ch| ch.chars().next().unwrap()).unwrap_or('\x00');
710            // Descendants
711            if ch == '.' {
712                self.characters.next();
713                return Some((Token::Descendants, start.combine_with(self.cursor_location())));
714            }
715            // Dot
716            if !CharacterValidator::is_dec_digit(ch) {
717                return Some((Token::Dot, start.combine_with(self.cursor_location())));
718            }
719
720            // NumericLiteral
721            while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
722                self.characters.next();
723                self.consume_underscore_followed_by_dec_digit();
724            }
725        } else if ch == '0' {
726            self.characters.next();
727            let ch_2 = self.characters.peek_or_zero();
728
729            // HexLiteral
730            if ['X', 'x'].contains(&ch_2) {
731                self.characters.next();
732                return self.scan_hex_literal(start.clone());
733            }
734
735            // BinLiteral;
736            if ['B', 'b'].contains(&ch_2) {
737                self.characters.next();
738                return self.scan_bin_literal(start.clone());
739            }
740        } else if CharacterValidator::is_dec_digit(ch) {
741            while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
742                self.characters.next();
743                self.consume_underscore_followed_by_dec_digit();
744            }
745        } else {
746            return None;
747        }
748
749        if !initial_dot && self.characters.peek_or_zero() == '.' {
750            self.characters.next();
751            /*
752            if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
753                self.add_unexpected_error();
754            }
755            */
756            while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
757                self.characters.next();
758                self.consume_underscore_followed_by_dec_digit();
759            }
760        }
761
762        // Decimal exponent
763        if ['E', 'e'].contains(&self.characters.peek_or_zero()) {
764            self.characters.next();
765            if ['+', '-'].contains(&self.characters.peek_or_zero()) {
766                self.characters.next();
767            }
768            if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
769                self.add_unexpected_error();
770            }
771            while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
772                self.characters.next();
773                self.consume_underscore_followed_by_dec_digit();
774            }
775        }
776
777        let string = self.compilation_unit.text()[start.first_offset..self.characters.index()].to_owned();
778
779        let mut suffix = NumberSuffix::None;
780        if self.characters.peek_or_zero() == 'f' || self.characters.peek_or_zero() == 'F' {
781            suffix = NumberSuffix::F;
782            self.characters.next();
783        }
784        self.unallow_numeric_suffix();
785
786        let location = start.combine_with(self.cursor_location());
787
788        Some((Token::Number(string, suffix), location))
789    }
790
791    fn scan_hex_literal(&mut self, start: Location) -> Option<(Token, Location)> {
792        if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
793            self.add_unexpected_error();
794        }
795        while CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
796            self.characters.next();
797            self.consume_underscore_followed_by_hex_digit();
798        }
799
800        let suffix = NumberSuffix::None;
801        self.unallow_numeric_suffix();
802
803        let location = start.combine_with(self.cursor_location());
804        let s = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
805        Some((Token::Number(s, suffix), location))
806    }
807
808    fn scan_bin_literal(&mut self, start: Location) -> Option<(Token, Location)> {
809        if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
810            self.add_unexpected_error();
811        }
812        while CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
813            self.characters.next();
814            self.consume_underscore_followed_by_bin_digit();
815        }
816
817        let suffix = NumberSuffix::None;
818        self.unallow_numeric_suffix();
819
820        let location = start.combine_with(self.cursor_location());
821        let s = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
822        Some((Token::Number(s, suffix), location))
823    }
824
825    fn consume_underscore_followed_by_dec_digit(&mut self) {
826        if self.characters.peek_or_zero() == '_' {
827            self.characters.next();
828            if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
829                self.add_unexpected_error();
830            }
831            self.characters.next();
832        }
833    }
834
835    fn consume_underscore_followed_by_hex_digit(&mut self) {
836        if self.characters.peek_or_zero() == '_' {
837            self.characters.next();
838            if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
839                self.add_unexpected_error();
840            }
841            self.characters.next();
842        }
843    }
844
845    fn consume_underscore_followed_by_bin_digit(&mut self) {
846        if self.characters.peek_or_zero() == '_' {
847            self.characters.next();
848            if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
849                self.add_unexpected_error();
850            }
851            self.characters.next();
852        }
853    }
854
855    fn unallow_numeric_suffix(&self) {
856        if CharacterValidator::is_identifier_start(self.characters.peek_or_zero()) {
857            self.add_unexpected_error();
858        }
859    }
860
861    fn scan_string_literal(&mut self, raw: bool) -> Option<(Token, Location)> {
862        let delim = self.characters.peek_or_zero();
863        if !['"', '\''].contains(&delim) {
864            return None;
865        }
866        let mut start = self.cursor_location();
867        // Include the "@" punctuator as part of raw string literals
868        if raw {
869            start = Location::with_offset(&start.compilation_unit(), start.first_offset() - 1);
870        }
871
872        self.characters.next();
873
874        // Triple string literal
875        if self.characters.peek_or_zero() == delim && self.characters.peek_at_or_zero(1) == delim {
876            self.characters.skip_count_in_place(2);
877            return self.scan_triple_string_literal(delim, start, raw);
878        }
879
880        let mut value = String::new();
881
882        if raw {
883            loop {
884                let ch = self.characters.peek_or_zero();
885                if ch == delim {
886                    self.characters.next();
887                    break;
888                } else if CharacterValidator::is_line_terminator(ch) {
889                    self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::StringLiteralMustBeTerminatedBeforeLineBreak, vec![]);
890                    self.consume_line_terminator();
891                } else if !self.characters.has_remaining() {
892                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
893                    break;
894                } else {
895                    value.push(ch);
896                    self.characters.next();
897                }
898            }
899        } else {
900            loop {
901                if let Some(s) = self.consume_escape_sequence() {
902                    value.push_str(&s);
903                } else {
904                    let ch = self.characters.peek_or_zero();
905                    if ch == delim {
906                        self.characters.next();
907                        break;
908                    } else if CharacterValidator::is_line_terminator(ch) {
909                        self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::StringLiteralMustBeTerminatedBeforeLineBreak, vec![]);
910                        self.consume_line_terminator();
911                    } else if !self.characters.has_remaining() {
912                        self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
913                        break;
914                    } else {
915                        value.push(ch);
916                        self.characters.next();
917                    }
918                }
919            }
920        }
921
922        let location = start.combine_with(self.cursor_location());
923        Some((Token::String(value), location))
924    }
925
926    fn scan_triple_string_literal(&mut self, delim: char, start: Location, raw: bool) -> Option<(Token, Location)> {
927        let mut lines: Vec<String> = vec![];
928        let mut builder = String::new();
929
930        if raw {
931            loop {
932                let ch = self.characters.peek_or_zero();
933                if ch == delim && self.characters.peek_at_or_zero(1) == delim && self.characters.peek_at_or_zero(2) == delim {
934                    self.characters.skip_count_in_place(3);
935                    lines.push(builder.clone());
936                    break;
937                } else if CharacterValidator::is_line_terminator(ch) {
938                    lines.push(builder.clone());
939                    builder.clear();
940                    self.consume_line_terminator();
941                } else if !self.characters.has_remaining() {
942                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
943                    lines.push(builder.clone());
944                    builder.clear();
945                    break;
946                } else {
947                    builder.push(ch);
948                    self.characters.next();
949                }
950            }
951        } else {
952            loop {
953                if let Some(s) = self.consume_escape_sequence() {
954                    builder.push_str(&s);
955                } else {
956                    let ch = self.characters.peek_or_zero();
957                    if ch == delim && self.characters.peek_at_or_zero(1) == delim && self.characters.peek_at_or_zero(2) == delim {
958                        self.characters.skip_count_in_place(3);
959                        lines.push(builder.clone());
960                        break;
961                    } else if CharacterValidator::is_line_terminator(ch) {
962                        lines.push(builder.clone());
963                        builder.clear();
964                        self.consume_line_terminator();
965                    } else if !self.characters.has_remaining() {
966                        self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
967                        lines.push(builder.clone());
968                        builder.clear();
969                        break;
970                    } else {
971                        builder.push(ch);
972                        self.characters.next();
973                    }
974                }
975            }
976        }
977
978        let location = start.combine_with(self.cursor_location());
979
980        if lines[0].is_empty() && lines.len() > 1 {
981            lines.remove(0);
982        }
983
984        let last_line = lines.pop().unwrap();
985
986        let base_indent = CharacterValidator::indent_count(&last_line);
987
988        let mut lines: Vec<String> = lines.iter().map(|line| {
989            let indent = CharacterValidator::indent_count(line);
990            line[usize::min(base_indent, indent)..].to_owned()
991        }).collect();
992
993        let last_line = last_line[base_indent..].to_owned();
994        if !last_line.is_empty() {
995            lines.push(last_line);
996        }
997
998        let value = lines.join("\n");
999        Some((Token::String(value), location))
1000    }
1001
1002    fn consume_escape_sequence(&mut self) -> Option<String> {
1003        if self.characters.peek_or_zero() != '\\' {
1004            return None;
1005        }
1006        self.characters.next();
1007        if !self.characters.has_remaining() {
1008            self.add_unexpected_error();
1009            return Some("".into());
1010        }
1011        if self.consume_line_terminator() {
1012            return Some("".into());
1013        }
1014        let ch = self.characters.peek_or_zero();
1015        match ch {
1016            '\'' | '"' | '\\' => {
1017                self.characters.next();
1018                Some(ch.into())
1019            },
1020            'u' => {
1021                Some(self.expect_unicode_escape_sequence().into())
1022            },
1023            'x' => {
1024                self.characters.next();
1025                let v = (self.expect_hex_digit() << 4) | self.expect_hex_digit();
1026                let v = char::from_u32(v).unwrap();
1027                Some(v.into())
1028            },
1029            'b' => {
1030                self.characters.next();
1031                Some('\x08'.into())
1032            },
1033            'f' => {
1034                self.characters.next();
1035                Some('\x0C'.into())
1036            },
1037            'n' => {
1038                self.characters.next();
1039                Some('\x0A'.into())
1040            },
1041            'r' => {
1042                self.characters.next();
1043                Some('\x0D'.into())
1044            },
1045            't' => {
1046                self.characters.next();
1047                Some('\x09'.into())
1048            },
1049            'v' => {
1050                self.characters.next();
1051                Some('\x0B'.into())
1052            },
1053            '0' => {
1054                self.characters.next();
1055                if CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
1056                    self.add_unexpected_error();
1057                }
1058                Some('\x00'.into())
1059            },
1060            ch => {
1061                if CharacterValidator::is_dec_digit(ch) {
1062                    self.add_unexpected_error();
1063                }
1064                self.characters.next();
1065                Some(ch.into())
1066            },
1067        }
1068    }
1069
1070    /// Scans for an *InputElementXMLTag* token.
1071    pub fn scan_ie_xml_tag(&mut self) -> (Token, Location) {
1072        let start = self.cursor_location();
1073        let ch = self.characters.peek_or_zero();
1074
1075        // XmlName
1076        if CharacterValidator::is_xml_name_start(ch) {
1077            self.characters.next();
1078            while CharacterValidator::is_xml_name_part(self.characters.peek_or_zero()) {
1079                self.characters.next();
1080            }
1081            let location = start.combine_with(self.cursor_location());
1082            let name = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1083            return (Token::XmlName(name), location);
1084        }
1085
1086        // XmlWhitespace
1087        if CharacterValidator::is_xml_whitespace(ch) {
1088            while CharacterValidator::is_xml_whitespace(self.characters.peek_or_zero()) {
1089                if !self.consume_line_terminator() {
1090                    self.characters.next();
1091                }
1092            }
1093            let location = start.combine_with(self.cursor_location());
1094            return (Token::XmlWhitespace, location);
1095        }
1096
1097        match ch {
1098            // Assign
1099            '=' => {
1100                self.characters.next();
1101                let location = start.combine_with(self.cursor_location());
1102                (Token::Assign, location)
1103            },
1104
1105            // Gt
1106            '>' => {
1107                self.characters.next();
1108                let location = start.combine_with(self.cursor_location());
1109                (Token::Gt, location)
1110            },
1111
1112            // XmlSlashGt
1113            '/' => {
1114                self.characters.next();
1115                if self.characters.peek_or_zero() != '>' {
1116                    self.add_unexpected_error();
1117                    /*
1118                    while self.characters.has_remaining() {
1119                        self.characters.next();
1120                        if self.characters.peek_or_zero() == '>' {
1121                            self.characters.next();
1122                            let location = start.combine_with(self.cursor_location());
1123                            return (Token::XmlSlashGt, location);
1124                        }
1125                    }
1126                    */
1127                    let location = start.combine_with(self.cursor_location());
1128                    return (Token::XmlSlashGt, location);
1129                }
1130                self.characters.next();
1131                let location = start.combine_with(self.cursor_location());
1132                (Token::XmlSlashGt, location)
1133            },
1134
1135            // XmlAttributeValue
1136            '"' | '\'' => {
1137                let delim = ch;
1138                self.characters.next();
1139                while self.characters.peek_or_zero() != delim && self.characters.has_remaining() {
1140                    if !self.consume_line_terminator() {
1141                        self.characters.next();
1142                    }
1143                }
1144                if self.characters.reached_end() {
1145                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForAttributeValue);
1146                    let value = self.compilation_unit.text()[(start.first_offset + 1)..self.cursor_location().first_offset].to_owned();
1147                    let location = start.combine_with(self.cursor_location());
1148                    return (Token::XmlAttributeValue(value), location);
1149                }
1150                let value = self.compilation_unit.text()[(start.first_offset + 1)..self.cursor_location().first_offset].to_owned();
1151                self.characters.next();
1152                
1153                let location = start.combine_with(self.cursor_location());
1154                (Token::XmlAttributeValue(value), location)
1155            },
1156
1157            // BlockOpen
1158            '{' => {
1159                self.characters.next();
1160                let location = start.combine_with(self.cursor_location());
1161                (Token::BlockOpen, location)
1162            },
1163
1164            _ => {
1165                if self.characters.reached_end() {
1166                    return (Token::Eof, self.cursor_location());
1167                }
1168                self.add_unexpected_error();
1169                self.characters.next();
1170                self.scan_ie_xml_tag()
1171            },
1172        }
1173    }
1174
1175    /// Scans for an *InputElementXMLContent* token.
1176    pub fn scan_ie_xml_content(&mut self) -> (Token, Location) {
1177        let start = self.cursor_location();
1178        let ch = self.characters.peek_or_zero();
1179
1180        match ch {
1181            '<' => {
1182                self.characters.next();
1183
1184                // XmlMarkup
1185                if let Some(r) = self.scan_xml_markup(start.clone()) {
1186                    return r;
1187                }
1188
1189                // XmlLtSlash
1190                if self.characters.peek_or_zero() == '/' {
1191                    self.characters.next();
1192                    let location = start.combine_with(self.cursor_location());
1193                    return (Token::XmlLtSlash, location);
1194                }
1195
1196                // Lt
1197                let location = start.combine_with(self.cursor_location());
1198                (Token::Lt, location)
1199            },
1200            
1201            // BlockOpen
1202            '{' => {
1203                self.characters.next();
1204                let location = start.combine_with(self.cursor_location());
1205                (Token::BlockOpen, location)
1206            },
1207
1208            // XmlName
1209            _ => {
1210                if self.characters.reached_end() {
1211                    return (Token::Eof, self.cursor_location());
1212                }
1213                loop {
1214                    let ch = self.characters.peek_or_zero();
1215                    if ['<', '{'].contains(&ch) {
1216                        break;
1217                    }
1218                    if CharacterValidator::is_line_terminator(ch) {
1219                        self.consume_line_terminator();
1220                    } else if self.characters.has_remaining() {
1221                        self.characters.next();
1222                    } else {
1223                        break;
1224                    }
1225                }
1226
1227                let location = start.combine_with(self.cursor_location());
1228                let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1229                (Token::XmlText(content), location)
1230            },
1231        }
1232    }
1233
1234    /// Attempts to scan a XMLMarkup token after a `<` character.
1235    pub fn scan_xml_markup(&mut self, start: Location) -> Option<(Token, Location)> {
1236        // XMLComment
1237        if self.characters.peek_seq(3) == "!--" {
1238            self.characters.skip_count_in_place(3);
1239            loop {
1240                if self.characters.peek_or_zero() == '-' && self.characters.peek_seq(3) == "-->" {
1241                    self.characters.skip_count_in_place(3);
1242                    break;
1243                } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1244                    self.consume_line_terminator();
1245                } else if self.characters.reached_end() {
1246                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForXmlComment);
1247                    break;
1248                } else {
1249                    self.characters.next();
1250                }
1251            }
1252
1253            let location = start.combine_with(self.cursor_location());
1254            let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1255
1256            return Some((Token::XmlMarkup(content), location));
1257        }
1258
1259        // XMLCDATA
1260        if self.characters.peek_seq(8) == "![CDATA[" {
1261            self.characters.skip_count_in_place(8);
1262            loop {
1263                if self.characters.peek_or_zero() == ']' && self.characters.peek_seq(3) == "]]>" {
1264                    self.characters.skip_count_in_place(3);
1265                    break;
1266                } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1267                    self.consume_line_terminator();
1268                } else if self.characters.reached_end() {
1269                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForCData);
1270                    break;
1271                } else {
1272                    self.characters.next();
1273                }
1274            }
1275
1276            let location = start.combine_with(self.cursor_location());
1277            let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1278
1279            return Some((Token::XmlMarkup(content), location));
1280        }
1281
1282        // XMLPI
1283        if self.characters.peek_or_zero() == '?' {
1284            self.characters.next();
1285            loop {
1286                if self.characters.peek_or_zero() == '?' && self.characters.peek_at_or_zero(1) == '>' {
1287                    self.characters.skip_count_in_place(2);
1288                    break;
1289                } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1290                    self.consume_line_terminator();
1291                } else if self.characters.reached_end() {
1292                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForPi);
1293                    break;
1294                } else {
1295                    self.characters.next();
1296                }
1297            }
1298
1299            let location = start.combine_with(self.cursor_location());
1300            let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1301
1302            return Some((Token::XmlMarkup(content), location));
1303        }
1304
1305        None
1306    }
1307}