1use crate::ns::*;
2
3pub struct Tokenizer<'input> {
4 compilation_unit: Rc<CompilationUnit>,
5 characters: CharacterReader<'input>,
6}
7
8impl<'input> Tokenizer<'input> {
9 pub fn new(compilation_unit: &'input Rc<CompilationUnit>, options: &ParserOptions) -> Self {
11 let text: &'input str = compilation_unit.text();
12 let compilation_unit = compilation_unit.clone();
13 let characters: CharacterReader<'input>;
14 if let Some(range) = options.byte_range {
15 characters = CharacterReader::from_offset(&text[0..range.1], range.0);
16 } else {
17 characters = CharacterReader::from(text);
18 }
19 Self {
20 compilation_unit,
21 characters,
22 }
23 }
24
25 pub fn compilation_unit(&self) -> &Rc<CompilationUnit> {
26 &self.compilation_unit
27 }
28
29 pub fn characters(&self) -> &CharacterReader<'input> {
30 &self.characters
31 }
32
33 fn add_syntax_error(&self, location: &Location, kind: DiagnosticKind, arguments: Vec<Rc<dyn DiagnosticArgument>>) {
34 if self.compilation_unit.prevent_equal_offset_error(location) {
35 return;
36 }
37 self.compilation_unit.add_diagnostic(Diagnostic::new_syntax_error(location, kind, arguments));
38 }
39
40 pub fn scan_ie_div(&mut self) -> (Token, Location) {
42 loop {
43 let ch = self.characters.peek_or_zero();
44 if CharacterValidator::is_whitespace(ch) {
45 self.characters.next();
46 } else if self.consume_line_terminator() || self.consume_comment() {
47 } else {
49 break;
50 }
51 }
52 if let Some(result) = self.scan_identifier() {
53 return result;
54 }
55 if let Some(result) = self.scan_dot_or_numeric_literal() {
56 return result;
57 }
58 if let Some(result) = self.scan_string_literal(false) {
59 return result;
60 }
61 let start = self.cursor_location();
62 match self.characters.peek_or_zero() {
63 ',' => {
64 self.characters.next();
66 let location = start.combine_with(self.cursor_location());
67 return (Token::Comma, location);
68 },
69 '(' => {
70 self.characters.next();
72 let location = start.combine_with(self.cursor_location());
73 return (Token::ParenOpen, location);
74 },
75 ')' => {
76 self.characters.next();
78 let location = start.combine_with(self.cursor_location());
79 return (Token::ParenClose, location);
80 },
81 '[' => {
82 self.characters.next();
84 let location = start.combine_with(self.cursor_location());
85 return (Token::SquareOpen, location);
86 },
87 ']' => {
88 self.characters.next();
90 let location = start.combine_with(self.cursor_location());
91 return (Token::SquareClose, location);
92 },
93 '{' => {
94 self.characters.next();
96 let location = start.combine_with(self.cursor_location());
97 return (Token::BlockOpen, location);
98 },
99 '}' => {
100 self.characters.next();
102 let location = start.combine_with(self.cursor_location());
103 return (Token::BlockClose, location);
104 },
105 ':' => {
106 self.characters.next();
107 if self.characters.peek_or_zero() == ':' {
109 self.characters.next();
110 let location = start.combine_with(self.cursor_location());
111 return (Token::ColonColon, location);
112 }
113 let location = start.combine_with(self.cursor_location());
115 return (Token::Colon, location);
116 },
117 '=' => {
118 self.characters.next();
119 let ch = self.characters.peek_or_zero();
120 if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
122 self.characters.skip_count_in_place(2);
123 let location = start.combine_with(self.cursor_location());
124 return (Token::StrictEquals, location);
125 }
126 if ch == '=' {
128 self.characters.next();
129 let location = start.combine_with(self.cursor_location());
130 return (Token::Equals, location);
131 }
132 let location = start.combine_with(self.cursor_location());
134 return (Token::Assign, location);
135 },
136 '!' => {
137 self.characters.next();
138 let ch = self.characters.peek_or_zero();
139 if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
141 self.characters.skip_count_in_place(2);
142 let location = start.combine_with(self.cursor_location());
143 return (Token::StrictNotEquals, location);
144 }
145 if ch == '=' {
147 self.characters.next();
148 let location = start.combine_with(self.cursor_location());
149 return (Token::NotEquals, location);
150 }
151 let location = start.combine_with(self.cursor_location());
153 return (Token::Exclamation, location);
154 },
155 '?' => {
156 self.characters.next();
157 let ch = self.characters.peek_or_zero();
158 if ch == '.' {
160 self.characters.next();
161 let location = start.combine_with(self.cursor_location());
162 return (Token::OptionalChaining, location);
163 }
164 if ch == '?' && self.characters.peek_at_or_zero(1) == '=' {
166 self.characters.skip_count_in_place(2);
167 let location = start.combine_with(self.cursor_location());
168 return (Token::NullCoalescingAssign, location);
169 }
170 if ch == '?' {
172 self.characters.next();
173 let location = start.combine_with(self.cursor_location());
174 return (Token::NullCoalescing, location);
175 }
176 let location = start.combine_with(self.cursor_location());
178 return (Token::Question, location);
179 },
180 ';' => {
181 self.characters.next();
183 let location = start.combine_with(self.cursor_location());
184 return (Token::Semicolon, location);
185 },
186 '<' => {
187 self.characters.next();
188 let ch = self.characters.peek_or_zero();
189 if ch == '=' {
191 self.characters.next();
192 let location = start.combine_with(self.cursor_location());
193 return (Token::Le, location);
194 }
195 if ch == '<' && self.characters.peek_at_or_zero(1) == '=' {
197 self.characters.skip_count_in_place(2);
198 let location = start.combine_with(self.cursor_location());
199 return (Token::LeftShiftAssign, location);
200 }
201 if ch == '<' {
203 self.characters.next();
204 let location = start.combine_with(self.cursor_location());
205 return (Token::LeftShift, location);
206 }
207 let location = start.combine_with(self.cursor_location());
209 return (Token::Lt, location);
210 },
211 '>' => {
212 self.characters.next();
213 let ch = self.characters.peek_or_zero();
214 if ch == '=' {
216 self.characters.next();
217 let location = start.combine_with(self.cursor_location());
218 return (Token::Ge, location);
219 }
220 if ch == '>' && self.characters.peek_at_or_zero(1) == '=' {
222 self.characters.skip_count_in_place(2);
223 let location = start.combine_with(self.cursor_location());
224 return (Token::RightShiftAssign, location);
225 }
226 if ch == '>' && self.characters.peek_seq(3) == ">>=" {
228 self.characters.skip_count_in_place(3);
229 let location = start.combine_with(self.cursor_location());
230 return (Token::UnsignedRightShiftAssign, location);
231 }
232 if ch == '>' && self.characters.peek_at_or_zero(1) == '>' {
234 self.characters.skip_count_in_place(2);
235 let location = start.combine_with(self.cursor_location());
236 return (Token::UnsignedRightShift, location);
237 }
238 if ch == '>' {
240 self.characters.next();
241 let location = start.combine_with(self.cursor_location());
242 return (Token::RightShift, location);
243 }
244 let location = start.combine_with(self.cursor_location());
246 return (Token::Gt, location);
247 },
248 '@' => {
249 self.characters.next();
251 if let Some(token) = self.scan_string_literal(true) {
252 return token;
253 }
254 let location = start.combine_with(self.cursor_location());
255 return (Token::Attribute, location);
256 },
257 '+' => {
258 self.characters.next();
259 let ch = self.characters.peek_or_zero();
260 if ch == '+' {
262 self.characters.next();
263 let location = start.combine_with(self.cursor_location());
264 return (Token::Increment, location);
265 }
266 if ch == '=' {
268 self.characters.next();
269 let location = start.combine_with(self.cursor_location());
270 return (Token::AddAssign, location);
271 }
272 let location = start.combine_with(self.cursor_location());
274 return (Token::Plus, location);
275 },
276 '-' => {
277 self.characters.next();
278 let ch = self.characters.peek_or_zero();
279 if ch == '-' {
281 self.characters.next();
282 let location = start.combine_with(self.cursor_location());
283 return (Token::Decrement, location);
284 }
285 if ch == '=' {
287 self.characters.next();
288 let location = start.combine_with(self.cursor_location());
289 return (Token::SubtractAssign, location);
290 }
291 let location = start.combine_with(self.cursor_location());
293 return (Token::Minus, location);
294 },
295 '*' => {
296 self.characters.next();
297 let ch = self.characters.peek_or_zero();
298 if ch == '*' && self.characters.peek_at_or_zero(1) == '=' {
300 self.characters.skip_count_in_place(2);
301 let location = start.combine_with(self.cursor_location());
302 return (Token::PowerAssign, location);
303 }
304 if ch == '*' {
306 self.characters.next();
307 let location = start.combine_with(self.cursor_location());
308 return (Token::Power, location);
309 }
310 if ch == '=' {
312 self.characters.next();
313 let location = start.combine_with(self.cursor_location());
314 return (Token::MultiplyAssign, location);
315 }
316 let location = start.combine_with(self.cursor_location());
318 return (Token::Times, location);
319 },
320 '/' => {
321 self.characters.next();
322 let ch = self.characters.peek_or_zero();
323 if ch == '=' {
325 self.characters.next();
326 let location = start.combine_with(self.cursor_location());
327 return (Token::DivideAssign, location);
328 }
329 let location = start.combine_with(self.cursor_location());
331 return (Token::Div, location);
332 },
333 '%' => {
334 self.characters.next();
335 let ch = self.characters.peek_or_zero();
336 if ch == '=' {
338 self.characters.next();
339 let location = start.combine_with(self.cursor_location());
340 return (Token::RemainderAssign, location);
341 }
342 let location = start.combine_with(self.cursor_location());
344 return (Token::Percent, location);
345 },
346 '&' => {
347 self.characters.next();
348 let ch = self.characters.peek_or_zero();
349 if ch == '&' && self.characters.peek_at_or_zero(1) == '=' {
351 self.characters.skip_count_in_place(2);
352 let location = start.combine_with(self.cursor_location());
353 return (Token::LogicalAndAssign, location);
354 }
355 if ch == '&' {
357 self.characters.next();
358 let location = start.combine_with(self.cursor_location());
359 return (Token::LogicalAnd, location);
360 }
361 if ch == '=' {
363 self.characters.next();
364 let location = start.combine_with(self.cursor_location());
365 return (Token::BitwiseAndAssign, location);
366 }
367 let location = start.combine_with(self.cursor_location());
369 return (Token::Ampersand, location);
370 },
371 '^' => {
372 self.characters.next();
373 let ch = self.characters.peek_or_zero();
374 if ch == '^' && self.characters.peek_at_or_zero(1) == '=' {
376 self.characters.skip_count_in_place(2);
377 let location = start.combine_with(self.cursor_location());
378 return (Token::LogicalXorAssign, location);
379 }
380 if ch == '^' {
382 self.characters.next();
383 let location = start.combine_with(self.cursor_location());
384 return (Token::LogicalXor, location);
385 }
386 if ch == '=' {
388 self.characters.next();
389 let location = start.combine_with(self.cursor_location());
390 return (Token::BitwiseXorAssign, location);
391 }
392 let location = start.combine_with(self.cursor_location());
394 return (Token::Hat, location);
395 },
396 '|' => {
397 self.characters.next();
398 let ch = self.characters.peek_or_zero();
399 if ch == '|' && self.characters.peek_at_or_zero(1) == '=' {
401 self.characters.skip_count_in_place(2);
402 let location = start.combine_with(self.cursor_location());
403 return (Token::LogicalOrAssign, location);
404 }
405 if ch == '|' {
407 self.characters.next();
408 let location = start.combine_with(self.cursor_location());
409 return (Token::LogicalOr, location);
410 }
411 if ch == '=' {
413 self.characters.next();
414 let location = start.combine_with(self.cursor_location());
415 return (Token::BitwiseOrAssign, location);
416 }
417 let location = start.combine_with(self.cursor_location());
419 return (Token::Pipe, location);
420 },
421 '~' => {
422 self.characters.next();
424 let location = start.combine_with(self.cursor_location());
425 return (Token::Tilde, location);
426 },
427 _ => {
428 if self.characters.has_remaining() {
429 self.add_unexpected_error();
430 self.characters.next();
431 return self.scan_ie_div();
432 } else {
434 return (Token::Eof, start)
435 }
436 },
437 }
438 }
439
440 pub fn scan_regexp_literal(&mut self, start: Location, mut body: String) -> (Token, Location) {
443 loop {
444 let ch = self.characters.peek_or_zero();
445 if ch == '/' {
446 self.characters.next();
447 break;
448 } else if ch == '\\' {
449 self.characters.next();
450 body.push('\\');
451 let ch = self.characters.peek_or_zero();
452 if self.characters.reached_end() {
453 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
454 break;
455 } else if CharacterValidator::is_line_terminator(ch) {
456 self.add_unexpected_error();
457 self.consume_line_terminator();
458 } else {
459 self.characters.next();
460 body.push(ch);
461 }
462 } else if CharacterValidator::is_line_terminator(ch) {
463 body.push('\n');
464 self.consume_line_terminator();
465 } else if self.characters.reached_end() {
466 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
467 break;
468 } else {
469 body.push(ch);
470 self.characters.next();
471 }
472 }
473
474 let mut flags = String::new();
475 while let Some((ch, _)) = self.consume_identifier_part() {
476 flags.push(ch);
477 }
478
479 let location = start.combine_with(self.cursor_location());
480 (Token::RegExp { body, flags }, location)
481 }
482
483 fn character_ahead_location(&self) -> Location {
484 if self.characters.reached_end() {
485 return self.cursor_location();
486 }
487 let offset = self.characters.index();
488 let mut next_characters = self.characters.clone();
489 next_characters.next().unwrap();
490 Location::with_offsets(&self.compilation_unit, offset, next_characters.index())
491 }
492
493 pub fn cursor_location(&self) -> Location {
494 let offset = self.characters.index();
495 Location::with_offset(&self.compilation_unit, offset)
496 }
497
498 fn add_unexpected_error(&self) {
499 if self.characters.has_remaining() {
500 self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::UnexpectedCharacter, diagarg![self.characters.peek_or_zero().to_string()])
501 } else {
502 self.add_syntax_error(&self.cursor_location(), DiagnosticKind::UnexpectedEnd, vec![])
503 }
504 }
505
506 fn add_unexpected_eof_error(&self, kind: DiagnosticKind) {
507 self.add_syntax_error(&self.cursor_location(), kind, vec![]);
508 }
509
510 fn consume_line_terminator(&mut self) -> bool {
512 let ch = self.characters.peek_or_zero();
513 if ch == '\x0D' && self.characters.peek_at_or_zero(1) == '\x0A' {
514 self.characters.skip_count_in_place(2);
515 return true;
517 }
518 if CharacterValidator::is_line_terminator(ch) {
519 self.characters.next();
520 return true;
522 }
523 false
524 }
525
526 fn consume_comment(&mut self) -> bool {
527 let ch = self.characters.peek_or_zero();
528 if ch != '/' {
529 return false;
530 }
531 let ch2 = self.characters.peek_at_or_zero(1);
532 if ch2 == '/' {
533 let start = self.cursor_location();
534 self.characters.skip_count_in_place(2);
535 while !CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) && self.characters.has_remaining() {
536 self.characters.skip_in_place();
537 }
538 let location = start.combine_with(self.cursor_location());
539 self.consume_line_terminator();
540
541 self.compilation_unit.add_comment(Rc::new(Comment {
542 multiline: false,
543 content: RefCell::new(self.compilation_unit.text()[(location.first_offset() + 2)..location.last_offset()].to_owned()),
544 location: RefCell::new(location),
545 }));
546
547 return true;
548 }
549 if ch2 == '*' {
550 let start = self.cursor_location();
551 self.characters.skip_count_in_place(2);
552
553 loop {
554 if self.characters.peek_or_zero() == '*' && self.characters.peek_at_or_zero(1) == '/' {
555 self.characters.skip_count_in_place(2);
556 break;
557 } else if self.consume_line_terminator() {
558 } else if self.characters.has_remaining() {
560 self.characters.skip_in_place();
561 } else {
562 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForMultiLineComment);
563 break;
564 }
565 }
566
567 let location = start.combine_with(self.cursor_location());
568
569 let i = location.first_offset() + 2;
570 let j = decrease_last_offset(i, location.last_offset(), 2);
571
572 self.compilation_unit.add_comment(Rc::new(Comment {
573 multiline: true,
574 content: RefCell::new(self.compilation_unit.text()[i..j].to_owned()),
575 location: RefCell::new(location),
576 }));
577
578 return true;
579 }
580 false
581 }
582
583 fn scan_identifier(&mut self) -> Option<(Token, Location)> {
584 let start = self.cursor_location();
585 let mut escaped = false;
586 let Some((ch, escaped_2)) = self.consume_identifier_start() else {
587 return None;
588 };
589 escaped = escaped || escaped_2;
590 let mut name = String::new();
591 name.push(ch);
592 while let Some((ch, escaped_2)) = self.consume_identifier_part() {
593 escaped = escaped || escaped_2;
594 name.push(ch);
595 }
596
597 let location = start.combine_with(self.cursor_location());
598 if !escaped {
599 if let Some(token) = As3ReservedWord::token(name.as_ref()) {
600 return Some((token, location));
601 }
602 }
603 Some((Token::Identifier(name), location))
604 }
605
606 fn consume_identifier_start(&mut self) -> Option<(char, bool)> {
608 let ch = self.characters.peek_or_zero();
609 if CharacterValidator::is_identifier_start(ch) {
610 self.characters.next();
611 return Some((ch, false));
612 }
613 if self.characters.peek_or_zero() == '\\' {
614 self.characters.next();
615 return Some((self.expect_unicode_escape_sequence(), true));
616 }
617 None
618 }
619
620 fn consume_identifier_part(&mut self) -> Option<(char, bool)> {
622 let ch = self.characters.peek_or_zero();
623 if CharacterValidator::is_identifier_part(ch) {
624 self.characters.next();
625 return Some((ch, false));
626 }
627 if self.characters.peek_or_zero() == '\\' {
628 self.characters.next();
629 return Some((self.expect_unicode_escape_sequence(), true));
630 }
631 None
632 }
633
634 fn expect_unicode_escape_sequence(&mut self) -> char {
636 let start = self.cursor_location();
637 if self.characters.peek_or_zero() != 'u' {
638 self.add_unexpected_error();
639 return '\x5F';
640 }
641 self.characters.next();
642
643 if CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
645 let r = char::from_u32(self.expect_hex_digit() << 12
646 | (self.expect_hex_digit() << 8)
647 | (self.expect_hex_digit() << 4)
648 | self.expect_hex_digit());
649 let Some(r) = r else {
650 self.add_syntax_error(&start.combine_with(self.cursor_location()), DiagnosticKind::InvalidEscapeValue, vec![]);
651 return '\x5F';
652 };
653 return r;
654 }
655
656 if self.characters.peek_or_zero() != '{' {
658 self.add_unexpected_error();
659 return '\x5F';
660 }
661 self.characters.next();
662 while CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
663 self.characters.next();
664 }
665 if self.characters.peek_or_zero() != '}' {
666 self.add_unexpected_error();
667 return '\x5F';
668 }
669 self.characters.next();
670 let location = start.combine_with(self.cursor_location());
671 let r = u32::from_str_radix(&self.compilation_unit.text()[(start.first_offset + 2)..(location.last_offset - 1)], 16);
672 let Ok(r) = r else {
673 self.add_syntax_error(&location, DiagnosticKind::InvalidEscapeValue, vec![]);
674 return '\x5F';
675 };
676 let r = char::from_u32(r);
677 let Some(r) = r else {
678 self.add_syntax_error(&location, DiagnosticKind::InvalidEscapeValue, vec![]);
679 return '\x5F';
680 };
681 r
682 }
683
684 fn expect_hex_digit(&mut self) -> u32 {
685 let ch = self.characters.peek_or_zero();
686 let mv = CharacterValidator::hex_digit_mv(ch);
687 if mv.is_none() {
688 self.add_unexpected_error();
689 return 0x5F;
690 }
691 self.characters.next();
692 mv.unwrap()
693 }
694
695 fn scan_dot_or_numeric_literal(&mut self) -> Option<(Token, Location)> {
696 let start = self.cursor_location();
697 let ch = self.characters.peek_or_zero();
698 let mut initial_dot = false;
699 if ch == '.' {
700 initial_dot = true;
701 self.characters.next();
702
703 let seq = self.characters.peek_seq(2);
704 if seq == ".." {
706 self.characters.skip_count_in_place(2);
707 return Some((Token::Ellipsis, start.combine_with(self.cursor_location())));
708 }
709 let ch = seq.get(..1).map(|ch| ch.chars().next().unwrap()).unwrap_or('\x00');
710 if ch == '.' {
712 self.characters.next();
713 return Some((Token::Descendants, start.combine_with(self.cursor_location())));
714 }
715 if !CharacterValidator::is_dec_digit(ch) {
717 return Some((Token::Dot, start.combine_with(self.cursor_location())));
718 }
719
720 while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
722 self.characters.next();
723 self.consume_underscore_followed_by_dec_digit();
724 }
725 } else if ch == '0' {
726 self.characters.next();
727 let ch_2 = self.characters.peek_or_zero();
728
729 if ['X', 'x'].contains(&ch_2) {
731 self.characters.next();
732 return self.scan_hex_literal(start.clone());
733 }
734
735 if ['B', 'b'].contains(&ch_2) {
737 self.characters.next();
738 return self.scan_bin_literal(start.clone());
739 }
740 } else if CharacterValidator::is_dec_digit(ch) {
741 while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
742 self.characters.next();
743 self.consume_underscore_followed_by_dec_digit();
744 }
745 } else {
746 return None;
747 }
748
749 if !initial_dot && self.characters.peek_or_zero() == '.' {
750 self.characters.next();
751 while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
757 self.characters.next();
758 self.consume_underscore_followed_by_dec_digit();
759 }
760 }
761
762 if ['E', 'e'].contains(&self.characters.peek_or_zero()) {
764 self.characters.next();
765 if ['+', '-'].contains(&self.characters.peek_or_zero()) {
766 self.characters.next();
767 }
768 if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
769 self.add_unexpected_error();
770 }
771 while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
772 self.characters.next();
773 self.consume_underscore_followed_by_dec_digit();
774 }
775 }
776
777 let string = self.compilation_unit.text()[start.first_offset..self.characters.index()].to_owned();
778
779 let mut suffix = NumberSuffix::None;
780 if self.characters.peek_or_zero() == 'f' || self.characters.peek_or_zero() == 'F' {
781 suffix = NumberSuffix::F;
782 self.characters.next();
783 }
784 self.unallow_numeric_suffix();
785
786 let location = start.combine_with(self.cursor_location());
787
788 Some((Token::Number(string, suffix), location))
789 }
790
791 fn scan_hex_literal(&mut self, start: Location) -> Option<(Token, Location)> {
792 if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
793 self.add_unexpected_error();
794 }
795 while CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
796 self.characters.next();
797 self.consume_underscore_followed_by_hex_digit();
798 }
799
800 let suffix = NumberSuffix::None;
801 self.unallow_numeric_suffix();
802
803 let location = start.combine_with(self.cursor_location());
804 let s = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
805 Some((Token::Number(s, suffix), location))
806 }
807
808 fn scan_bin_literal(&mut self, start: Location) -> Option<(Token, Location)> {
809 if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
810 self.add_unexpected_error();
811 }
812 while CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
813 self.characters.next();
814 self.consume_underscore_followed_by_bin_digit();
815 }
816
817 let suffix = NumberSuffix::None;
818 self.unallow_numeric_suffix();
819
820 let location = start.combine_with(self.cursor_location());
821 let s = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
822 Some((Token::Number(s, suffix), location))
823 }
824
825 fn consume_underscore_followed_by_dec_digit(&mut self) {
826 if self.characters.peek_or_zero() == '_' {
827 self.characters.next();
828 if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
829 self.add_unexpected_error();
830 }
831 self.characters.next();
832 }
833 }
834
835 fn consume_underscore_followed_by_hex_digit(&mut self) {
836 if self.characters.peek_or_zero() == '_' {
837 self.characters.next();
838 if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
839 self.add_unexpected_error();
840 }
841 self.characters.next();
842 }
843 }
844
845 fn consume_underscore_followed_by_bin_digit(&mut self) {
846 if self.characters.peek_or_zero() == '_' {
847 self.characters.next();
848 if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
849 self.add_unexpected_error();
850 }
851 self.characters.next();
852 }
853 }
854
855 fn unallow_numeric_suffix(&self) {
856 if CharacterValidator::is_identifier_start(self.characters.peek_or_zero()) {
857 self.add_unexpected_error();
858 }
859 }
860
861 fn scan_string_literal(&mut self, raw: bool) -> Option<(Token, Location)> {
862 let delim = self.characters.peek_or_zero();
863 if !['"', '\''].contains(&delim) {
864 return None;
865 }
866 let mut start = self.cursor_location();
867 if raw {
869 start = Location::with_offset(&start.compilation_unit(), start.first_offset() - 1);
870 }
871
872 self.characters.next();
873
874 if self.characters.peek_or_zero() == delim && self.characters.peek_at_or_zero(1) == delim {
876 self.characters.skip_count_in_place(2);
877 return self.scan_triple_string_literal(delim, start, raw);
878 }
879
880 let mut value = String::new();
881
882 if raw {
883 loop {
884 let ch = self.characters.peek_or_zero();
885 if ch == delim {
886 self.characters.next();
887 break;
888 } else if CharacterValidator::is_line_terminator(ch) {
889 self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::StringLiteralMustBeTerminatedBeforeLineBreak, vec![]);
890 self.consume_line_terminator();
891 } else if !self.characters.has_remaining() {
892 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
893 break;
894 } else {
895 value.push(ch);
896 self.characters.next();
897 }
898 }
899 } else {
900 loop {
901 if let Some(s) = self.consume_escape_sequence() {
902 value.push_str(&s);
903 } else {
904 let ch = self.characters.peek_or_zero();
905 if ch == delim {
906 self.characters.next();
907 break;
908 } else if CharacterValidator::is_line_terminator(ch) {
909 self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::StringLiteralMustBeTerminatedBeforeLineBreak, vec![]);
910 self.consume_line_terminator();
911 } else if !self.characters.has_remaining() {
912 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
913 break;
914 } else {
915 value.push(ch);
916 self.characters.next();
917 }
918 }
919 }
920 }
921
922 let location = start.combine_with(self.cursor_location());
923 Some((Token::String(value), location))
924 }
925
926 fn scan_triple_string_literal(&mut self, delim: char, start: Location, raw: bool) -> Option<(Token, Location)> {
927 let mut lines: Vec<String> = vec![];
928 let mut builder = String::new();
929
930 if raw {
931 loop {
932 let ch = self.characters.peek_or_zero();
933 if ch == delim && self.characters.peek_at_or_zero(1) == delim && self.characters.peek_at_or_zero(2) == delim {
934 self.characters.skip_count_in_place(3);
935 lines.push(builder.clone());
936 break;
937 } else if CharacterValidator::is_line_terminator(ch) {
938 lines.push(builder.clone());
939 builder.clear();
940 self.consume_line_terminator();
941 } else if !self.characters.has_remaining() {
942 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
943 lines.push(builder.clone());
944 builder.clear();
945 break;
946 } else {
947 builder.push(ch);
948 self.characters.next();
949 }
950 }
951 } else {
952 loop {
953 if let Some(s) = self.consume_escape_sequence() {
954 builder.push_str(&s);
955 } else {
956 let ch = self.characters.peek_or_zero();
957 if ch == delim && self.characters.peek_at_or_zero(1) == delim && self.characters.peek_at_or_zero(2) == delim {
958 self.characters.skip_count_in_place(3);
959 lines.push(builder.clone());
960 break;
961 } else if CharacterValidator::is_line_terminator(ch) {
962 lines.push(builder.clone());
963 builder.clear();
964 self.consume_line_terminator();
965 } else if !self.characters.has_remaining() {
966 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
967 lines.push(builder.clone());
968 builder.clear();
969 break;
970 } else {
971 builder.push(ch);
972 self.characters.next();
973 }
974 }
975 }
976 }
977
978 let location = start.combine_with(self.cursor_location());
979
980 if lines[0].is_empty() && lines.len() > 1 {
981 lines.remove(0);
982 }
983
984 let last_line = lines.pop().unwrap();
985
986 let base_indent = CharacterValidator::indent_count(&last_line);
987
988 let mut lines: Vec<String> = lines.iter().map(|line| {
989 let indent = CharacterValidator::indent_count(line);
990 line[usize::min(base_indent, indent)..].to_owned()
991 }).collect();
992
993 let last_line = last_line[base_indent..].to_owned();
994 if !last_line.is_empty() {
995 lines.push(last_line);
996 }
997
998 let value = lines.join("\n");
999 Some((Token::String(value), location))
1000 }
1001
1002 fn consume_escape_sequence(&mut self) -> Option<String> {
1003 if self.characters.peek_or_zero() != '\\' {
1004 return None;
1005 }
1006 self.characters.next();
1007 if !self.characters.has_remaining() {
1008 self.add_unexpected_error();
1009 return Some("".into());
1010 }
1011 if self.consume_line_terminator() {
1012 return Some("".into());
1013 }
1014 let ch = self.characters.peek_or_zero();
1015 match ch {
1016 '\'' | '"' | '\\' => {
1017 self.characters.next();
1018 Some(ch.into())
1019 },
1020 'u' => {
1021 Some(self.expect_unicode_escape_sequence().into())
1022 },
1023 'x' => {
1024 self.characters.next();
1025 let v = (self.expect_hex_digit() << 4) | self.expect_hex_digit();
1026 let v = char::from_u32(v).unwrap();
1027 Some(v.into())
1028 },
1029 'b' => {
1030 self.characters.next();
1031 Some('\x08'.into())
1032 },
1033 'f' => {
1034 self.characters.next();
1035 Some('\x0C'.into())
1036 },
1037 'n' => {
1038 self.characters.next();
1039 Some('\x0A'.into())
1040 },
1041 'r' => {
1042 self.characters.next();
1043 Some('\x0D'.into())
1044 },
1045 't' => {
1046 self.characters.next();
1047 Some('\x09'.into())
1048 },
1049 'v' => {
1050 self.characters.next();
1051 Some('\x0B'.into())
1052 },
1053 '0' => {
1054 self.characters.next();
1055 if CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
1056 self.add_unexpected_error();
1057 }
1058 Some('\x00'.into())
1059 },
1060 ch => {
1061 if CharacterValidator::is_dec_digit(ch) {
1062 self.add_unexpected_error();
1063 }
1064 self.characters.next();
1065 Some(ch.into())
1066 },
1067 }
1068 }
1069
1070 pub fn scan_ie_xml_tag(&mut self) -> (Token, Location) {
1072 let start = self.cursor_location();
1073 let ch = self.characters.peek_or_zero();
1074
1075 if CharacterValidator::is_xml_name_start(ch) {
1077 self.characters.next();
1078 while CharacterValidator::is_xml_name_part(self.characters.peek_or_zero()) {
1079 self.characters.next();
1080 }
1081 let location = start.combine_with(self.cursor_location());
1082 let name = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1083 return (Token::XmlName(name), location);
1084 }
1085
1086 if CharacterValidator::is_xml_whitespace(ch) {
1088 while CharacterValidator::is_xml_whitespace(self.characters.peek_or_zero()) {
1089 if !self.consume_line_terminator() {
1090 self.characters.next();
1091 }
1092 }
1093 let location = start.combine_with(self.cursor_location());
1094 return (Token::XmlWhitespace, location);
1095 }
1096
1097 match ch {
1098 '=' => {
1100 self.characters.next();
1101 let location = start.combine_with(self.cursor_location());
1102 (Token::Assign, location)
1103 },
1104
1105 '>' => {
1107 self.characters.next();
1108 let location = start.combine_with(self.cursor_location());
1109 (Token::Gt, location)
1110 },
1111
1112 '/' => {
1114 self.characters.next();
1115 if self.characters.peek_or_zero() != '>' {
1116 self.add_unexpected_error();
1117 let location = start.combine_with(self.cursor_location());
1128 return (Token::XmlSlashGt, location);
1129 }
1130 self.characters.next();
1131 let location = start.combine_with(self.cursor_location());
1132 (Token::XmlSlashGt, location)
1133 },
1134
1135 '"' | '\'' => {
1137 let delim = ch;
1138 self.characters.next();
1139 while self.characters.peek_or_zero() != delim && self.characters.has_remaining() {
1140 if !self.consume_line_terminator() {
1141 self.characters.next();
1142 }
1143 }
1144 if self.characters.reached_end() {
1145 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForAttributeValue);
1146 let value = self.compilation_unit.text()[(start.first_offset + 1)..self.cursor_location().first_offset].to_owned();
1147 let location = start.combine_with(self.cursor_location());
1148 return (Token::XmlAttributeValue(value), location);
1149 }
1150 let value = self.compilation_unit.text()[(start.first_offset + 1)..self.cursor_location().first_offset].to_owned();
1151 self.characters.next();
1152
1153 let location = start.combine_with(self.cursor_location());
1154 (Token::XmlAttributeValue(value), location)
1155 },
1156
1157 '{' => {
1159 self.characters.next();
1160 let location = start.combine_with(self.cursor_location());
1161 (Token::BlockOpen, location)
1162 },
1163
1164 _ => {
1165 if self.characters.reached_end() {
1166 return (Token::Eof, self.cursor_location());
1167 }
1168 self.add_unexpected_error();
1169 self.characters.next();
1170 self.scan_ie_xml_tag()
1171 },
1172 }
1173 }
1174
1175 pub fn scan_ie_xml_content(&mut self) -> (Token, Location) {
1177 let start = self.cursor_location();
1178 let ch = self.characters.peek_or_zero();
1179
1180 match ch {
1181 '<' => {
1182 self.characters.next();
1183
1184 if let Some(r) = self.scan_xml_markup(start.clone()) {
1186 return r;
1187 }
1188
1189 if self.characters.peek_or_zero() == '/' {
1191 self.characters.next();
1192 let location = start.combine_with(self.cursor_location());
1193 return (Token::XmlLtSlash, location);
1194 }
1195
1196 let location = start.combine_with(self.cursor_location());
1198 (Token::Lt, location)
1199 },
1200
1201 '{' => {
1203 self.characters.next();
1204 let location = start.combine_with(self.cursor_location());
1205 (Token::BlockOpen, location)
1206 },
1207
1208 _ => {
1210 if self.characters.reached_end() {
1211 return (Token::Eof, self.cursor_location());
1212 }
1213 loop {
1214 let ch = self.characters.peek_or_zero();
1215 if ['<', '{'].contains(&ch) {
1216 break;
1217 }
1218 if CharacterValidator::is_line_terminator(ch) {
1219 self.consume_line_terminator();
1220 } else if self.characters.has_remaining() {
1221 self.characters.next();
1222 } else {
1223 break;
1224 }
1225 }
1226
1227 let location = start.combine_with(self.cursor_location());
1228 let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1229 (Token::XmlText(content), location)
1230 },
1231 }
1232 }
1233
1234 pub fn scan_xml_markup(&mut self, start: Location) -> Option<(Token, Location)> {
1236 if self.characters.peek_seq(3) == "!--" {
1238 self.characters.skip_count_in_place(3);
1239 loop {
1240 if self.characters.peek_or_zero() == '-' && self.characters.peek_seq(3) == "-->" {
1241 self.characters.skip_count_in_place(3);
1242 break;
1243 } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1244 self.consume_line_terminator();
1245 } else if self.characters.reached_end() {
1246 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForXmlComment);
1247 break;
1248 } else {
1249 self.characters.next();
1250 }
1251 }
1252
1253 let location = start.combine_with(self.cursor_location());
1254 let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1255
1256 return Some((Token::XmlMarkup(content), location));
1257 }
1258
1259 if self.characters.peek_seq(8) == "![CDATA[" {
1261 self.characters.skip_count_in_place(8);
1262 loop {
1263 if self.characters.peek_or_zero() == ']' && self.characters.peek_seq(3) == "]]>" {
1264 self.characters.skip_count_in_place(3);
1265 break;
1266 } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1267 self.consume_line_terminator();
1268 } else if self.characters.reached_end() {
1269 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForCData);
1270 break;
1271 } else {
1272 self.characters.next();
1273 }
1274 }
1275
1276 let location = start.combine_with(self.cursor_location());
1277 let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1278
1279 return Some((Token::XmlMarkup(content), location));
1280 }
1281
1282 if self.characters.peek_or_zero() == '?' {
1284 self.characters.next();
1285 loop {
1286 if self.characters.peek_or_zero() == '?' && self.characters.peek_at_or_zero(1) == '>' {
1287 self.characters.skip_count_in_place(2);
1288 break;
1289 } else if CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) {
1290 self.consume_line_terminator();
1291 } else if self.characters.reached_end() {
1292 self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForPi);
1293 break;
1294 } else {
1295 self.characters.next();
1296 }
1297 }
1298
1299 let location = start.combine_with(self.cursor_location());
1300 let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
1301
1302 return Some((Token::XmlMarkup(content), location));
1303 }
1304
1305 None
1306 }
1307}