1use crate::SyntaxKind;
9use crate::char_codes::CharacterCodes;
10use std::sync::Arc;
11use tsz_common::interner::{Atom, Interner};
12use wasm_bindgen::prelude::wasm_bindgen;
13
14#[wasm_bindgen]
20#[repr(u32)]
21#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
22pub enum TokenFlags {
23 #[default]
24 None = 0,
25 PrecedingLineBreak = 1,
26 PrecedingJSDocComment = 2,
27 Unterminated = 4,
28 ExtendedUnicodeEscape = 8,
29 Scientific = 16,
30 Octal = 32,
31 HexSpecifier = 64,
32 BinarySpecifier = 128,
33 OctalSpecifier = 256,
34 ContainsSeparator = 512,
35 UnicodeEscape = 1024,
36 ContainsInvalidEscape = 2048,
37 HexEscape = 4096,
38 ContainsLeadingZero = 8192,
39 ContainsInvalidSeparator = 16384,
40 PrecedingJSDocLeadingAsterisks = 32768,
41}
42
/// A problem reported by the scanner while tokenizing.
#[derive(Debug, Clone)]
pub struct ScannerDiagnostic {
    /// Offset in the source text at which the problem was detected.
    pub pos: usize,
    /// Length of the offending span (0 for a point diagnostic).
    pub length: usize,
    /// Static, human-readable message text.
    pub message: &'static str,
    /// Numeric diagnostic code accompanying the message.
    pub code: u32,
}
59
60#[derive(Clone, Debug)]
62pub struct RegexFlagError {
63 pub kind: RegexFlagErrorKind,
65 pub pos: usize,
67}
68
/// Classification of a regular-expression flag error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegexFlagErrorKind {
    /// The same flag appears more than once.
    Duplicate,
    /// The character is not a recognised flag.
    InvalidFlag,
    /// Two flags that cannot be combined were both given.
    IncompatibleFlags,
}
79
80#[derive(Clone)]
82pub struct ScannerSnapshot {
83 pub pos: usize,
84 pub full_start_pos: usize,
85 pub token_start: usize,
86 pub token: SyntaxKind,
87 pub token_value: String,
88 pub token_flags: u32,
89 pub token_atom: Atom,
90 pub token_invalid_separator_pos: Option<usize>,
91 pub token_invalid_separator_is_consecutive: bool,
92 pub regex_flag_errors: Vec<RegexFlagError>,
93}
94
95#[wasm_bindgen]
101pub struct ScannerState {
102 source: Arc<str>,
107 pos: usize,
109 end: usize,
111 full_start_pos: usize,
113 token_start: usize,
115 token: SyntaxKind,
117 token_value: String,
119 token_flags: u32,
121 token_invalid_separator_pos: Option<usize>,
123 token_invalid_separator_is_consecutive: bool,
125 regex_flag_errors: Vec<RegexFlagError>,
127 scanner_diagnostics: Vec<ScannerDiagnostic>,
129 skip_trivia: bool,
131 #[wasm_bindgen(skip)]
133 pub interner: Interner,
134 token_atom: Atom,
136}
137
138#[wasm_bindgen]
139#[allow(clippy::missing_const_for_fn)]
140impl ScannerState {
141 #[wasm_bindgen(constructor)]
146 #[must_use]
147 pub fn new(text: String, skip_trivia: bool) -> Self {
148 let end = text.len();
150 let interner = Interner::new();
151 let source: Arc<str> = Arc::from(text.into_boxed_str());
152 Self {
153 source,
154 pos: 0,
155 end,
156 full_start_pos: 0,
157 token_start: 0,
158 token: SyntaxKind::Unknown,
159 token_value: String::new(),
160 token_flags: 0,
161 token_invalid_separator_pos: None,
162 token_invalid_separator_is_consecutive: false,
163 regex_flag_errors: Vec::new(),
164 scanner_diagnostics: Vec::new(),
165 skip_trivia,
166 interner,
167 token_atom: Atom::NONE,
168 }
169 }
170
171 #[wasm_bindgen(js_name = getPos)]
173 #[must_use]
174 pub fn get_pos(&self) -> usize {
175 self.pos
176 }
177
178 pub fn set_pos(&mut self, pos: usize) {
181 self.pos = pos;
182 }
183
184 #[wasm_bindgen(js_name = getTokenFullStart)]
186 #[must_use]
187 pub fn get_token_full_start(&self) -> usize {
188 self.full_start_pos
189 }
190
191 #[wasm_bindgen(js_name = getTokenStart)]
193 #[must_use]
194 pub fn get_token_start(&self) -> usize {
195 self.token_start
196 }
197
198 #[wasm_bindgen(js_name = getTokenEnd)]
200 #[must_use]
201 pub fn get_token_end(&self) -> usize {
202 self.pos
203 }
204
205 #[wasm_bindgen(js_name = getToken)]
207 #[must_use]
208 pub fn get_token(&self) -> SyntaxKind {
209 self.token
210 }
211
212 #[must_use]
215 #[wasm_bindgen(js_name = getTokenValue)]
216 pub fn get_token_value(&self) -> String {
217 self.get_token_value_ref().to_string()
218 }
219
220 #[must_use]
222 #[wasm_bindgen(js_name = getTokenText)]
223 pub fn get_token_text(&self) -> String {
224 self.source[self.token_start..self.pos].to_string()
225 }
226
227 #[must_use]
229 #[wasm_bindgen(js_name = getTokenFlags)]
230 pub fn get_token_flags(&self) -> u32 {
231 self.token_flags
232 }
233
234 #[must_use]
236 #[wasm_bindgen(js_name = hasPrecedingLineBreak)]
237 pub fn has_preceding_line_break(&self) -> bool {
238 (self.token_flags & TokenFlags::PrecedingLineBreak as u32) != 0
239 }
240
241 #[must_use]
243 #[wasm_bindgen(js_name = isUnterminated)]
244 pub fn is_unterminated(&self) -> bool {
245 (self.token_flags & TokenFlags::Unterminated as u32) != 0
246 }
247
248 #[must_use]
250 #[wasm_bindgen(js_name = isIdentifier)]
251 pub fn is_identifier(&self) -> bool {
252 self.token == SyntaxKind::Identifier
253 || (self.token as u16) > (SyntaxKind::WithKeyword as u16)
254 }
255
256 #[must_use]
258 #[wasm_bindgen(js_name = isReservedWord)]
259 pub fn is_reserved_word(&self) -> bool {
260 let t = self.token as u16;
261 t >= SyntaxKind::BreakKeyword as u16 && t <= SyntaxKind::WithKeyword as u16
262 }
263
264 #[wasm_bindgen(js_name = setText)]
267 pub fn set_text(&mut self, text: String, start: Option<usize>, length: Option<usize>) {
268 let start = start.unwrap_or(0);
269 let len = length.unwrap_or(text.len() - start);
270 self.source = Arc::from(text.into_boxed_str());
271 self.pos = start;
272 self.end = start + len;
273 self.full_start_pos = start;
274 self.token_start = start;
275 self.token = SyntaxKind::Unknown;
276 self.token_value = String::new();
277 self.token_flags = 0;
278 }
279
280 #[wasm_bindgen(js_name = resetTokenState)]
282 pub fn reset_token_state(&mut self, new_pos: usize) {
283 self.pos = new_pos;
284 self.full_start_pos = new_pos;
285 self.token_start = new_pos;
286 self.token = SyntaxKind::Unknown;
287 self.token_value = String::new();
288 self.token_flags = 0;
289 }
290
291 #[must_use]
293 #[wasm_bindgen(js_name = getText)]
294 pub fn get_text(&self) -> String {
295 self.source.to_string()
296 }
297
298 #[inline]
305 #[must_use]
306 fn char_code_unchecked(&self, index: usize) -> u32 {
307 let bytes = self.source.as_bytes();
308 if index < bytes.len() {
309 let b = bytes[index];
310 if b < 128 {
311 u32::from(b)
313 } else {
314 if self.source.is_char_boundary(index) {
317 self.source[index..].chars().next().map_or(0, |c| c as u32)
318 } else {
319 let mut start = index;
321 while start > 0 && !self.source.is_char_boundary(start) {
322 start -= 1;
323 }
324 self.source[start..].chars().next().map_or(0, |c| c as u32)
325 }
326 }
327 } else {
328 0
329 }
330 }
331
332 #[inline]
335 fn char_code_at(&self, index: usize) -> Option<u32> {
336 let bytes = self.source.as_bytes();
337 if index < bytes.len() {
338 let b = bytes[index];
339 if b < 128 {
340 Some(u32::from(b))
341 } else if self.source.is_char_boundary(index) {
342 self.source[index..].chars().next().map(|c| c as u32)
343 } else {
344 let mut start = index;
345 while start > 0 && !self.source.is_char_boundary(start) {
346 start -= 1;
347 }
348 self.source[start..].chars().next().map(|c| c as u32)
349 }
350 } else {
351 None
352 }
353 }
354
355 #[inline]
357 fn char_len_at(&self, index: usize) -> usize {
358 let bytes = self.source.as_bytes();
359 if index >= bytes.len() {
360 return 0;
361 }
362 let b = bytes[index];
363 if b < 128 {
364 1 } else if b < 0xE0 {
366 2 } else if b < 0xF0 {
368 3 } else {
370 4 }
372 }
373
374 #[inline]
376 fn substring(&self, start: usize, end: usize) -> String {
377 let len = self.source.len();
378 let clamped_start = start.min(len);
379 let clamped_end = end.min(len);
380 if clamped_start >= clamped_end {
381 return String::new();
382 }
383 self.source[clamped_start..clamped_end].to_string()
384 }
385
386 #[wasm_bindgen]
392 pub fn scan(&mut self) -> SyntaxKind {
393 self.full_start_pos = self.pos;
394 self.token_flags = 0;
395 self.token_invalid_separator_pos = None;
396 self.token_invalid_separator_is_consecutive = false;
397 self.regex_flag_errors.clear();
398 self.token_value.clear();
399 self.token_atom = Atom::NONE; loop {
402 self.token_start = self.pos;
403
404 if self.pos >= self.end {
405 self.token = SyntaxKind::EndOfFileToken;
406 return self.token;
407 }
408
409 let ch = self.char_code_unchecked(self.pos);
410
411 match ch {
412 CharacterCodes::LINE_FEED | CharacterCodes::CARRIAGE_RETURN => {
414 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
415 if self.skip_trivia {
416 self.pos += 1;
417 if ch == CharacterCodes::CARRIAGE_RETURN
418 && self.pos < self.end
419 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
420 {
421 self.pos += 1;
422 }
423 continue;
424 }
425 if ch == CharacterCodes::CARRIAGE_RETURN
426 && self.pos + 1 < self.end
427 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::LINE_FEED
428 {
429 self.pos += 2;
430 } else {
431 self.pos += 1;
432 }
433 self.token = SyntaxKind::NewLineTrivia;
434 return self.token;
435 }
436
437 CharacterCodes::TAB
439 | CharacterCodes::VERTICAL_TAB
440 | CharacterCodes::FORM_FEED
441 | CharacterCodes::SPACE
442 | CharacterCodes::NON_BREAKING_SPACE => {
443 if self.skip_trivia {
444 self.pos += self.char_len_at(self.pos);
446 while self.pos < self.end
447 && is_white_space_single_line(self.char_code_unchecked(self.pos))
448 {
449 self.pos += self.char_len_at(self.pos);
450 }
451 continue;
452 }
453 while self.pos < self.end
454 && is_white_space_single_line(self.char_code_unchecked(self.pos))
455 {
456 self.pos += self.char_len_at(self.pos);
457 }
458 self.token = SyntaxKind::WhitespaceTrivia;
459 return self.token;
460 }
461
462 CharacterCodes::BYTE_ORDER_MARK => {
464 if self.skip_trivia {
465 self.pos += 3; while self.pos < self.end
467 && is_white_space_single_line(self.char_code_unchecked(self.pos))
468 {
469 self.pos += self.char_len_at(self.pos);
470 }
471 continue;
472 }
473 self.pos += 3; while self.pos < self.end
475 && is_white_space_single_line(self.char_code_unchecked(self.pos))
476 {
477 self.pos += self.char_len_at(self.pos);
478 }
479 self.token = SyntaxKind::WhitespaceTrivia;
480 return self.token;
481 }
482
483 CharacterCodes::OPEN_BRACE => {
485 self.pos += 1;
486 self.token = SyntaxKind::OpenBraceToken;
487 return self.token;
488 }
489 CharacterCodes::CLOSE_BRACE => {
490 self.pos += 1;
491 self.token = SyntaxKind::CloseBraceToken;
492 return self.token;
493 }
494 CharacterCodes::OPEN_PAREN => {
495 self.pos += 1;
496 self.token = SyntaxKind::OpenParenToken;
497 return self.token;
498 }
499 CharacterCodes::CLOSE_PAREN => {
500 self.pos += 1;
501 self.token = SyntaxKind::CloseParenToken;
502 return self.token;
503 }
504 CharacterCodes::OPEN_BRACKET => {
505 self.pos += 1;
506 self.token = SyntaxKind::OpenBracketToken;
507 return self.token;
508 }
509 CharacterCodes::CLOSE_BRACKET => {
510 self.pos += 1;
511 self.token = SyntaxKind::CloseBracketToken;
512 return self.token;
513 }
514 CharacterCodes::SEMICOLON => {
515 self.pos += 1;
516 self.token = SyntaxKind::SemicolonToken;
517 return self.token;
518 }
519 CharacterCodes::COMMA => {
520 self.pos += 1;
521 self.token = SyntaxKind::CommaToken;
522 return self.token;
523 }
524 CharacterCodes::TILDE => {
525 self.pos += 1;
526 self.token = SyntaxKind::TildeToken;
527 return self.token;
528 }
529 CharacterCodes::AT => {
530 self.pos += 1;
531 self.token = SyntaxKind::AtToken;
532 return self.token;
533 }
534 CharacterCodes::COLON => {
535 self.pos += 1;
536 self.token = SyntaxKind::ColonToken;
537 return self.token;
538 }
539
540 CharacterCodes::DOT => {
542 if self.pos + 1 < self.end && is_digit(self.char_code_unchecked(self.pos + 1)) {
543 self.scan_number();
544 return self.token;
545 }
546 if self.pos + 2 < self.end
547 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::DOT
548 && self.char_code_unchecked(self.pos + 2) == CharacterCodes::DOT
549 {
550 self.pos += 3;
551 self.token = SyntaxKind::DotDotDotToken;
552 return self.token;
553 }
554 self.pos += 1;
555 self.token = SyntaxKind::DotToken;
556 return self.token;
557 }
558
559 CharacterCodes::EXCLAMATION => {
561 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
562 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
563 self.pos += 3;
564 self.token = SyntaxKind::ExclamationEqualsEqualsToken;
565 return self.token;
566 }
567 self.pos += 2;
568 self.token = SyntaxKind::ExclamationEqualsToken;
569 return self.token;
570 }
571 self.pos += 1;
572 self.token = SyntaxKind::ExclamationToken;
573 return self.token;
574 }
575
576 CharacterCodes::EQUALS => {
578 if self.is_conflict_marker_trivia() {
579 self.scan_conflict_marker_trivia();
580 if self.skip_trivia {
581 continue;
582 }
583 self.token = SyntaxKind::ConflictMarkerTrivia;
584 return self.token;
585 }
586 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
587 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
588 self.pos += 3;
589 self.token = SyntaxKind::EqualsEqualsEqualsToken;
590 return self.token;
591 }
592 self.pos += 2;
593 self.token = SyntaxKind::EqualsEqualsToken;
594 return self.token;
595 }
596 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::GREATER_THAN) {
597 self.pos += 2;
598 self.token = SyntaxKind::EqualsGreaterThanToken;
599 return self.token;
600 }
601 self.pos += 1;
602 self.token = SyntaxKind::EqualsToken;
603 return self.token;
604 }
605
606 CharacterCodes::PLUS => {
608 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::PLUS) {
609 self.pos += 2;
610 self.token = SyntaxKind::PlusPlusToken;
611 return self.token;
612 }
613 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
614 self.pos += 2;
615 self.token = SyntaxKind::PlusEqualsToken;
616 return self.token;
617 }
618 self.pos += 1;
619 self.token = SyntaxKind::PlusToken;
620 return self.token;
621 }
622
623 CharacterCodes::MINUS => {
625 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::MINUS) {
626 self.pos += 2;
627 self.token = SyntaxKind::MinusMinusToken;
628 return self.token;
629 }
630 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
631 self.pos += 2;
632 self.token = SyntaxKind::MinusEqualsToken;
633 return self.token;
634 }
635 self.pos += 1;
636 self.token = SyntaxKind::MinusToken;
637 return self.token;
638 }
639
640 CharacterCodes::ASTERISK => {
642 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::ASTERISK) {
643 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
644 self.pos += 3;
645 self.token = SyntaxKind::AsteriskAsteriskEqualsToken;
646 return self.token;
647 }
648 self.pos += 2;
649 self.token = SyntaxKind::AsteriskAsteriskToken;
650 return self.token;
651 }
652 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
653 self.pos += 2;
654 self.token = SyntaxKind::AsteriskEqualsToken;
655 return self.token;
656 }
657 self.pos += 1;
658 self.token = SyntaxKind::AsteriskToken;
659 return self.token;
660 }
661
662 CharacterCodes::PERCENT => {
664 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
665 self.pos += 2;
666 self.token = SyntaxKind::PercentEqualsToken;
667 return self.token;
668 }
669 self.pos += 1;
670 self.token = SyntaxKind::PercentToken;
671 return self.token;
672 }
673
674 CharacterCodes::AMPERSAND => {
676 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::AMPERSAND) {
677 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
678 self.pos += 3;
679 self.token = SyntaxKind::AmpersandAmpersandEqualsToken;
680 return self.token;
681 }
682 self.pos += 2;
683 self.token = SyntaxKind::AmpersandAmpersandToken;
684 return self.token;
685 }
686 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
687 self.pos += 2;
688 self.token = SyntaxKind::AmpersandEqualsToken;
689 return self.token;
690 }
691 self.pos += 1;
692 self.token = SyntaxKind::AmpersandToken;
693 return self.token;
694 }
695
696 CharacterCodes::BAR => {
698 if self.is_conflict_marker_trivia() {
699 self.scan_conflict_marker_trivia();
700 if self.skip_trivia {
701 continue;
702 }
703 self.token = SyntaxKind::ConflictMarkerTrivia;
704 return self.token;
705 }
706 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::BAR) {
707 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
708 self.pos += 3;
709 self.token = SyntaxKind::BarBarEqualsToken;
710 return self.token;
711 }
712 self.pos += 2;
713 self.token = SyntaxKind::BarBarToken;
714 return self.token;
715 }
716 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
717 self.pos += 2;
718 self.token = SyntaxKind::BarEqualsToken;
719 return self.token;
720 }
721 self.pos += 1;
722 self.token = SyntaxKind::BarToken;
723 return self.token;
724 }
725
726 CharacterCodes::CARET => {
728 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
729 self.pos += 2;
730 self.token = SyntaxKind::CaretEqualsToken;
731 return self.token;
732 }
733 self.pos += 1;
734 self.token = SyntaxKind::CaretToken;
735 return self.token;
736 }
737
738 CharacterCodes::QUESTION => {
740 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::DOT)
741 && !is_digit(self.char_code_at(self.pos + 2).unwrap_or(0))
742 {
743 self.pos += 2;
744 self.token = SyntaxKind::QuestionDotToken;
745 return self.token;
746 }
747 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::QUESTION) {
748 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
749 self.pos += 3;
750 self.token = SyntaxKind::QuestionQuestionEqualsToken;
751 return self.token;
752 }
753 self.pos += 2;
754 self.token = SyntaxKind::QuestionQuestionToken;
755 return self.token;
756 }
757 self.pos += 1;
758 self.token = SyntaxKind::QuestionToken;
759 return self.token;
760 }
761
762 CharacterCodes::LESS_THAN => {
766 if self.is_conflict_marker_trivia() {
767 self.scan_conflict_marker_trivia();
768 if self.skip_trivia {
769 continue;
770 }
771 self.token = SyntaxKind::ConflictMarkerTrivia;
772 return self.token;
773 }
774 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::LESS_THAN) {
775 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
776 self.pos += 3;
777 self.token = SyntaxKind::LessThanLessThanEqualsToken;
778 return self.token;
779 }
780 self.pos += 2;
781 self.token = SyntaxKind::LessThanLessThanToken;
782 return self.token;
783 }
784 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
785 self.pos += 2;
786 self.token = SyntaxKind::LessThanEqualsToken;
787 return self.token;
788 }
789 self.pos += 1;
791 self.token = SyntaxKind::LessThanToken;
792 return self.token;
793 }
794
795 CharacterCodes::GREATER_THAN => {
798 if self.is_conflict_marker_trivia() {
799 self.scan_conflict_marker_trivia();
800 if self.skip_trivia {
801 continue;
802 }
803 self.token = SyntaxKind::ConflictMarkerTrivia;
804 return self.token;
805 }
806 self.pos += 1;
807 self.token = SyntaxKind::GreaterThanToken;
808 return self.token;
809 }
810
811 CharacterCodes::SLASH => {
813 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH) {
815 self.pos += 2;
816 while self.pos < self.end {
817 let c = self.char_code_unchecked(self.pos);
818 if c == CharacterCodes::LINE_FEED
819 || c == CharacterCodes::CARRIAGE_RETURN
820 {
821 break;
822 }
823 self.pos += self.char_len_at(self.pos); }
825 if self.skip_trivia {
826 continue;
827 }
828 self.token = SyntaxKind::SingleLineCommentTrivia;
829 return self.token;
830 }
831 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::ASTERISK) {
832 self.pos += 2;
833 let mut comment_closed = false;
834 while self.pos < self.end {
835 let c = self.char_code_unchecked(self.pos);
836 if c == CharacterCodes::ASTERISK
837 && self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH)
838 {
839 self.pos += 2;
840 comment_closed = true;
841 break;
842 }
843 if c == CharacterCodes::LINE_FEED
844 || c == CharacterCodes::CARRIAGE_RETURN
845 {
846 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
847 }
848 self.pos += self.char_len_at(self.pos); }
850 if !comment_closed {
851 self.token_flags |= TokenFlags::Unterminated as u32;
852 self.scanner_diagnostics.push(ScannerDiagnostic {
854 pos: self.pos,
855 length: 0,
856 message: "'*/' expected.",
857 code: 1010,
858 });
859 }
860 if self.skip_trivia {
861 continue;
862 }
863 self.token = SyntaxKind::MultiLineCommentTrivia;
864 return self.token;
865 }
866 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
867 self.pos += 2;
868 self.token = SyntaxKind::SlashEqualsToken;
869 return self.token;
870 }
871 self.pos += 1;
872 self.token = SyntaxKind::SlashToken;
873 return self.token;
874 }
875
876 CharacterCodes::DOUBLE_QUOTE | CharacterCodes::SINGLE_QUOTE => {
878 self.scan_string(ch);
879 return self.token;
880 }
881
882 CharacterCodes::BACKTICK => {
884 self.scan_template_literal();
885 return self.token;
886 }
887
888 CharacterCodes::HASH => {
890 self.pos += 1;
893 if self.pos < self.end
894 && is_identifier_start(self.char_code_unchecked(self.pos))
895 {
896 self.pos += self.char_len_at(self.pos); while self.pos < self.end
898 && is_identifier_part(self.char_code_unchecked(self.pos))
899 {
900 self.pos += self.char_len_at(self.pos); }
902 self.token_value = self.substring(self.token_start, self.pos);
903 self.token = SyntaxKind::PrivateIdentifier;
904 } else {
905 self.token = SyntaxKind::HashToken;
906 }
907 return self.token;
908 }
909
910 CharacterCodes::_0..=CharacterCodes::_9 => {
912 self.scan_number();
913 return self.token;
914 }
915
916 CharacterCodes::BACKSLASH => {
918 let escaped_ch = self.peek_unicode_escape();
921 if let Some(code_point) = escaped_ch
922 && is_identifier_start(code_point)
923 {
924 self.scan_identifier_with_escapes();
925 return self.token;
926 }
927 self.pos += 1;
929 self.token = SyntaxKind::Unknown;
930 return self.token;
931 }
932
933 _ => {
935 if ch == CharacterCodes::LINE_SEPARATOR
937 || ch == CharacterCodes::PARAGRAPH_SEPARATOR
938 {
939 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
940 if self.skip_trivia {
941 self.pos += self.char_len_at(self.pos);
942 continue;
943 }
944 self.pos += self.char_len_at(self.pos);
945 self.token = SyntaxKind::NewLineTrivia;
946 return self.token;
947 }
948 if ch > 127 && is_white_space_single_line(ch) {
950 if self.skip_trivia {
951 self.pos += self.char_len_at(self.pos);
952 while self.pos < self.end
953 && is_white_space_single_line(self.char_code_unchecked(self.pos))
954 {
955 self.pos += self.char_len_at(self.pos);
956 }
957 continue;
958 }
959 self.pos += self.char_len_at(self.pos);
960 while self.pos < self.end
961 && is_white_space_single_line(self.char_code_unchecked(self.pos))
962 {
963 self.pos += self.char_len_at(self.pos);
964 }
965 self.token = SyntaxKind::WhitespaceTrivia;
966 return self.token;
967 }
968 if is_identifier_start(ch) {
969 self.scan_identifier();
970 return self.token;
971 }
972 self.pos += self.char_len_at(self.pos);
974 self.token = SyntaxKind::Unknown;
975 return self.token;
976 }
977 }
978 }
979 }
980
981 fn scan_string(&mut self, quote: u32) {
983 self.pos += 1; let mut result = String::new();
985
986 while self.pos < self.end {
987 let ch = self.char_code_unchecked(self.pos);
988 if ch == quote {
989 self.pos += 1; self.token_value = result;
991 self.token = SyntaxKind::StringLiteral;
992 return;
993 }
994 if ch == CharacterCodes::BACKSLASH {
995 self.pos += 1;
996 if self.pos < self.end {
997 self.scan_string_escape(quote, &mut result);
998 }
999 } else if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
1000 self.token_flags |= TokenFlags::Unterminated as u32;
1002 self.token_value = result;
1003 self.token = SyntaxKind::StringLiteral;
1004 return;
1005 } else {
1006 if let Some(c) = char::from_u32(ch) {
1007 result.push(c);
1008 }
1009 self.pos += self.char_len_at(self.pos); }
1011 }
1012
1013 self.token_flags |= TokenFlags::Unterminated as u32;
1015 self.token_value = result;
1016 self.token = SyntaxKind::StringLiteral;
1017 }
1018
1019 fn scan_string_escape(&mut self, quote: u32, result: &mut String) {
1020 let escaped = self.char_code_unchecked(self.pos);
1021 let escaped_len = self.char_len_at(self.pos);
1023 self.pos += escaped_len;
1024
1025 match escaped {
1026 CharacterCodes::_0 => self.scan_string_escape_zero(result),
1027 CharacterCodes::_1
1028 | CharacterCodes::_2
1029 | CharacterCodes::_3
1030 | CharacterCodes::_4
1031 | CharacterCodes::_5
1032 | CharacterCodes::_6
1033 | CharacterCodes::_7 => self.scan_string_escape_octal(escaped, result),
1034 CharacterCodes::LOWER_N => result.push('\n'),
1035 CharacterCodes::LOWER_R => result.push('\r'),
1036 CharacterCodes::LOWER_T => result.push('\t'),
1037 CharacterCodes::LOWER_V => result.push('\x0B'),
1038 CharacterCodes::LOWER_B => result.push('\x08'),
1039 CharacterCodes::LOWER_F => result.push('\x0C'),
1040 CharacterCodes::BACKSLASH => result.push('\\'),
1041 c if c == quote => result.push(char::from_u32(quote).unwrap_or('\0')),
1042 CharacterCodes::LOWER_X => self.scan_string_escape_hex(result),
1043 CharacterCodes::LOWER_U => self.scan_string_escape_unicode(result),
1044 CharacterCodes::LINE_FEED
1045 | CharacterCodes::CARRIAGE_RETURN
1046 | CharacterCodes::LINE_SEPARATOR
1047 | CharacterCodes::PARAGRAPH_SEPARATOR => {
1048 if escaped == CharacterCodes::CARRIAGE_RETURN
1050 && self.pos < self.end
1051 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
1052 {
1053 self.pos += 1;
1054 }
1055 }
1056 _ => {
1057 if let Some(c) = char::from_u32(escaped) {
1058 result.push(c);
1059 }
1060 }
1061 }
1062 }
1063
1064 fn scan_string_escape_zero(&mut self, result: &mut String) {
1065 if self.pos < self.end && is_digit(self.char_code_unchecked(self.pos)) {
1066 let mut value = 0u32;
1068 let octal_start = self.pos - 1; self.pos = octal_start;
1070 while self.pos < self.end
1071 && self.pos < octal_start + 3
1072 && is_octal_digit(self.char_code_unchecked(self.pos))
1073 {
1074 value = value * 8 + (self.char_code_unchecked(self.pos) - CharacterCodes::_0);
1075 self.pos += 1;
1076 }
1077 if let Some(c) = char::from_u32(value) {
1078 result.push(c);
1079 }
1080 } else {
1081 result.push('\0');
1082 }
1083 }
1084
1085 fn scan_string_escape_octal(&mut self, escaped: u32, result: &mut String) {
1086 let mut value = escaped - CharacterCodes::_0;
1088 let mut count = 1;
1089 while count < 3 && self.pos < self.end && is_octal_digit(self.char_code_unchecked(self.pos))
1090 {
1091 value = value * 8 + (self.char_code_unchecked(self.pos) - CharacterCodes::_0);
1092 self.pos += 1;
1093 count += 1;
1094 }
1095 if let Some(c) = char::from_u32(value) {
1096 result.push(c);
1097 }
1098 }
1099
1100 fn scan_string_escape_hex(&mut self, result: &mut String) {
1101 if self.pos + 2 <= self.end {
1102 let hex = self.substring(self.pos, self.pos + 2);
1103 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1104 self.pos += 2;
1105 if let Some(c) = char::from_u32(code) {
1106 result.push(c);
1107 }
1108 return;
1109 }
1110 }
1111 result.push('\\');
1112 result.push('x');
1113 }
1114
1115 fn scan_string_escape_unicode(&mut self, result: &mut String) {
1116 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::OPEN_BRACE {
1117 self.pos += 1;
1118 let hex_start = self.pos;
1119 while self.pos < self.end && is_hex_digit(self.char_code_unchecked(self.pos)) {
1120 self.pos += 1;
1121 }
1122 if self.pos < self.end
1123 && self.char_code_unchecked(self.pos) == CharacterCodes::CLOSE_BRACE
1124 {
1125 let hex = self.substring(hex_start, self.pos);
1126 self.pos += 1;
1127 if let Ok(code) = u32::from_str_radix(&hex, 16)
1128 && let Some(c) = char::from_u32(code)
1129 {
1130 result.push(c);
1131 return;
1132 }
1133 }
1134 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1136 result.push('\\');
1137 result.push('u');
1138 return;
1139 }
1140 if self.pos + 4 <= self.end {
1141 let hex = self.substring(self.pos, self.pos + 4);
1142 if let Ok(code) = u32::from_str_radix(&hex, 16)
1143 && let Some(c) = char::from_u32(code)
1144 {
1145 self.pos += 4;
1146 result.push(c);
1147 return;
1148 }
1149 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1151 result.push('\\');
1152 result.push('u');
1153 return;
1154 }
1155 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1157 result.push('\\');
1158 result.push('u');
1159 }
1160
1161 fn scan_template_literal(&mut self) {
1163 self.pos += 1; let mut result = String::new();
1165
1166 while self.pos < self.end {
1167 let ch = self.char_code_unchecked(self.pos);
1168 if ch == CharacterCodes::BACKTICK {
1169 self.pos += 1;
1170 self.token_value = result;
1171 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
1172 return;
1173 }
1174 if ch == CharacterCodes::DOLLAR
1175 && self.char_code_at(self.pos + 1) == Some(CharacterCodes::OPEN_BRACE)
1176 {
1177 self.pos += 2;
1178 self.token_value = result;
1179 self.token = SyntaxKind::TemplateHead;
1180 return;
1181 }
1182 if ch == CharacterCodes::BACKSLASH {
1183 self.pos += 1;
1185 let escaped = self.scan_template_escape_sequence();
1186 result.push_str(&escaped);
1187 } else {
1188 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
1189 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
1190 }
1191 if let Some(c) = char::from_u32(ch) {
1192 result.push(c);
1193 }
1194 self.pos += self.char_len_at(self.pos); }
1196 }
1197
1198 self.token_flags |= TokenFlags::Unterminated as u32;
1199 self.token_value = result;
1200 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
1201 }
1202
1203 fn scan_number(&mut self) {
1205 let start = self.pos;
1206
1207 if self.char_code_unchecked(self.pos) == CharacterCodes::_0 {
1209 let next = self.char_code_at(self.pos + 1).unwrap_or(0);
1210 if self.scan_prefixed_number(start, next) {
1211 return;
1212 }
1213
1214 if is_digit(next) && self.scan_legacy_octal_number(start) {
1217 return;
1218 }
1219 }
1220
1221 self.scan_decimal_number(start);
1223 }
1224
1225 fn scan_prefixed_number(&mut self, start: usize, next: u32) -> bool {
1226 match next {
1227 CharacterCodes::LOWER_X | CharacterCodes::UPPER_X => {
1228 self.scan_integer_base_literal(start, is_hex_digit, TokenFlags::HexSpecifier);
1229 true
1230 }
1231 CharacterCodes::LOWER_B | CharacterCodes::UPPER_B => {
1232 self.scan_integer_base_literal(start, is_binary_digit, TokenFlags::BinarySpecifier);
1233 true
1234 }
1235 CharacterCodes::LOWER_O | CharacterCodes::UPPER_O => {
1236 self.scan_integer_base_literal(start, is_octal_digit, TokenFlags::OctalSpecifier);
1237 true
1238 }
1239 _ => false,
1240 }
1241 }
1242
1243 fn scan_integer_base_literal(
1244 &mut self,
1245 start: usize,
1246 is_valid_digit: fn(u32) -> bool,
1247 specifier_flag: TokenFlags,
1248 ) {
1249 self.pos += 2;
1250 self.token_flags |= specifier_flag as u32;
1251 self.scan_digits_with_separators(is_valid_digit);
1252
1253 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LOWER_N {
1254 self.pos += 1;
1255 self.token_value = self.substring(start, self.pos);
1256 self.token = SyntaxKind::BigIntLiteral;
1257 return;
1258 }
1259
1260 self.set_numeric_token_value(start);
1261 self.token = SyntaxKind::NumericLiteral;
1262 }
1263
1264 fn scan_legacy_octal_number(&mut self, start: usize) -> bool {
1265 let mut all_octal = true;
1266 let digit_start = self.pos + 1; let mut scan_pos = digit_start;
1268 while scan_pos < self.end && is_digit(self.char_code_unchecked(scan_pos)) {
1269 if !is_octal_digit(self.char_code_unchecked(scan_pos)) {
1270 all_octal = false;
1271 }
1272 scan_pos += 1;
1273 }
1274 if all_octal && scan_pos > digit_start {
1275 self.pos = scan_pos;
1276 self.token_flags |= TokenFlags::Octal as u32;
1277 self.set_numeric_token_value(start);
1278 self.token = SyntaxKind::NumericLiteral;
1279 true
1280 } else {
1281 self.token_flags |= TokenFlags::ContainsLeadingZero as u32;
1282 false
1283 }
1284 }
1285
1286 fn set_numeric_token_value(&mut self, start: usize) {
1287 if (self.token_flags & TokenFlags::ContainsSeparator as u32) != 0 {
1288 self.token_value = self.substring(start, self.pos);
1289 } else {
1290 self.token_value.clear();
1291 }
1292 }
1293
1294 fn scan_decimal_number(&mut self, start: usize) {
1295 self.scan_digits_with_separators(is_digit);
1296
1297 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::DOT {
1299 self.pos += 1;
1300 self.scan_digits_with_separators(is_digit);
1301 }
1302
1303 if self.pos < self.end {
1305 let ch = self.char_code_unchecked(self.pos);
1306 if ch == CharacterCodes::LOWER_E || ch == CharacterCodes::UPPER_E {
1307 self.pos += 1;
1308 self.token_flags |= TokenFlags::Scientific as u32;
1309 if self.pos < self.end {
1310 let sign = self.char_code_unchecked(self.pos);
1311 if sign == CharacterCodes::PLUS || sign == CharacterCodes::MINUS {
1312 self.pos += 1;
1313 }
1314 }
1315 self.scan_digits_with_separators(is_digit);
1316 }
1317 }
1318
1319 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LOWER_N {
1321 self.pos += 1;
1322 self.token_value = self.substring(start, self.pos);
1323 self.token = SyntaxKind::BigIntLiteral;
1324 return;
1325 }
1326
1327 self.set_numeric_token_value(start);
1330 self.token = SyntaxKind::NumericLiteral;
1331 }
1332
1333 fn scan_digits_with_separators(&mut self, is_valid_digit: fn(u32) -> bool) {
1334 let mut saw_digit = false;
1335 let mut prev_separator = false;
1336
1337 while self.pos < self.end {
1338 let ch = self.char_code_unchecked(self.pos);
1339 if ch == CharacterCodes::UNDERSCORE {
1340 self.token_flags |= TokenFlags::ContainsSeparator as u32;
1341 if !saw_digit || prev_separator {
1342 self.token_flags |= TokenFlags::ContainsInvalidSeparator as u32;
1343 if self.token_invalid_separator_pos.is_none() {
1344 self.token_invalid_separator_pos = Some(self.pos);
1345 self.token_invalid_separator_is_consecutive = prev_separator;
1346 }
1347 }
1348 prev_separator = true;
1349 self.pos += 1;
1350 continue;
1351 }
1352 if is_valid_digit(ch) {
1353 saw_digit = true;
1354 prev_separator = false;
1355 self.pos += 1;
1356 continue;
1357 }
1358 break;
1359 }
1360
1361 if prev_separator {
1362 self.token_flags |= TokenFlags::ContainsInvalidSeparator as u32;
1363 if self.token_invalid_separator_pos.is_none() {
1364 self.token_invalid_separator_pos = Some(self.pos.saturating_sub(1));
1365 self.token_invalid_separator_is_consecutive = false;
1366 }
1367 }
1368 }
1369
1370 fn scan_identifier(&mut self) {
1374 let start = self.pos;
1375 self.pos += self.char_len_at(self.pos);
1377
1378 while self.pos < self.end {
1379 let ch = self.char_code_unchecked(self.pos);
1380 if ch == CharacterCodes::BACKSLASH {
1381 if let Some(code_point) = self.peek_unicode_escape()
1383 && is_identifier_part(code_point)
1384 {
1385 self.continue_identifier_with_escapes(start);
1387 return;
1388 }
1389 break;
1391 }
1392 if !is_identifier_part(ch) {
1393 break;
1394 }
1395 self.pos += self.char_len_at(self.pos); }
1397
1398 let text_slice = &self.source[start..self.pos];
1400
1401 self.token = crate::text_to_keyword(text_slice).unwrap_or(SyntaxKind::Identifier);
1403
1404 self.token_atom = self.interner.intern(text_slice);
1406
1407 self.token_value.clear();
1410 }
1411
1412 fn continue_identifier_with_escapes(&mut self, start: usize) {
1416 let mut result = String::from(&self.source[start..self.pos]);
1418
1419 while self.pos < self.end {
1421 let ch = self.char_code_unchecked(self.pos);
1422 if ch == CharacterCodes::BACKSLASH {
1423 if let Some(code_point) = self.peek_unicode_escape()
1425 && is_identifier_part(code_point)
1426 {
1427 if let Some(c) = char::from_u32(self.scan_unicode_escape_value().unwrap_or(0)) {
1429 result.push(c);
1430 }
1431 continue;
1432 }
1433 break;
1435 }
1436 if !is_identifier_part(ch) {
1437 break;
1438 }
1439 if let Some(c) = char::from_u32(ch) {
1440 result.push(c);
1441 }
1442 self.pos += self.char_len_at(self.pos);
1443 }
1444
1445 self.token = crate::text_to_keyword(&result).unwrap_or(SyntaxKind::Identifier);
1446 self.token_atom = self.interner.intern(&result);
1447 self.token_value.clear();
1448 self.token_flags |= TokenFlags::UnicodeEscape as u32;
1449 }
1450
1451 fn peek_unicode_escape(&self) -> Option<u32> {
1454 if self.pos + 1 >= self.end {
1456 return None;
1457 }
1458 let bytes = self.source.as_bytes();
1459 if bytes.get(self.pos + 1).copied() != Some(b'u') {
1460 return None;
1461 }
1462 if bytes.get(self.pos + 2).copied() == Some(b'{') {
1464 let start = self.pos + 3;
1465 let mut end = start;
1466 while end < self.end && bytes.get(end).is_some_and(u8::is_ascii_hexdigit) {
1467 end += 1;
1468 }
1469 if end == start || bytes.get(end).copied() != Some(b'}') {
1470 return None;
1471 }
1472 let hex = &self.source[start..end];
1473 u32::from_str_radix(hex, 16)
1474 .ok()
1475 .filter(|&cp| cp <= 0x0010_FFFF)
1476 } else {
1477 if self.pos + 5 >= self.end {
1479 return None;
1480 }
1481 let hex = &self.source[self.pos + 2..self.pos + 6];
1482 if hex.len() == 4 && hex.bytes().all(|b| b.is_ascii_hexdigit()) {
1483 u32::from_str_radix(hex, 16).ok()
1484 } else {
1485 None
1486 }
1487 }
1488 }
1489
1490 fn scan_identifier_with_escapes(&mut self) {
1492 let mut result = String::new();
1493
1494 if let Some(ch) = self.scan_unicode_escape_value()
1496 && let Some(c) = char::from_u32(ch)
1497 {
1498 result.push(c);
1499 }
1500
1501 while self.pos < self.end {
1503 let ch = self.char_code_unchecked(self.pos);
1504 if ch == CharacterCodes::BACKSLASH {
1505 if let Some(code_point) = self.peek_unicode_escape()
1507 && is_identifier_part(code_point)
1508 {
1509 if let Some(c) = char::from_u32(self.scan_unicode_escape_value().unwrap_or(0)) {
1510 result.push(c);
1511 }
1512 continue;
1513 }
1514 break;
1515 }
1516 if !is_identifier_part(ch) {
1517 break;
1518 }
1519 if let Some(c) = char::from_u32(ch) {
1520 result.push(c);
1521 }
1522 self.pos += self.char_len_at(self.pos);
1523 }
1524
1525 self.token = crate::text_to_keyword(&result).unwrap_or(SyntaxKind::Identifier);
1526 self.token_atom = self.interner.intern(&result);
1527 self.token_value.clear();
1528 self.token_flags |= TokenFlags::UnicodeEscape as u32;
1529 }
1530
1531 fn scan_unicode_escape_value(&mut self) -> Option<u32> {
1534 self.pos += 1;
1536 if self.pos >= self.end || self.source.as_bytes()[self.pos] != b'u' {
1537 return None;
1538 }
1539 self.pos += 1; if self.pos < self.end && self.source.as_bytes()[self.pos] == b'{' {
1542 self.pos += 1;
1544 let start = self.pos;
1545 while self.pos < self.end
1546 && self
1547 .source
1548 .as_bytes()
1549 .get(self.pos)
1550 .is_some_and(u8::is_ascii_hexdigit)
1551 {
1552 self.pos += 1;
1553 }
1554 let result = u32::from_str_radix(&self.source[start..self.pos], 16).ok();
1555 if self.pos < self.end && self.source.as_bytes()[self.pos] == b'}' {
1556 self.pos += 1;
1557 }
1558 result
1559 } else {
1560 if self.pos + 4 > self.end {
1562 return None;
1563 }
1564 let hex = &self.source[self.pos..self.pos + 4];
1565 if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
1566 self.pos += 4;
1567 u32::from_str_radix(hex, 16).ok()
1568 } else {
1569 None
1570 }
1571 }
1572 }
1573
1574 #[wasm_bindgen(js_name = reScanGreaterToken)]
1581 pub fn re_scan_greater_token(&mut self) -> SyntaxKind {
1582 if self.token == SyntaxKind::GreaterThanToken {
1583 let next_char = self.char_code_unchecked(self.pos);
1584 if next_char == CharacterCodes::GREATER_THAN {
1585 let next_next = self.char_code_unchecked(self.pos + 1);
1586 if next_next == CharacterCodes::GREATER_THAN {
1587 let next_next_next = self.char_code_unchecked(self.pos + 2);
1589 if next_next_next == CharacterCodes::EQUALS {
1590 self.pos += 3;
1592 self.token = SyntaxKind::GreaterThanGreaterThanGreaterThanEqualsToken;
1593 return self.token;
1594 }
1595 self.pos += 2;
1596 self.token = SyntaxKind::GreaterThanGreaterThanGreaterThanToken;
1597 return self.token;
1598 }
1599 if next_next == CharacterCodes::EQUALS {
1600 self.pos += 2;
1602 self.token = SyntaxKind::GreaterThanGreaterThanEqualsToken;
1603 return self.token;
1604 }
1605 self.pos += 1;
1607 self.token = SyntaxKind::GreaterThanGreaterThanToken;
1608 return self.token;
1609 }
1610 if next_char == CharacterCodes::EQUALS {
1611 self.pos += 1;
1613 self.token = SyntaxKind::GreaterThanEqualsToken;
1614 return self.token;
1615 }
1616 }
1617 self.token
1618 }
1619
#[wasm_bindgen(js_name = reScanSlashToken)]
/// Re-scans a `/` or `/=` token as a regular expression literal.
///
/// When the current token is `SlashToken` or `SlashEqualsToken`, scanning restarts right
/// after the opening slash. The body runs until an unescaped `/` outside a character
/// class; a line break or end of input before that sets `Unterminated`. For a terminated
/// literal the trailing flags are consumed and problems are appended to
/// `regex_flag_errors` (duplicate flag, unknown flag, `u` combined with `v`). The token
/// becomes `RegularExpressionLiteral` and its value is the source text from the token
/// start to the final position. Any other token is returned unchanged.
pub fn re_scan_slash_token(&mut self) -> SyntaxKind {
    if self.token == SyntaxKind::SlashToken || self.token == SyntaxKind::SlashEqualsToken {
        // Restart just after the opening `/`, even if `/=` had been consumed.
        let start_of_regex_body = self.token_start + 1;
        self.pos = start_of_regex_body;
        let mut in_escape = false;
        let mut in_character_class = false;

        while self.pos < self.end {
            let ch = self.char_code_unchecked(self.pos);

            // A line break ends the literal without a closing slash.
            if is_line_break(ch) {
                self.token_flags |= TokenFlags::Unterminated as u32;
                break;
            }

            if in_escape {
                // The escaped character is taken literally, whatever it is.
                in_escape = false;
            } else if ch == CharacterCodes::SLASH && !in_character_class {
                // Closing slash; it is consumed below, after the loop.
                break;
            } else if ch == CharacterCodes::OPEN_BRACKET {
                in_character_class = true;
            } else if ch == CharacterCodes::BACKSLASH {
                in_escape = true;
            } else if ch == CharacterCodes::CLOSE_BRACKET {
                in_character_class = false;
            }
            self.pos += self.char_len_at(self.pos);
        }

        // Running off the end of the input also counts as unterminated.
        if self.pos >= self.end && (self.token_flags & TokenFlags::Unterminated as u32) == 0 {
            self.token_flags |= TokenFlags::Unterminated as u32;
        }

        if (self.token_flags & TokenFlags::Unterminated as u32) == 0 {
            // Consume the closing `/`.
            self.pos += 1;

            // One bit per recognized flag, used to detect duplicates.
            let mut seen_flags: u8 = 0;
            let mut has_u = false;
            let mut has_v = false;

            while self.pos < self.end {
                let ch = self.char_code_unchecked(self.pos);
                // The flag run ends at the first character that is neither a regex flag
                // nor an identifier part.
                if !is_regex_flag(ch) && !is_identifier_part(ch) {
                    break;
                }

                // Map each recognized flag letter to its bit index in `seen_flags`.
                let flag_bit = match ch {
                    CharacterCodes::LOWER_G => Some(0),
                    CharacterCodes::LOWER_I => Some(1),
                    CharacterCodes::LOWER_M => Some(2),
                    CharacterCodes::LOWER_S => Some(3),
                    CharacterCodes::LOWER_U => {
                        has_u = true;
                        Some(4)
                    }
                    CharacterCodes::LOWER_V => {
                        has_v = true;
                        Some(5)
                    }
                    CharacterCodes::LOWER_Y => Some(6),
                    CharacterCodes::LOWER_D => Some(7),
                    _ => None,
                };

                if let Some(bit) = flag_bit {
                    let mask = 1 << bit;
                    if seen_flags & mask != 0 {
                        self.regex_flag_errors.push(RegexFlagError {
                            kind: RegexFlagErrorKind::Duplicate,
                            pos: self.pos,
                        });
                    }
                    seen_flags |= mask;
                } else if is_identifier_part(ch) {
                    // An identifier character that is not one of the flags above.
                    self.regex_flag_errors.push(RegexFlagError {
                        kind: RegexFlagErrorKind::InvalidFlag,
                        pos: self.pos,
                    });
                }

                self.pos += self.char_len_at(self.pos);
            }

            // `u` and `v` may not be combined; reported at the end of the flag run.
            if has_u && has_v {
                self.regex_flag_errors.push(RegexFlagError {
                    kind: RegexFlagErrorKind::IncompatibleFlags,
                    pos: self.pos,
                });
            }
        }

        self.token_value = self.substring(self.token_start, self.pos);
        self.token = SyntaxKind::RegularExpressionLiteral;
    }
    self.token
}
1735
1736 #[wasm_bindgen(js_name = reScanAsteriskEqualsToken)]
1739 pub fn re_scan_asterisk_equals_token(&mut self) -> SyntaxKind {
1740 if self.token == SyntaxKind::AsteriskEqualsToken {
1741 self.pos = self.token_start + 1;
1742 self.token = SyntaxKind::EqualsToken;
1743 }
1744 self.token
1745 }
1746
1747 #[wasm_bindgen(js_name = reScanTemplateToken)]
1755 pub fn re_scan_template_token(&mut self, _is_tagged_template: bool) -> SyntaxKind {
1756 if self.token_start >= self.end {
1759 self.token = SyntaxKind::EndOfFileToken;
1760 return self.token;
1761 }
1762 self.pos = self.token_start;
1763 self.token = self.scan_template_and_set_token_value(false);
1764 self.token
1765 }
1766
1767 #[wasm_bindgen(js_name = reScanTemplateHeadOrNoSubstitutionTemplate)]
1770 pub fn re_scan_template_head_or_no_substitution_template(&mut self) -> SyntaxKind {
1771 self.pos = self.token_start;
1772 self.token = self.scan_template_and_set_token_value(true);
1773 self.token
1774 }
1775
1776 fn scan_template_and_set_token_value(&mut self, started_with_backtick: bool) -> SyntaxKind {
1782 if self.pos >= self.end {
1785 self.token_flags |= TokenFlags::Unterminated as u32;
1786 self.token_value = String::new();
1787 return if started_with_backtick {
1788 SyntaxKind::NoSubstitutionTemplateLiteral
1789 } else {
1790 SyntaxKind::TemplateTail
1791 };
1792 }
1793 self.pos += 1;
1794 let mut start = self.pos;
1795 let mut contents = String::new();
1796
1797 while self.pos < self.end {
1798 let ch = self.char_code_unchecked(self.pos);
1799
1800 if ch == CharacterCodes::BACKTICK {
1802 contents.push_str(&self.substring(start, self.pos));
1803 self.pos += 1;
1804 self.token_value = contents;
1805 return if started_with_backtick {
1806 SyntaxKind::NoSubstitutionTemplateLiteral
1807 } else {
1808 SyntaxKind::TemplateTail
1809 };
1810 }
1811
1812 if ch == CharacterCodes::DOLLAR
1814 && self.pos + 1 < self.end
1815 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::OPEN_BRACE
1816 {
1817 contents.push_str(&self.substring(start, self.pos));
1818 self.pos += 2;
1819 self.token_value = contents;
1820 return if started_with_backtick {
1821 SyntaxKind::TemplateHead
1822 } else {
1823 SyntaxKind::TemplateMiddle
1824 };
1825 }
1826
1827 if ch == CharacterCodes::BACKSLASH {
1829 contents.push_str(&self.substring(start, self.pos));
1830 let escaped = self.scan_template_escape_sequence();
1831 contents.push_str(&escaped);
1832 start = self.pos;
1834 continue;
1835 }
1836
1837 if ch == CharacterCodes::CARRIAGE_RETURN {
1839 contents.push_str(&self.substring(start, self.pos));
1840 self.pos += 1;
1841 if self.pos < self.end
1842 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
1843 {
1844 self.pos += 1;
1845 }
1846 contents.push('\n');
1847 start = self.pos;
1849 continue;
1850 }
1851
1852 self.pos += self.char_len_at(self.pos);
1855 }
1856
1857 contents.push_str(&self.substring(start, self.pos));
1859 self.token_flags |= TokenFlags::Unterminated as u32;
1860 self.token_value = contents;
1861 if started_with_backtick {
1862 SyntaxKind::NoSubstitutionTemplateLiteral
1863 } else {
1864 SyntaxKind::TemplateTail
1865 }
1866 }
1867
1868 fn scan_template_escape_sequence(&mut self) -> String {
1873 if self.pos >= self.end {
1874 return String::from("\\");
1875 }
1876
1877 let ch = self.char_code_unchecked(self.pos);
1878 let ch_len = self.char_len_at(self.pos);
1880 self.pos += ch_len;
1881
1882 match ch {
1883 CharacterCodes::_0 => self.scan_template_escape_digit_zero(),
1884 CharacterCodes::_1
1885 | CharacterCodes::_2
1886 | CharacterCodes::_3
1887 | CharacterCodes::_4
1888 | CharacterCodes::_5
1889 | CharacterCodes::_6
1890 | CharacterCodes::_7
1891 | CharacterCodes::_8
1892 | CharacterCodes::_9 => self.scan_template_escape_octal_digit(ch),
1893 CharacterCodes::LOWER_N => String::from("\n"),
1894 CharacterCodes::LOWER_R => String::from("\r"),
1895 CharacterCodes::LOWER_T => String::from("\t"),
1896 CharacterCodes::LOWER_V => String::from("\x0B"),
1897 CharacterCodes::LOWER_B => String::from("\x08"),
1898 CharacterCodes::LOWER_F => String::from("\x0C"),
1899 CharacterCodes::SINGLE_QUOTE => String::from("'"),
1900 CharacterCodes::DOUBLE_QUOTE => String::from("\""),
1901 CharacterCodes::BACKTICK => String::from("`"),
1902 CharacterCodes::BACKSLASH => String::from("\\"),
1903 CharacterCodes::DOLLAR => String::from("$"),
1904 CharacterCodes::LINE_FEED
1905 | CharacterCodes::LINE_SEPARATOR
1906 | CharacterCodes::PARAGRAPH_SEPARATOR => String::new(),
1907 CharacterCodes::CARRIAGE_RETURN => self.scan_template_escape_cr(),
1908 CharacterCodes::LOWER_X => self.scan_template_hex_escape(),
1909 CharacterCodes::LOWER_U => self.scan_template_unicode_escape(),
1910 _ => Self::scan_template_unknown_escape(ch),
1911 }
1912 }
1913
1914 fn scan_template_escape_digit_zero(&mut self) -> String {
1915 if self.pos < self.end && is_digit(self.char_code_unchecked(self.pos)) {
1916 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1917 return String::from("\\0");
1918 }
1919 String::from("\0")
1920 }
1921
1922 fn scan_template_escape_octal_digit(&mut self, ch: u32) -> String {
1923 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1924 let digit = char::from_u32(ch).unwrap_or('?');
1925 format!("\\{digit}")
1926 }
1927
1928 fn scan_template_escape_cr(&mut self) -> String {
1929 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED {
1930 self.pos += 1;
1931 }
1932 String::new()
1933 }
1934
1935 fn scan_template_hex_escape(&mut self) -> String {
1936 if self.pos + 2 <= self.end {
1937 let hex = self.substring(self.pos, self.pos + 2);
1938 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1939 self.pos += 2;
1940 if let Some(c) = char::from_u32(code) {
1941 return c.to_string();
1942 }
1943 }
1944 }
1945 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1946 "\\x".to_string()
1947 }
1948
1949 fn scan_template_unicode_escape(&mut self) -> String {
1950 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::OPEN_BRACE {
1951 return self.scan_template_brace_unicode_escape();
1952 }
1953
1954 if self.pos + 4 <= self.end {
1955 let hex = self.substring(self.pos, self.pos + 4);
1956 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1957 self.pos += 4;
1958 if let Some(c) = char::from_u32(code) {
1959 return c.to_string();
1960 }
1961 }
1962 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1963 return String::from("\\u");
1964 }
1965
1966 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1967 String::from("\\u")
1968 }
1969
1970 fn scan_template_brace_unicode_escape(&mut self) -> String {
1971 self.pos += 1;
1972 let hex_start = self.pos;
1973 while self.pos < self.end && is_hex_digit(self.char_code_unchecked(self.pos)) {
1974 self.pos += 1;
1975 }
1976 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::CLOSE_BRACE
1977 {
1978 let hex = self.substring(hex_start, self.pos);
1979 self.pos += 1;
1980 if let Ok(code) = u32::from_str_radix(&hex, 16)
1981 && let Some(c) = char::from_u32(code)
1982 {
1983 return c.to_string();
1984 }
1985 }
1986 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1987 String::from("\\u")
1988 }
1989
/// Returns the escaped character itself for an escape with no special meaning, or an
/// empty string when `ch` is not a valid `char`.
fn scan_template_unknown_escape(ch: u32) -> String {
    char::from_u32(ch).map(String::from).unwrap_or_default()
}
1997
1998 #[wasm_bindgen(js_name = scanJsxIdentifier)]
2005 pub fn scan_jsx_identifier(&mut self) -> SyntaxKind {
2006 if crate::token_is_identifier_or_keyword(self.token) {
2007 while self.pos < self.end {
2011 let ch = self.char_code_unchecked(self.pos);
2012 if ch == CharacterCodes::MINUS {
2013 self.pos += 1;
2015 if self.pos < self.end
2017 && is_identifier_start(self.char_code_unchecked(self.pos))
2018 {
2019 self.pos += self.char_len_at(self.pos); while self.pos < self.end
2021 && is_identifier_part(self.char_code_unchecked(self.pos))
2022 {
2023 self.pos += self.char_len_at(self.pos); }
2025 }
2026 } else {
2027 break;
2028 }
2029 }
2030 self.token_atom = self
2032 .interner
2033 .intern(&self.source[self.token_start..self.pos]);
2034 self.token_value.clear();
2035 self.token = SyntaxKind::Identifier;
2037 }
2038 self.token
2039 }
2040
2041 #[wasm_bindgen(js_name = reScanJsxToken)]
2044 pub fn re_scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> SyntaxKind {
2045 self.pos = self.token_start;
2046 self.scan_jsx_token(allow_multiline_jsx_text)
2047 }
2048
2049 fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> SyntaxKind {
2051 self.full_start_pos = self.pos;
2052 self.token_start = self.pos;
2053
2054 if self.pos >= self.end {
2055 self.token = SyntaxKind::EndOfFileToken;
2056 return self.token;
2057 }
2058
2059 let ch = self.char_code_unchecked(self.pos);
2060
2061 if ch == CharacterCodes::LESS_THAN {
2063 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH) {
2065 self.pos += 2;
2066 self.token = SyntaxKind::LessThanSlashToken;
2067 return self.token;
2068 }
2069 self.pos += 1;
2070 self.token = SyntaxKind::LessThanToken;
2071 return self.token;
2072 }
2073
2074 if ch == CharacterCodes::OPEN_BRACE {
2075 self.pos += 1;
2076 self.token = SyntaxKind::OpenBraceToken;
2077 return self.token;
2078 }
2079
2080 let mut text = String::new();
2082 while self.pos < self.end {
2083 let c = self.char_code_unchecked(self.pos);
2084
2085 if c == CharacterCodes::OPEN_BRACE || c == CharacterCodes::LESS_THAN {
2087 break;
2088 }
2089
2090 if is_line_break(c) {
2092 if !allow_multiline_jsx_text {
2093 break;
2094 }
2095 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
2096 }
2097
2098 if let Some(char) = char::from_u32(c) {
2099 text.push(char);
2100 }
2101 self.pos += self.char_len_at(self.pos); }
2103
2104 if !text.is_empty() {
2105 self.token_value = text;
2106 self.token = SyntaxKind::JsxText;
2107 return self.token;
2108 }
2109
2110 self.token = SyntaxKind::Unknown;
2111 self.token
2112 }
2113
2114 #[wasm_bindgen(js_name = scanJsxAttributeValue)]
2116 pub fn scan_jsx_attribute_value(&mut self) -> SyntaxKind {
2117 self.full_start_pos = self.pos;
2118 self.token_flags = 0;
2119
2120 while self.pos < self.end && is_white_space_single_line(self.char_code_unchecked(self.pos))
2122 {
2123 self.pos += 1;
2124 }
2125
2126 self.token_start = self.pos;
2127
2128 if self.pos >= self.end {
2129 self.token = SyntaxKind::EndOfFileToken;
2130 return self.token;
2131 }
2132
2133 let ch = self.char_code_unchecked(self.pos);
2134
2135 if ch == CharacterCodes::DOUBLE_QUOTE || ch == CharacterCodes::SINGLE_QUOTE {
2137 self.scan_jsx_string_literal(ch);
2138 return self.token;
2139 }
2140
2141 self.scan()
2142 }
2143
2144 fn scan_jsx_string_literal(&mut self, quote: u32) {
2147 self.pos += 1; let mut result = String::new();
2149
2150 while self.pos < self.end {
2151 let ch = self.char_code_unchecked(self.pos);
2152 if ch == quote {
2153 self.pos += 1; self.token_value = result;
2155 self.token = SyntaxKind::StringLiteral;
2156 return;
2157 }
2158 if let Some(c) = char::from_u32(ch) {
2160 result.push(c);
2161 }
2162 self.pos += 1;
2163 }
2164
2165 self.token_flags |= TokenFlags::Unterminated as u32;
2167 self.token_value = result;
2168 self.token = SyntaxKind::StringLiteral;
2169 }
2170
2171 #[wasm_bindgen(js_name = reScanJsxAttributeValue)]
2173 pub fn re_scan_jsx_attribute_value(&mut self) -> SyntaxKind {
2174 self.pos = self.token_start;
2175 self.scan_jsx_attribute_value()
2176 }
2177
2178 #[wasm_bindgen(js_name = reScanLessThanToken)]
2181 pub fn re_scan_less_than_token(&mut self) -> SyntaxKind {
2182 if self.token == SyntaxKind::LessThanToken
2183 && self.pos < self.end
2184 && self.char_code_unchecked(self.pos) == CharacterCodes::SLASH
2185 {
2186 self.pos += 1;
2187 self.token = SyntaxKind::LessThanSlashToken;
2188 }
2189 self.token
2190 }
2191
2192 #[wasm_bindgen(js_name = reScanHashToken)]
2194 pub fn re_scan_hash_token(&mut self) -> SyntaxKind {
2195 if self.token == SyntaxKind::HashToken
2196 && self.pos < self.end
2197 && is_identifier_start(self.char_code_unchecked(self.pos))
2198 {
2199 self.pos += self.char_len_at(self.pos);
2201 while self.pos < self.end && is_identifier_part(self.char_code_unchecked(self.pos)) {
2202 self.pos += self.char_len_at(self.pos);
2203 }
2204 self.token_value = self.substring(self.token_start, self.pos);
2205 self.token = SyntaxKind::PrivateIdentifier;
2206 }
2207 self.token
2208 }
2209
2210 #[wasm_bindgen(js_name = reScanQuestionToken)]
2212 pub fn re_scan_question_token(&mut self) -> SyntaxKind {
2213 if self.token == SyntaxKind::QuestionToken {
2214 let ch = self.char_code_at(self.pos);
2215 if ch == Some(CharacterCodes::DOT) {
2216 let next = self.char_code_at(self.pos + 1);
2218 if !next.is_some_and(is_digit) {
2219 self.pos += 1;
2220 self.token = SyntaxKind::QuestionDotToken;
2221 }
2222 } else if ch == Some(CharacterCodes::QUESTION) {
2223 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
2224 self.pos += 2;
2225 self.token = SyntaxKind::QuestionQuestionEqualsToken;
2226 } else {
2227 self.pos += 1;
2228 self.token = SyntaxKind::QuestionQuestionToken;
2229 }
2230 }
2231 }
2232 self.token
2233 }
2234
2235 #[wasm_bindgen(js_name = scanJsDocToken)]
2242 pub fn scan_jsdoc_token(&mut self) -> SyntaxKind {
2243 self.full_start_pos = self.pos;
2244 self.token_flags = 0;
2245
2246 if self.pos >= self.end {
2247 self.token = SyntaxKind::EndOfFileToken;
2248 return self.token;
2249 }
2250
2251 self.token_start = self.pos;
2252 let ch = self.char_code_unchecked(self.pos);
2253
2254 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
2256 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
2257 self.pos += 1;
2258 if ch == CharacterCodes::CARRIAGE_RETURN
2259 && self.pos < self.end
2260 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
2261 {
2262 self.pos += 1;
2263 }
2264 self.token = SyntaxKind::NewLineTrivia;
2265 return self.token;
2266 }
2267
2268 if is_white_space_single_line(ch) {
2270 while self.pos < self.end
2271 && is_white_space_single_line(self.char_code_unchecked(self.pos))
2272 {
2273 self.pos += 1;
2274 }
2275 self.token = SyntaxKind::WhitespaceTrivia;
2276 return self.token;
2277 }
2278
2279 if self.scan_jsdoc_punctuation_token(ch) {
2280 return self.token;
2281 }
2282
2283 if is_identifier_start(ch) {
2285 return self.scan_jsdoc_identifier();
2286 }
2287
2288 self.scan_jsdoc_unknown_character();
2290 self.token
2291 }
2292
2293 fn scan_jsdoc_punctuation_token(&mut self, ch: u32) -> bool {
2294 match ch {
2295 CharacterCodes::AT => {
2296 self.pos += 1;
2297 self.token = SyntaxKind::AtToken;
2298 }
2299 CharacterCodes::ASTERISK => {
2300 self.pos += 1;
2301 self.token = SyntaxKind::AsteriskToken;
2302 }
2303 CharacterCodes::OPEN_BRACE => {
2304 self.pos += 1;
2305 self.token = SyntaxKind::OpenBraceToken;
2306 }
2307 CharacterCodes::CLOSE_BRACE => {
2308 self.pos += 1;
2309 self.token = SyntaxKind::CloseBraceToken;
2310 }
2311 CharacterCodes::OPEN_BRACKET => {
2312 self.pos += 1;
2313 self.token = SyntaxKind::OpenBracketToken;
2314 }
2315 CharacterCodes::CLOSE_BRACKET => {
2316 self.pos += 1;
2317 self.token = SyntaxKind::CloseBracketToken;
2318 }
2319 CharacterCodes::LESS_THAN => {
2320 self.pos += 1;
2321 self.token = SyntaxKind::LessThanToken;
2322 }
2323 CharacterCodes::GREATER_THAN => {
2324 self.pos += 1;
2325 self.token = SyntaxKind::GreaterThanToken;
2326 }
2327 CharacterCodes::EQUALS => {
2328 self.pos += 1;
2329 self.token = SyntaxKind::EqualsToken;
2330 }
2331 CharacterCodes::COMMA => {
2332 self.pos += 1;
2333 self.token = SyntaxKind::CommaToken;
2334 }
2335 CharacterCodes::DOT => {
2336 self.pos += 1;
2337 self.token = SyntaxKind::DotToken;
2338 }
2339 CharacterCodes::BACKTICK => {
2340 self.pos += 1;
2341 while self.pos < self.end
2342 && self.char_code_unchecked(self.pos) != CharacterCodes::BACKTICK
2343 {
2344 self.pos += 1;
2345 }
2346 if self.pos < self.end {
2347 self.pos += 1;
2348 }
2349 self.token_value = self.substring(self.token_start, self.pos);
2350 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
2351 }
2352 _ => return false,
2353 }
2354 true
2355 }
2356
2357 fn scan_jsdoc_identifier(&mut self) -> SyntaxKind {
2358 self.pos += self.char_len_at(self.pos);
2359 while self.pos < self.end && is_identifier_part(self.char_code_unchecked(self.pos)) {
2360 self.pos += self.char_len_at(self.pos);
2361 }
2362 self.token_value = self.substring(self.token_start, self.pos);
2363 self.token = crate::text_to_keyword(&self.token_value).unwrap_or(SyntaxKind::Identifier);
2364 self.token
2365 }
2366
2367 fn scan_jsdoc_unknown_character(&mut self) {
2368 self.pos += self.char_len_at(self.pos);
2369 self.token = SyntaxKind::Unknown;
2370 }
2371
2372 #[wasm_bindgen(js_name = scanJsDocCommentTextToken)]
2375 pub fn scan_jsdoc_comment_text_token(&mut self, in_backticks: bool) -> SyntaxKind {
2376 self.full_start_pos = self.pos;
2377 self.token_flags = 0;
2378 self.token_start = self.pos;
2379
2380 if self.pos >= self.end {
2381 self.token = SyntaxKind::EndOfFileToken;
2382 return self.token;
2383 }
2384
2385 while self.pos < self.end {
2387 let ch = self.char_code_unchecked(self.pos);
2388
2389 match ch {
2391 CharacterCodes::LINE_FEED | CharacterCodes::CARRIAGE_RETURN => {
2393 break;
2394 }
2395 CharacterCodes::AT | CharacterCodes::OPEN_BRACE | CharacterCodes::CLOSE_BRACE
2397 if !in_backticks =>
2398 {
2399 break;
2400 }
2401 CharacterCodes::BACKTICK => {
2403 if self.pos > self.token_start {
2404 break; }
2406 self.pos += 1;
2408 self.token = SyntaxKind::Unknown; return self.token;
2410 }
2411 _ => {
2412 self.pos += self.char_len_at(self.pos);
2414 }
2415 }
2416 }
2417
2418 if self.pos > self.token_start {
2419 self.token_value = self.substring(self.token_start, self.pos);
2420 self.token = SyntaxKind::Identifier;
2422 } else {
2423 self.token = SyntaxKind::EndOfFileToken;
2424 }
2425 self.token
2426 }
2427
2428 #[wasm_bindgen(js_name = scanShebangTrivia)]
2435 pub fn scan_shebang_trivia(&mut self) -> usize {
2436 if self.pos != 0 {
2438 return 0;
2439 }
2440
2441 if self.pos + 1 < self.end
2443 && self.char_code_unchecked(self.pos) == CharacterCodes::HASH
2444 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::EXCLAMATION
2445 {
2446 let start = self.pos;
2447 self.pos += 2;
2448
2449 while self.pos < self.end {
2451 let ch = self.char_code_unchecked(self.pos);
2452 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
2453 break;
2454 }
2455 self.pos += self.char_len_at(self.pos);
2457 }
2458
2459 if self.pos < self.end {
2461 let ch = self.char_code_unchecked(self.pos);
2462 if ch == CharacterCodes::CARRIAGE_RETURN {
2463 self.pos += 1;
2464 if self.pos < self.end
2465 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
2466 {
2467 self.pos += 1;
2468 }
2469 } else if ch == CharacterCodes::LINE_FEED {
2470 self.pos += 1;
2471 }
2472 }
2473
2474 return self.pos - start;
2475 }
2476
2477 0
2478 }
2479
2480 #[wasm_bindgen(js_name = reScanInvalidIdentifier)]
2482 pub fn re_scan_invalid_identifier(&mut self) -> SyntaxKind {
2483 if self.token == SyntaxKind::Unknown && !self.token_value.is_empty() {
2486 let chars: Vec<char> = self.token_value.chars().collect();
2488 if !chars.is_empty() && is_identifier_start(chars[0] as u32) {
2489 let mut all_valid = true;
2490 for c in chars.iter().skip(1) {
2491 if !is_identifier_part(*c as u32) {
2492 all_valid = false;
2493 break;
2494 }
2495 }
2496 if all_valid {
2497 self.token =
2498 crate::text_to_keyword(&self.token_value).unwrap_or(SyntaxKind::Identifier);
2499 }
2500 }
2501 }
2502 self.token
2503 }
2504}
2505
2506impl ScannerState {
2511 #[must_use]
2513 pub fn save_state(&self) -> ScannerSnapshot {
2514 ScannerSnapshot {
2515 pos: self.pos,
2516 full_start_pos: self.full_start_pos,
2517 token_start: self.token_start,
2518 token: self.token,
2519 token_value: self.token_value.clone(),
2520 token_flags: self.token_flags,
2521 token_atom: self.token_atom,
2522 token_invalid_separator_pos: self.token_invalid_separator_pos,
2523 token_invalid_separator_is_consecutive: self.token_invalid_separator_is_consecutive,
2524 regex_flag_errors: self.regex_flag_errors.clone(),
2525 }
2526 }
2527
2528 pub fn restore_state(&mut self, snapshot: ScannerSnapshot) {
2530 self.pos = snapshot.pos;
2531 self.full_start_pos = snapshot.full_start_pos;
2532 self.token_start = snapshot.token_start;
2533 self.token = snapshot.token;
2534 self.token_value = snapshot.token_value;
2535 self.token_flags = snapshot.token_flags;
2536 self.token_atom = snapshot.token_atom;
2537 self.token_invalid_separator_pos = snapshot.token_invalid_separator_pos;
2538 self.token_invalid_separator_is_consecutive =
2539 snapshot.token_invalid_separator_is_consecutive;
2540 self.regex_flag_errors = snapshot.regex_flag_errors;
2541 }
2542
#[must_use]
/// Returns the interned atom stored for the current token.
///
/// The identifier scanners in this file set it; resolve it to text with `resolve_atom`.
pub const fn get_token_atom(&self) -> Atom {
    self.token_atom
}
2550
#[must_use]
/// Returns the recorded position of the first invalid numeric separator in the current
/// token, or `None` when none was recorded.
pub const fn get_invalid_separator_pos(&self) -> Option<usize> {
    self.token_invalid_separator_pos
}
2555
#[must_use]
/// Returns whether the recorded invalid numeric separator directly followed another
/// separator.
pub const fn invalid_separator_is_consecutive(&self) -> bool {
    self.token_invalid_separator_is_consecutive
}
2560
2561 #[must_use]
2563 pub fn get_regex_flag_errors(&self) -> &[RegexFlagError] {
2564 &self.regex_flag_errors
2565 }
2566
2567 #[must_use]
2569 pub fn get_scanner_diagnostics(&self) -> &[ScannerDiagnostic] {
2570 &self.scanner_diagnostics
2571 }
2572
2573 const MERGE_CONFLICT_MARKER_LENGTH: usize = 7;
2575
2576 fn is_conflict_marker_trivia(&self) -> bool {
2581 let pos = self.pos;
2582 if pos > 0 && !is_line_break(self.char_code_unchecked(pos - 1)) {
2584 return false;
2585 }
2586 if pos + Self::MERGE_CONFLICT_MARKER_LENGTH >= self.end {
2588 return false;
2589 }
2590 let ch = self.char_code_unchecked(pos);
2591 for i in 1..Self::MERGE_CONFLICT_MARKER_LENGTH {
2593 if self.char_code_unchecked(pos + i) != ch {
2594 return false;
2595 }
2596 }
2597 ch == CharacterCodes::EQUALS
2600 || (pos + Self::MERGE_CONFLICT_MARKER_LENGTH < self.end
2601 && self.char_code_unchecked(pos + Self::MERGE_CONFLICT_MARKER_LENGTH)
2602 == CharacterCodes::SPACE)
2603 }
2604
2605 fn scan_conflict_marker_trivia(&mut self) {
2609 self.scanner_diagnostics.push(ScannerDiagnostic {
2611 pos: self.pos,
2612 length: Self::MERGE_CONFLICT_MARKER_LENGTH,
2613 message: "Merge conflict marker encountered.",
2614 code: 1185,
2615 });
2616
2617 let ch = self.char_code_unchecked(self.pos);
2618 if ch == CharacterCodes::LESS_THAN || ch == CharacterCodes::GREATER_THAN {
2619 while self.pos < self.end && !is_line_break(self.char_code_unchecked(self.pos)) {
2621 self.pos += 1;
2622 }
2623 } else {
2624 while self.pos < self.end {
2626 let current_char = self.char_code_unchecked(self.pos);
2627 if (current_char == CharacterCodes::EQUALS
2628 || current_char == CharacterCodes::GREATER_THAN)
2629 && current_char != ch
2630 && self.is_conflict_marker_trivia()
2631 {
2632 break;
2633 }
2634 self.pos += 1;
2635 }
2636 }
2637 }
2638
2639 #[must_use]
2642 pub fn resolve_atom(&self, atom: Atom) -> &str {
2643 self.interner.resolve(atom)
2644 }
2645
2646 #[must_use]
2648 pub const fn interner(&self) -> &Interner {
2649 &self.interner
2650 }
2651
2652 pub const fn interner_mut(&mut self) -> &mut Interner {
2654 &mut self.interner
2655 }
2656
2657 pub fn take_interner(&mut self) -> Interner {
2660 std::mem::take(&mut self.interner)
2661 }
2662
2663 #[inline]
2668 #[must_use]
2669 pub fn get_token_value_ref(&self) -> &str {
2670 if self.token_atom != Atom::NONE {
2673 return self.interner.resolve(self.token_atom);
2674 }
2675
2676 if !self.token_value.is_empty()
2680 || super::token_is_template_literal(self.token)
2681 || self.token == SyntaxKind::StringLiteral
2682 {
2683 return &self.token_value;
2684 }
2685
2686 &self.source[self.token_start..self.pos]
2689 }
2690
2691 #[inline]
2694 #[must_use]
2695 pub fn get_token_text_ref(&self) -> &str {
2696 &self.source[self.token_start..self.pos]
2697 }
2698
2699 #[inline]
2701 #[must_use]
2702 pub fn source_slice(&self, start: usize, end: usize) -> &str {
2703 &self.source[start..end]
2704 }
2705
2706 #[inline]
2708 #[must_use]
2709 pub fn source_text(&self) -> &str {
2710 &self.source
2711 }
2712}
2713
2714impl ScannerState {
2715 #[inline]
2717 #[must_use]
2718 pub fn source_text_arc(&self) -> Arc<str> {
2719 std::sync::Arc::clone(&self.source)
2720 }
2721}
2722
2723fn is_white_space_single_line(ch: u32) -> bool {
2728 ch == CharacterCodes::SPACE
2729 || ch == CharacterCodes::TAB
2730 || ch == CharacterCodes::VERTICAL_TAB
2731 || ch == CharacterCodes::FORM_FEED
2732 || ch == CharacterCodes::NON_BREAKING_SPACE
2733 || ch == CharacterCodes::NEXT_LINE || ch == CharacterCodes::OGHAM
2735 || (CharacterCodes::EN_QUAD..=CharacterCodes::ZERO_WIDTH_SPACE).contains(&ch)
2736 || ch == CharacterCodes::NARROW_NO_BREAK_SPACE
2737 || ch == CharacterCodes::MATHEMATICAL_SPACE
2738 || ch == CharacterCodes::IDEOGRAPHIC_SPACE
2739 || ch == CharacterCodes::BYTE_ORDER_MARK
2740}
2741
2742fn is_digit(ch: u32) -> bool {
2743 (CharacterCodes::_0..=CharacterCodes::_9).contains(&ch)
2744}
2745
2746const fn is_binary_digit(ch: u32) -> bool {
2747 ch == CharacterCodes::_0 || ch == CharacterCodes::_1
2748}
2749
2750fn is_octal_digit(ch: u32) -> bool {
2751 (CharacterCodes::_0..=CharacterCodes::_7).contains(&ch)
2752}
2753
2754fn is_hex_digit(ch: u32) -> bool {
2755 is_digit(ch)
2756 || (CharacterCodes::UPPER_A..=CharacterCodes::UPPER_F).contains(&ch)
2757 || (CharacterCodes::LOWER_A..=CharacterCodes::LOWER_F).contains(&ch)
2758}
2759
2760fn is_identifier_start(ch: u32) -> bool {
2761 if ch < 128 {
2763 return (CharacterCodes::UPPER_A..=CharacterCodes::UPPER_Z).contains(&ch)
2764 || (CharacterCodes::LOWER_A..=CharacterCodes::LOWER_Z).contains(&ch)
2765 || ch == CharacterCodes::UNDERSCORE
2766 || ch == CharacterCodes::DOLLAR;
2767 }
2768
2769 if let Some(c) = char::from_u32(ch) {
2774 return c.is_alphabetic();
2775 }
2776
2777 false
2778}
2779
2780fn is_identifier_part(ch: u32) -> bool {
2781 if ch < 128 {
2783 return is_identifier_start(ch) || is_digit(ch);
2784 }
2785
2786 if let Some(c) = char::from_u32(ch) {
2789 if c.is_alphanumeric() {
2791 return true;
2792 }
2793 }
2794
2795 if ch == 0x200C || ch == 0x200D {
2797 return true;
2798 }
2799
2800 is_unicode_combining_mark(ch)
2808}
2809
/// Returns `true` when `ch` falls in one of the combining-mark ranges listed
/// below.
///
/// The table is a coarse, block-level approximation rather than an exact
/// list of Unicode `Mn`/`Mc` code points. The Hebrew entry is split so that
/// the punctuation characters interleaved with the Hebrew points
/// (U+05BE MAQAF, U+05C0 PASEQ, U+05C3 SOF PASUQ, U+05C6 NUN HAFUKHA) are
/// not reported as combining marks.
const fn is_unicode_combining_mark(ch: u32) -> bool {
    matches!(
        ch,
        // Combining Diacritical Marks
        0x0300..=0x036F
        // Hebrew accents and points, excluding the interleaved punctuation
        | 0x0591..=0x05BD
        | 0x05BF
        | 0x05C1..=0x05C2
        | 0x05C4..=0x05C5
        | 0x05C7
        // Arabic
        | 0x064B..=0x0652
        | 0x0670
        // Devanagari
        | 0x0900..=0x0903
        | 0x093A..=0x094F
        | 0x0951..=0x0957
        | 0x0962..=0x0963
        // Bengali
        | 0x0981..=0x0983
        | 0x09BC..=0x09CD
        // Oriya
        | 0x0B01..=0x0B03
        | 0x0B3C..=0x0B4D
        // Tamil
        | 0x0B82..=0x0B83
        | 0x0BBE..=0x0BCD
        // Telugu
        | 0x0C00..=0x0C04
        | 0x0C3E..=0x0C4D
        // Kannada
        | 0x0C81..=0x0C83
        | 0x0CBC..=0x0CCD
        // Malayalam
        | 0x0D00..=0x0D03
        | 0x0D3B..=0x0D4D
        // Thai
        | 0x0E31..=0x0E3A
        | 0x0E47..=0x0E4E
        // Combining Diacritical Marks Extended / Supplement / for Symbols
        | 0x1AB0..=0x1AFF
        | 0x1DC0..=0x1DFF
        | 0x20D0..=0x20FF
    )
}
2864
2865const fn is_line_break(ch: u32) -> bool {
2866 ch == CharacterCodes::LINE_FEED
2867 || ch == CharacterCodes::CARRIAGE_RETURN
2868 || ch == CharacterCodes::LINE_SEPARATOR
2869 || ch == CharacterCodes::PARAGRAPH_SEPARATOR
2870}
2871
2872const fn is_regex_flag(ch: u32) -> bool {
2874 matches!(
2875 ch,
2876 CharacterCodes::LOWER_G | CharacterCodes::LOWER_I | CharacterCodes::LOWER_M | CharacterCodes::LOWER_S | CharacterCodes::LOWER_U | CharacterCodes::LOWER_V | CharacterCodes::LOWER_Y | CharacterCodes::LOWER_D )
2885}