1use crate::SyntaxKind;
9use crate::char_codes::CharacterCodes;
10use std::sync::Arc;
11use tsz_common::interner::{Atom, Interner};
12use wasm_bindgen::prelude::wasm_bindgen;
13
/// Bit flags describing the token most recently produced by the scanner.
///
/// Each variant is a distinct power of two so several flags can be OR-ed
/// together; the scanner stores the combined value as a plain `u32`
/// (see `ScannerState::get_token_flags`).
#[wasm_bindgen]
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum TokenFlags {
    /// No flag set.
    #[default]
    None = 0,
    /// A line terminator was seen before the token (including inside a
    /// multi-line comment or a template literal body).
    PrecedingLineBreak = 1,
    // NOTE(review): not set anywhere in the visible part of the file.
    PrecedingJSDocComment = 2,
    /// A string, template literal or multi-line comment ended before its
    /// closing delimiter.
    Unterminated = 4,
    // NOTE(review): not set anywhere in the visible part of the file.
    ExtendedUnicodeEscape = 8,
    /// The numeric literal has an exponent part (`e` / `E`).
    Scientific = 16,
    /// The numeric literal is a legacy octal literal (leading `0` followed
    /// only by octal digits).
    Octal = 32,
    /// The numeric literal starts with `0x` / `0X`.
    HexSpecifier = 64,
    /// The numeric literal starts with `0b` / `0B`.
    BinarySpecifier = 128,
    /// The numeric literal starts with `0o` / `0O`.
    OctalSpecifier = 256,
    /// The numeric literal contains at least one `_` separator.
    ContainsSeparator = 512,
    // NOTE(review): not set anywhere in the visible part of the file.
    UnicodeEscape = 1024,
    /// A `\u` escape inside a string literal could not be decoded.
    ContainsInvalidEscape = 2048,
    // NOTE(review): not set anywhere in the visible part of the file.
    HexEscape = 4096,
    /// The numeric literal starts with `0` followed by digits that are not
    /// all octal (e.g. `08`).
    ContainsLeadingZero = 8192,
    /// The numeric literal contains a misplaced `_` separator (leading,
    /// trailing or consecutive).
    ContainsInvalidSeparator = 16384,
    // NOTE(review): not set anywhere in the visible part of the file.
    PrecedingJSDocLeadingAsterisks = 32768,
}
42
/// A diagnostic produced while scanning (for example an unterminated
/// multi-line comment).
///
/// Derives `PartialEq`/`Eq` so diagnostics can be compared directly, e.g. in
/// assertions.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ScannerDiagnostic {
    /// Byte offset in the source text the diagnostic refers to.
    pub pos: usize,
    /// Length in bytes of the reported span (may be `0`).
    pub length: usize,
    /// Human-readable message text.
    pub message: &'static str,
    /// Numeric diagnostic code.
    pub code: u32,
}
59
/// A problem found in the flags of a regular-expression literal.
///
/// NOTE(review): the code that creates these values is outside the visible
/// part of the file; the scanner only stores and clears them here.
#[derive(Clone, Debug)]
pub struct RegexFlagError {
    /// What is wrong with the flag.
    pub kind: RegexFlagErrorKind,
    /// Offset of the offending flag (presumably a byte offset into the
    /// source text — confirm against the producer).
    pub pos: usize,
}
68
/// Category of a [`RegexFlagError`].
///
/// NOTE(review): the variant meanings below are inferred from their names;
/// the code that assigns them is not visible here.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RegexFlagErrorKind {
    /// Presumably: the same flag appears more than once.
    Duplicate,
    /// Presumably: the character is not a recognised regex flag.
    InvalidFlag,
    /// Presumably: two flags that cannot be combined were both given.
    IncompatibleFlags,
}
79
/// A copy of the scanner's per-token state.
///
/// The fields mirror the identically named fields of `ScannerState`.
/// NOTE(review): the save/restore code is not visible here; presumably this
/// is used for look-ahead and re-scanning.
#[derive(Clone)]
pub struct ScannerSnapshot {
    /// Current scan position (byte offset).
    pub pos: usize,
    /// Position where the token began, including leading trivia.
    pub full_start_pos: usize,
    /// Position where the token text itself began.
    pub token_start: usize,
    /// Kind of the current token.
    pub token: SyntaxKind,
    /// Value associated with the current token.
    pub token_value: String,
    /// OR-ed `TokenFlags` bits of the current token.
    pub token_flags: u32,
    /// Interned atom associated with the current token.
    pub token_atom: Atom,
    /// Position of the first invalid numeric separator, if any.
    pub token_invalid_separator_pos: Option<usize>,
    /// Whether that invalid separator directly followed another separator.
    pub token_invalid_separator_is_consecutive: bool,
    /// Regex flag errors recorded for the current token.
    pub regex_flag_errors: Vec<RegexFlagError>,
}
94
/// Scanner state: the source text plus everything known about the token
/// most recently produced by `scan`.
///
/// All positions are byte offsets into `source`.
#[wasm_bindgen]
pub struct ScannerState {
    /// The text being scanned.
    source: Arc<str>,
    /// Offset of the next character to scan; also the end of the current token.
    pos: usize,
    /// Offset at which scanning stops.
    end: usize,
    /// Offset where the current token began, including leading trivia.
    full_start_pos: usize,
    /// Offset where the current token text began (after skipped trivia).
    token_start: usize,
    /// Kind of the current token.
    token: SyntaxKind,
    /// Value of the current token (e.g. the decoded contents of a string literal).
    token_value: String,
    /// OR-ed `TokenFlags` bits for the current token.
    token_flags: u32,
    /// Offset of the first invalid `_` separator in a numeric literal, if any.
    token_invalid_separator_pos: Option<usize>,
    /// Whether that invalid separator directly followed another separator.
    token_invalid_separator_is_consecutive: bool,
    /// Regex flag errors for the current token; cleared at the start of every `scan`.
    regex_flag_errors: Vec<RegexFlagError>,
    /// Diagnostics accumulated while scanning (not cleared by `scan`).
    scanner_diagnostics: Vec<ScannerDiagnostic>,
    /// When `true`, `scan` skips whitespace, newlines, comments and conflict
    /// markers instead of returning them as trivia tokens.
    skip_trivia: bool,
    /// String interner; not exposed to JavaScript.
    #[wasm_bindgen(skip)]
    pub interner: Interner,
    /// Interned atom for the current token; reset to `Atom::NONE` by `scan`.
    token_atom: Atom,
}
137
138#[wasm_bindgen]
139#[allow(clippy::missing_const_for_fn)]
140impl ScannerState {
141 #[wasm_bindgen(constructor)]
146 #[must_use]
147 pub fn new(text: String, skip_trivia: bool) -> Self {
148 let end = text.len();
150 let interner = Interner::new();
151 let source: Arc<str> = Arc::from(text.into_boxed_str());
152 Self {
153 source,
154 pos: 0,
155 end,
156 full_start_pos: 0,
157 token_start: 0,
158 token: SyntaxKind::Unknown,
159 token_value: String::new(),
160 token_flags: 0,
161 token_invalid_separator_pos: None,
162 token_invalid_separator_is_consecutive: false,
163 regex_flag_errors: Vec::new(),
164 scanner_diagnostics: Vec::new(),
165 skip_trivia,
166 interner,
167 token_atom: Atom::NONE,
168 }
169 }
170
    /// Returns the current scan position (byte offset of the next character
    /// to be scanned).
    #[wasm_bindgen(js_name = getPos)]
    #[must_use]
    pub fn get_pos(&self) -> usize {
        self.pos
    }
177
    /// Moves the scan position to `pos` without touching any other token
    /// state. The value is stored as given; it is not validated against the
    /// source length.
    pub fn set_pos(&mut self, pos: usize) {
        self.pos = pos;
    }
183
    /// Returns the offset where the current token began, including any
    /// leading trivia that `scan` skipped.
    #[wasm_bindgen(js_name = getTokenFullStart)]
    #[must_use]
    pub fn get_token_full_start(&self) -> usize {
        self.full_start_pos
    }
190
    /// Returns the offset where the current token's own text begins
    /// (after any skipped trivia).
    #[wasm_bindgen(js_name = getTokenStart)]
    #[must_use]
    pub fn get_token_start(&self) -> usize {
        self.token_start
    }
197
    /// Returns the offset just past the current token, which is the current
    /// scan position.
    #[wasm_bindgen(js_name = getTokenEnd)]
    #[must_use]
    pub fn get_token_end(&self) -> usize {
        self.pos
    }
204
    /// Returns the kind of the current token.
    #[wasm_bindgen(js_name = getToken)]
    #[must_use]
    pub fn get_token(&self) -> SyntaxKind {
        self.token
    }
211
    /// Returns an owned copy of the current token's value, as reported by
    /// `get_token_value_ref`.
    #[must_use]
    #[wasm_bindgen(js_name = getTokenValue)]
    pub fn get_token_value(&self) -> String {
        self.get_token_value_ref().to_string()
    }
219
220 #[must_use]
222 #[wasm_bindgen(js_name = getTokenText)]
223 pub fn get_token_text(&self) -> String {
224 self.source[self.token_start..self.pos].to_string()
225 }
226
    /// Returns the OR-ed `TokenFlags` bits recorded for the current token.
    #[must_use]
    #[wasm_bindgen(js_name = getTokenFlags)]
    pub fn get_token_flags(&self) -> u32 {
        self.token_flags
    }
233
234 #[must_use]
236 #[wasm_bindgen(js_name = hasPrecedingLineBreak)]
237 pub fn has_preceding_line_break(&self) -> bool {
238 (self.token_flags & TokenFlags::PrecedingLineBreak as u32) != 0
239 }
240
241 #[must_use]
243 #[wasm_bindgen(js_name = isUnterminated)]
244 pub fn is_unterminated(&self) -> bool {
245 (self.token_flags & TokenFlags::Unterminated as u32) != 0
246 }
247
248 #[must_use]
250 #[wasm_bindgen(js_name = isIdentifier)]
251 pub fn is_identifier(&self) -> bool {
252 self.token == SyntaxKind::Identifier
253 || (self.token as u16) > (SyntaxKind::WithKeyword as u16)
254 }
255
256 #[must_use]
258 #[wasm_bindgen(js_name = isReservedWord)]
259 pub fn is_reserved_word(&self) -> bool {
260 let t = self.token as u16;
261 t >= SyntaxKind::BreakKeyword as u16 && t <= SyntaxKind::WithKeyword as u16
262 }
263
264 #[wasm_bindgen(js_name = setText)]
267 pub fn set_text(&mut self, text: String, start: Option<usize>, length: Option<usize>) {
268 let start = start.unwrap_or(0);
269 let len = length.unwrap_or(text.len() - start);
270 self.source = Arc::from(text.into_boxed_str());
271 self.pos = start;
272 self.end = start + len;
273 self.full_start_pos = start;
274 self.token_start = start;
275 self.token = SyntaxKind::Unknown;
276 self.token_value = String::new();
277 self.token_flags = 0;
278 }
279
280 #[wasm_bindgen(js_name = resetTokenState)]
282 pub fn reset_token_state(&mut self, new_pos: usize) {
283 self.pos = new_pos;
284 self.full_start_pos = new_pos;
285 self.token_start = new_pos;
286 self.token = SyntaxKind::Unknown;
287 self.token_value = String::new();
288 self.token_flags = 0;
289 }
290
291 #[must_use]
293 #[wasm_bindgen(js_name = getText)]
294 pub fn get_text(&self) -> String {
295 self.source.to_string()
296 }
297
298 #[inline]
305 #[must_use]
306 fn char_code_unchecked(&self, index: usize) -> u32 {
307 let bytes = self.source.as_bytes();
308 if index < bytes.len() {
309 let b = bytes[index];
310 if b < 128 {
311 u32::from(b)
313 } else {
314 if self.source.is_char_boundary(index) {
317 self.source[index..].chars().next().map_or(0, |c| c as u32)
318 } else {
319 let mut start = index;
321 while start > 0 && !self.source.is_char_boundary(start) {
322 start -= 1;
323 }
324 self.source[start..].chars().next().map_or(0, |c| c as u32)
325 }
326 }
327 } else {
328 0
329 }
330 }
331
332 #[inline]
335 fn char_code_at(&self, index: usize) -> Option<u32> {
336 let bytes = self.source.as_bytes();
337 if index < bytes.len() {
338 let b = bytes[index];
339 if b < 128 {
340 Some(u32::from(b))
341 } else if self.source.is_char_boundary(index) {
342 self.source[index..].chars().next().map(|c| c as u32)
343 } else {
344 let mut start = index;
345 while start > 0 && !self.source.is_char_boundary(start) {
346 start -= 1;
347 }
348 self.source[start..].chars().next().map(|c| c as u32)
349 }
350 } else {
351 None
352 }
353 }
354
355 #[inline]
357 fn char_len_at(&self, index: usize) -> usize {
358 let bytes = self.source.as_bytes();
359 if index >= bytes.len() {
360 return 0;
361 }
362 let b = bytes[index];
363 if b < 128 {
364 1 } else if b < 0xE0 {
366 2 } else if b < 0xF0 {
368 3 } else {
370 4 }
372 }
373
374 #[inline]
376 fn substring(&self, start: usize, end: usize) -> String {
377 let len = self.source.len();
378 let clamped_start = start.min(len);
379 let clamped_end = end.min(len);
380 if clamped_start >= clamped_end {
381 return String::new();
382 }
383 self.source[clamped_start..clamped_end].to_string()
384 }
385
386 #[wasm_bindgen]
392 pub fn scan(&mut self) -> SyntaxKind {
393 self.full_start_pos = self.pos;
394 self.token_flags = 0;
395 self.token_invalid_separator_pos = None;
396 self.token_invalid_separator_is_consecutive = false;
397 self.regex_flag_errors.clear();
398 self.token_value.clear();
399 self.token_atom = Atom::NONE; loop {
402 self.token_start = self.pos;
403
404 if self.pos >= self.end {
405 self.token = SyntaxKind::EndOfFileToken;
406 return self.token;
407 }
408
409 let ch = self.char_code_unchecked(self.pos);
410
411 match ch {
412 CharacterCodes::LINE_FEED | CharacterCodes::CARRIAGE_RETURN => {
414 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
415 if self.skip_trivia {
416 self.pos += 1;
417 if ch == CharacterCodes::CARRIAGE_RETURN
418 && self.pos < self.end
419 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
420 {
421 self.pos += 1;
422 }
423 continue;
424 }
425 if ch == CharacterCodes::CARRIAGE_RETURN
426 && self.pos + 1 < self.end
427 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::LINE_FEED
428 {
429 self.pos += 2;
430 } else {
431 self.pos += 1;
432 }
433 self.token = SyntaxKind::NewLineTrivia;
434 return self.token;
435 }
436
437 CharacterCodes::TAB
439 | CharacterCodes::VERTICAL_TAB
440 | CharacterCodes::FORM_FEED
441 | CharacterCodes::SPACE
442 | CharacterCodes::NON_BREAKING_SPACE => {
443 if self.skip_trivia {
444 self.pos += self.char_len_at(self.pos);
446 while self.pos < self.end
447 && is_white_space_single_line(self.char_code_unchecked(self.pos))
448 {
449 self.pos += self.char_len_at(self.pos);
450 }
451 continue;
452 }
453 while self.pos < self.end
454 && is_white_space_single_line(self.char_code_unchecked(self.pos))
455 {
456 self.pos += self.char_len_at(self.pos);
457 }
458 self.token = SyntaxKind::WhitespaceTrivia;
459 return self.token;
460 }
461
462 CharacterCodes::BYTE_ORDER_MARK => {
464 if self.skip_trivia {
465 self.pos += 3; while self.pos < self.end
467 && is_white_space_single_line(self.char_code_unchecked(self.pos))
468 {
469 self.pos += self.char_len_at(self.pos);
470 }
471 continue;
472 }
473 self.pos += 3; while self.pos < self.end
475 && is_white_space_single_line(self.char_code_unchecked(self.pos))
476 {
477 self.pos += self.char_len_at(self.pos);
478 }
479 self.token = SyntaxKind::WhitespaceTrivia;
480 return self.token;
481 }
482
483 CharacterCodes::OPEN_BRACE => {
485 self.pos += 1;
486 self.token = SyntaxKind::OpenBraceToken;
487 return self.token;
488 }
489 CharacterCodes::CLOSE_BRACE => {
490 self.pos += 1;
491 self.token = SyntaxKind::CloseBraceToken;
492 return self.token;
493 }
494 CharacterCodes::OPEN_PAREN => {
495 self.pos += 1;
496 self.token = SyntaxKind::OpenParenToken;
497 return self.token;
498 }
499 CharacterCodes::CLOSE_PAREN => {
500 self.pos += 1;
501 self.token = SyntaxKind::CloseParenToken;
502 return self.token;
503 }
504 CharacterCodes::OPEN_BRACKET => {
505 self.pos += 1;
506 self.token = SyntaxKind::OpenBracketToken;
507 return self.token;
508 }
509 CharacterCodes::CLOSE_BRACKET => {
510 self.pos += 1;
511 self.token = SyntaxKind::CloseBracketToken;
512 return self.token;
513 }
514 CharacterCodes::SEMICOLON => {
515 self.pos += 1;
516 self.token = SyntaxKind::SemicolonToken;
517 return self.token;
518 }
519 CharacterCodes::COMMA => {
520 self.pos += 1;
521 self.token = SyntaxKind::CommaToken;
522 return self.token;
523 }
524 CharacterCodes::TILDE => {
525 self.pos += 1;
526 self.token = SyntaxKind::TildeToken;
527 return self.token;
528 }
529 CharacterCodes::AT => {
530 self.pos += 1;
531 self.token = SyntaxKind::AtToken;
532 return self.token;
533 }
534 CharacterCodes::COLON => {
535 self.pos += 1;
536 self.token = SyntaxKind::ColonToken;
537 return self.token;
538 }
539
540 CharacterCodes::DOT => {
542 if self.pos + 1 < self.end && is_digit(self.char_code_unchecked(self.pos + 1)) {
543 self.scan_number();
544 return self.token;
545 }
546 if self.pos + 2 < self.end
547 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::DOT
548 && self.char_code_unchecked(self.pos + 2) == CharacterCodes::DOT
549 {
550 self.pos += 3;
551 self.token = SyntaxKind::DotDotDotToken;
552 return self.token;
553 }
554 self.pos += 1;
555 self.token = SyntaxKind::DotToken;
556 return self.token;
557 }
558
559 CharacterCodes::EXCLAMATION => {
561 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
562 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
563 self.pos += 3;
564 self.token = SyntaxKind::ExclamationEqualsEqualsToken;
565 return self.token;
566 }
567 self.pos += 2;
568 self.token = SyntaxKind::ExclamationEqualsToken;
569 return self.token;
570 }
571 self.pos += 1;
572 self.token = SyntaxKind::ExclamationToken;
573 return self.token;
574 }
575
576 CharacterCodes::EQUALS => {
578 if self.is_conflict_marker_trivia() {
579 self.scan_conflict_marker_trivia();
580 if self.skip_trivia {
581 continue;
582 }
583 self.token = SyntaxKind::ConflictMarkerTrivia;
584 return self.token;
585 }
586 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
587 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
588 self.pos += 3;
589 self.token = SyntaxKind::EqualsEqualsEqualsToken;
590 return self.token;
591 }
592 self.pos += 2;
593 self.token = SyntaxKind::EqualsEqualsToken;
594 return self.token;
595 }
596 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::GREATER_THAN) {
597 self.pos += 2;
598 self.token = SyntaxKind::EqualsGreaterThanToken;
599 return self.token;
600 }
601 self.pos += 1;
602 self.token = SyntaxKind::EqualsToken;
603 return self.token;
604 }
605
606 CharacterCodes::PLUS => {
608 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::PLUS) {
609 self.pos += 2;
610 self.token = SyntaxKind::PlusPlusToken;
611 return self.token;
612 }
613 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
614 self.pos += 2;
615 self.token = SyntaxKind::PlusEqualsToken;
616 return self.token;
617 }
618 self.pos += 1;
619 self.token = SyntaxKind::PlusToken;
620 return self.token;
621 }
622
623 CharacterCodes::MINUS => {
625 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::MINUS) {
626 self.pos += 2;
627 self.token = SyntaxKind::MinusMinusToken;
628 return self.token;
629 }
630 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
631 self.pos += 2;
632 self.token = SyntaxKind::MinusEqualsToken;
633 return self.token;
634 }
635 self.pos += 1;
636 self.token = SyntaxKind::MinusToken;
637 return self.token;
638 }
639
640 CharacterCodes::ASTERISK => {
642 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::ASTERISK) {
643 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
644 self.pos += 3;
645 self.token = SyntaxKind::AsteriskAsteriskEqualsToken;
646 return self.token;
647 }
648 self.pos += 2;
649 self.token = SyntaxKind::AsteriskAsteriskToken;
650 return self.token;
651 }
652 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
653 self.pos += 2;
654 self.token = SyntaxKind::AsteriskEqualsToken;
655 return self.token;
656 }
657 self.pos += 1;
658 self.token = SyntaxKind::AsteriskToken;
659 return self.token;
660 }
661
662 CharacterCodes::PERCENT => {
664 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
665 self.pos += 2;
666 self.token = SyntaxKind::PercentEqualsToken;
667 return self.token;
668 }
669 self.pos += 1;
670 self.token = SyntaxKind::PercentToken;
671 return self.token;
672 }
673
674 CharacterCodes::AMPERSAND => {
676 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::AMPERSAND) {
677 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
678 self.pos += 3;
679 self.token = SyntaxKind::AmpersandAmpersandEqualsToken;
680 return self.token;
681 }
682 self.pos += 2;
683 self.token = SyntaxKind::AmpersandAmpersandToken;
684 return self.token;
685 }
686 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
687 self.pos += 2;
688 self.token = SyntaxKind::AmpersandEqualsToken;
689 return self.token;
690 }
691 self.pos += 1;
692 self.token = SyntaxKind::AmpersandToken;
693 return self.token;
694 }
695
696 CharacterCodes::BAR => {
698 if self.is_conflict_marker_trivia() {
699 self.scan_conflict_marker_trivia();
700 if self.skip_trivia {
701 continue;
702 }
703 self.token = SyntaxKind::ConflictMarkerTrivia;
704 return self.token;
705 }
706 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::BAR) {
707 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
708 self.pos += 3;
709 self.token = SyntaxKind::BarBarEqualsToken;
710 return self.token;
711 }
712 self.pos += 2;
713 self.token = SyntaxKind::BarBarToken;
714 return self.token;
715 }
716 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
717 self.pos += 2;
718 self.token = SyntaxKind::BarEqualsToken;
719 return self.token;
720 }
721 self.pos += 1;
722 self.token = SyntaxKind::BarToken;
723 return self.token;
724 }
725
726 CharacterCodes::CARET => {
728 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
729 self.pos += 2;
730 self.token = SyntaxKind::CaretEqualsToken;
731 return self.token;
732 }
733 self.pos += 1;
734 self.token = SyntaxKind::CaretToken;
735 return self.token;
736 }
737
738 CharacterCodes::QUESTION => {
740 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::DOT)
741 && !is_digit(self.char_code_at(self.pos + 2).unwrap_or(0))
742 {
743 self.pos += 2;
744 self.token = SyntaxKind::QuestionDotToken;
745 return self.token;
746 }
747 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::QUESTION) {
748 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
749 self.pos += 3;
750 self.token = SyntaxKind::QuestionQuestionEqualsToken;
751 return self.token;
752 }
753 self.pos += 2;
754 self.token = SyntaxKind::QuestionQuestionToken;
755 return self.token;
756 }
757 self.pos += 1;
758 self.token = SyntaxKind::QuestionToken;
759 return self.token;
760 }
761
762 CharacterCodes::LESS_THAN => {
766 if self.is_conflict_marker_trivia() {
767 self.scan_conflict_marker_trivia();
768 if self.skip_trivia {
769 continue;
770 }
771 self.token = SyntaxKind::ConflictMarkerTrivia;
772 return self.token;
773 }
774 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::LESS_THAN) {
775 if self.char_code_at(self.pos + 2) == Some(CharacterCodes::EQUALS) {
776 self.pos += 3;
777 self.token = SyntaxKind::LessThanLessThanEqualsToken;
778 return self.token;
779 }
780 self.pos += 2;
781 self.token = SyntaxKind::LessThanLessThanToken;
782 return self.token;
783 }
784 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
785 self.pos += 2;
786 self.token = SyntaxKind::LessThanEqualsToken;
787 return self.token;
788 }
789 self.pos += 1;
791 self.token = SyntaxKind::LessThanToken;
792 return self.token;
793 }
794
795 CharacterCodes::GREATER_THAN => {
798 if self.is_conflict_marker_trivia() {
799 self.scan_conflict_marker_trivia();
800 if self.skip_trivia {
801 continue;
802 }
803 self.token = SyntaxKind::ConflictMarkerTrivia;
804 return self.token;
805 }
806 self.pos += 1;
807 self.token = SyntaxKind::GreaterThanToken;
808 return self.token;
809 }
810
811 CharacterCodes::SLASH => {
813 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH) {
815 self.pos += 2;
816 while self.pos < self.end {
817 let c = self.char_code_unchecked(self.pos);
818 if c == CharacterCodes::LINE_FEED
819 || c == CharacterCodes::CARRIAGE_RETURN
820 {
821 break;
822 }
823 self.pos += self.char_len_at(self.pos); }
825 if self.skip_trivia {
826 continue;
827 }
828 self.token = SyntaxKind::SingleLineCommentTrivia;
829 return self.token;
830 }
831 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::ASTERISK) {
832 self.pos += 2;
833 let mut comment_closed = false;
834 while self.pos < self.end {
835 let c = self.char_code_unchecked(self.pos);
836 if c == CharacterCodes::ASTERISK
837 && self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH)
838 {
839 self.pos += 2;
840 comment_closed = true;
841 break;
842 }
843 if c == CharacterCodes::LINE_FEED
844 || c == CharacterCodes::CARRIAGE_RETURN
845 {
846 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
847 }
848 self.pos += self.char_len_at(self.pos); }
850 if !comment_closed {
851 self.token_flags |= TokenFlags::Unterminated as u32;
852 self.scanner_diagnostics.push(ScannerDiagnostic {
854 pos: self.pos,
855 length: 0,
856 message: "'*/' expected.",
857 code: 1010,
858 });
859 }
860 if self.skip_trivia {
861 continue;
862 }
863 self.token = SyntaxKind::MultiLineCommentTrivia;
864 return self.token;
865 }
866 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
867 self.pos += 2;
868 self.token = SyntaxKind::SlashEqualsToken;
869 return self.token;
870 }
871 self.pos += 1;
872 self.token = SyntaxKind::SlashToken;
873 return self.token;
874 }
875
876 CharacterCodes::DOUBLE_QUOTE | CharacterCodes::SINGLE_QUOTE => {
878 self.scan_string(ch);
879 return self.token;
880 }
881
882 CharacterCodes::BACKTICK => {
884 self.scan_template_literal();
885 return self.token;
886 }
887
888 CharacterCodes::HASH => {
890 self.pos += 1;
893 if self.pos < self.end
894 && is_identifier_start(self.char_code_unchecked(self.pos))
895 {
896 self.pos += self.char_len_at(self.pos); while self.pos < self.end
898 && is_identifier_part(self.char_code_unchecked(self.pos))
899 {
900 self.pos += self.char_len_at(self.pos); }
902 self.token_value = self.substring(self.token_start, self.pos);
903 self.token = SyntaxKind::PrivateIdentifier;
904 } else {
905 self.token = SyntaxKind::HashToken;
906 }
907 return self.token;
908 }
909
910 CharacterCodes::_0..=CharacterCodes::_9 => {
912 self.scan_number();
913 return self.token;
914 }
915
916 CharacterCodes::BACKSLASH => {
918 let escaped_ch = self.peek_unicode_escape();
921 if let Some(code_point) = escaped_ch
922 && is_identifier_start(code_point)
923 {
924 self.scan_identifier_with_escapes();
925 return self.token;
926 }
927 self.pos += 1;
929 self.token = SyntaxKind::Unknown;
930 return self.token;
931 }
932
933 _ => {
935 if ch == CharacterCodes::LINE_SEPARATOR
937 || ch == CharacterCodes::PARAGRAPH_SEPARATOR
938 {
939 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
940 if self.skip_trivia {
941 self.pos += self.char_len_at(self.pos);
942 continue;
943 }
944 self.pos += self.char_len_at(self.pos);
945 self.token = SyntaxKind::NewLineTrivia;
946 return self.token;
947 }
948 if ch > 127 && is_white_space_single_line(ch) {
950 if self.skip_trivia {
951 self.pos += self.char_len_at(self.pos);
952 while self.pos < self.end
953 && is_white_space_single_line(self.char_code_unchecked(self.pos))
954 {
955 self.pos += self.char_len_at(self.pos);
956 }
957 continue;
958 }
959 self.pos += self.char_len_at(self.pos);
960 while self.pos < self.end
961 && is_white_space_single_line(self.char_code_unchecked(self.pos))
962 {
963 self.pos += self.char_len_at(self.pos);
964 }
965 self.token = SyntaxKind::WhitespaceTrivia;
966 return self.token;
967 }
968 if is_identifier_start(ch) {
969 self.scan_identifier();
970 return self.token;
971 }
972 self.pos += self.char_len_at(self.pos);
974 self.token = SyntaxKind::Unknown;
975 return self.token;
976 }
977 }
978 }
979 }
980
981 fn scan_string(&mut self, quote: u32) {
983 self.pos += 1; let mut result = String::new();
985
986 while self.pos < self.end {
987 let ch = self.char_code_unchecked(self.pos);
988 if ch == quote {
989 self.pos += 1; self.token_value = result;
991 self.token = SyntaxKind::StringLiteral;
992 return;
993 }
994 if ch == CharacterCodes::BACKSLASH {
995 self.pos += 1;
996 if self.pos < self.end {
997 self.scan_string_escape(quote, &mut result);
998 }
999 } else if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
1000 self.token_flags |= TokenFlags::Unterminated as u32;
1002 self.token_value = result;
1003 self.token = SyntaxKind::StringLiteral;
1004 return;
1005 } else {
1006 if let Some(c) = char::from_u32(ch) {
1007 result.push(c);
1008 }
1009 self.pos += self.char_len_at(self.pos); }
1011 }
1012
1013 self.token_flags |= TokenFlags::Unterminated as u32;
1015 self.token_value = result;
1016 self.token = SyntaxKind::StringLiteral;
1017 }
1018
1019 fn scan_string_escape(&mut self, quote: u32, result: &mut String) {
1020 let escaped = self.char_code_unchecked(self.pos);
1021 let escaped_len = self.char_len_at(self.pos);
1023 self.pos += escaped_len;
1024
1025 match escaped {
1026 CharacterCodes::_0 => self.scan_string_escape_zero(result),
1027 CharacterCodes::_1
1028 | CharacterCodes::_2
1029 | CharacterCodes::_3
1030 | CharacterCodes::_4
1031 | CharacterCodes::_5
1032 | CharacterCodes::_6
1033 | CharacterCodes::_7 => self.scan_string_escape_octal(escaped, result),
1034 CharacterCodes::LOWER_N => result.push('\n'),
1035 CharacterCodes::LOWER_R => result.push('\r'),
1036 CharacterCodes::LOWER_T => result.push('\t'),
1037 CharacterCodes::LOWER_V => result.push('\x0B'),
1038 CharacterCodes::LOWER_B => result.push('\x08'),
1039 CharacterCodes::LOWER_F => result.push('\x0C'),
1040 CharacterCodes::BACKSLASH => result.push('\\'),
1041 c if c == quote => result.push(char::from_u32(quote).unwrap_or('\0')),
1042 CharacterCodes::LOWER_X => self.scan_string_escape_hex(result),
1043 CharacterCodes::LOWER_U => self.scan_string_escape_unicode(result),
1044 CharacterCodes::LINE_FEED
1045 | CharacterCodes::CARRIAGE_RETURN
1046 | CharacterCodes::LINE_SEPARATOR
1047 | CharacterCodes::PARAGRAPH_SEPARATOR => {
1048 if escaped == CharacterCodes::CARRIAGE_RETURN
1050 && self.pos < self.end
1051 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
1052 {
1053 self.pos += 1;
1054 }
1055 }
1056 _ => {
1057 if let Some(c) = char::from_u32(escaped) {
1058 result.push(c);
1059 }
1060 }
1061 }
1062 }
1063
1064 fn scan_string_escape_zero(&mut self, result: &mut String) {
1065 if self.pos < self.end && is_digit(self.char_code_unchecked(self.pos)) {
1066 let mut value = 0u32;
1068 let octal_start = self.pos - 1; self.pos = octal_start;
1070 while self.pos < self.end
1071 && self.pos < octal_start + 3
1072 && is_octal_digit(self.char_code_unchecked(self.pos))
1073 {
1074 value = value * 8 + (self.char_code_unchecked(self.pos) - CharacterCodes::_0);
1075 self.pos += 1;
1076 }
1077 if let Some(c) = char::from_u32(value) {
1078 result.push(c);
1079 }
1080 } else {
1081 result.push('\0');
1082 }
1083 }
1084
1085 fn scan_string_escape_octal(&mut self, escaped: u32, result: &mut String) {
1086 let mut value = escaped - CharacterCodes::_0;
1088 let mut count = 1;
1089 while count < 3 && self.pos < self.end && is_octal_digit(self.char_code_unchecked(self.pos))
1090 {
1091 value = value * 8 + (self.char_code_unchecked(self.pos) - CharacterCodes::_0);
1092 self.pos += 1;
1093 count += 1;
1094 }
1095 if let Some(c) = char::from_u32(value) {
1096 result.push(c);
1097 }
1098 }
1099
1100 fn scan_string_escape_hex(&mut self, result: &mut String) {
1101 if self.pos + 2 <= self.end {
1102 let hex = self.substring(self.pos, self.pos + 2);
1103 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1104 self.pos += 2;
1105 if let Some(c) = char::from_u32(code) {
1106 result.push(c);
1107 }
1108 return;
1109 }
1110 }
1111 result.push('\\');
1112 result.push('x');
1113 }
1114
1115 fn scan_string_escape_unicode(&mut self, result: &mut String) {
1116 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::OPEN_BRACE {
1117 self.pos += 1;
1118 let hex_start = self.pos;
1119 while self.pos < self.end && is_hex_digit(self.char_code_unchecked(self.pos)) {
1120 self.pos += 1;
1121 }
1122 if self.pos < self.end
1123 && self.char_code_unchecked(self.pos) == CharacterCodes::CLOSE_BRACE
1124 {
1125 let hex = self.substring(hex_start, self.pos);
1126 self.pos += 1;
1127 if let Ok(code) = u32::from_str_radix(&hex, 16)
1128 && let Some(c) = char::from_u32(code)
1129 {
1130 result.push(c);
1131 return;
1132 }
1133 }
1134 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1136 result.push('\\');
1137 result.push('u');
1138 return;
1139 }
1140 if self.pos + 4 <= self.end {
1141 let hex = self.substring(self.pos, self.pos + 4);
1142 if let Ok(code) = u32::from_str_radix(&hex, 16)
1143 && let Some(c) = char::from_u32(code)
1144 {
1145 self.pos += 4;
1146 result.push(c);
1147 return;
1148 }
1149 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1151 result.push('\\');
1152 result.push('u');
1153 return;
1154 }
1155 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1157 result.push('\\');
1158 result.push('u');
1159 }
1160
1161 fn scan_template_literal(&mut self) {
1163 self.pos += 1; let mut result = String::new();
1165
1166 while self.pos < self.end {
1167 let ch = self.char_code_unchecked(self.pos);
1168 if ch == CharacterCodes::BACKTICK {
1169 self.pos += 1;
1170 self.token_value = result;
1171 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
1172 return;
1173 }
1174 if ch == CharacterCodes::DOLLAR
1175 && self.char_code_at(self.pos + 1) == Some(CharacterCodes::OPEN_BRACE)
1176 {
1177 self.pos += 2;
1178 self.token_value = result;
1179 self.token = SyntaxKind::TemplateHead;
1180 return;
1181 }
1182 if ch == CharacterCodes::BACKSLASH {
1183 self.pos += 1;
1185 let escaped = self.scan_template_escape_sequence();
1186 result.push_str(&escaped);
1187 } else {
1188 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
1189 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
1190 }
1191 if let Some(c) = char::from_u32(ch) {
1192 result.push(c);
1193 }
1194 self.pos += self.char_len_at(self.pos); }
1196 }
1197
1198 self.token_flags |= TokenFlags::Unterminated as u32;
1199 self.token_value = result;
1200 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
1201 }
1202
1203 fn scan_number(&mut self) {
1205 let start = self.pos;
1206
1207 if self.char_code_unchecked(self.pos) == CharacterCodes::_0 {
1209 let next = self.char_code_at(self.pos + 1).unwrap_or(0);
1210 if self.scan_prefixed_number(start, next) {
1211 return;
1212 }
1213
1214 if is_digit(next) && self.scan_legacy_octal_number(start) {
1217 return;
1218 }
1219 }
1220
1221 self.scan_decimal_number(start);
1223 }
1224
1225 fn scan_prefixed_number(&mut self, start: usize, next: u32) -> bool {
1226 match next {
1227 CharacterCodes::LOWER_X | CharacterCodes::UPPER_X => {
1228 self.scan_integer_base_literal(start, is_hex_digit, TokenFlags::HexSpecifier);
1229 true
1230 }
1231 CharacterCodes::LOWER_B | CharacterCodes::UPPER_B => {
1232 self.scan_integer_base_literal(start, is_binary_digit, TokenFlags::BinarySpecifier);
1233 true
1234 }
1235 CharacterCodes::LOWER_O | CharacterCodes::UPPER_O => {
1236 self.scan_integer_base_literal(start, is_octal_digit, TokenFlags::OctalSpecifier);
1237 true
1238 }
1239 _ => false,
1240 }
1241 }
1242
1243 fn scan_integer_base_literal(
1244 &mut self,
1245 start: usize,
1246 is_valid_digit: fn(u32) -> bool,
1247 specifier_flag: TokenFlags,
1248 ) {
1249 self.pos += 2;
1250 self.token_flags |= specifier_flag as u32;
1251 self.scan_digits_with_separators(is_valid_digit);
1252
1253 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LOWER_N {
1254 self.pos += 1;
1255 self.token_value = self.substring(start, self.pos);
1256 self.token = SyntaxKind::BigIntLiteral;
1257 return;
1258 }
1259
1260 self.set_numeric_token_value(start);
1261 self.token = SyntaxKind::NumericLiteral;
1262 }
1263
1264 fn scan_legacy_octal_number(&mut self, start: usize) -> bool {
1265 let mut all_octal = true;
1266 let digit_start = self.pos + 1; let mut scan_pos = digit_start;
1268 while scan_pos < self.end && is_digit(self.char_code_unchecked(scan_pos)) {
1269 if !is_octal_digit(self.char_code_unchecked(scan_pos)) {
1270 all_octal = false;
1271 }
1272 scan_pos += 1;
1273 }
1274 if all_octal && scan_pos > digit_start {
1275 self.pos = scan_pos;
1276 self.token_flags |= TokenFlags::Octal as u32;
1277 self.set_numeric_token_value(start);
1278 self.token = SyntaxKind::NumericLiteral;
1279 true
1280 } else {
1281 self.token_flags |= TokenFlags::ContainsLeadingZero as u32;
1282 false
1283 }
1284 }
1285
1286 fn set_numeric_token_value(&mut self, start: usize) {
1287 if (self.token_flags & TokenFlags::ContainsSeparator as u32) != 0 {
1288 self.token_value = self.substring(start, self.pos);
1289 } else {
1290 self.token_value.clear();
1291 }
1292 }
1293
1294 fn scan_decimal_number(&mut self, start: usize) {
1295 self.scan_digits_with_separators(is_digit);
1296
1297 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::DOT {
1299 self.pos += 1;
1300 self.scan_digits_with_separators(is_digit);
1301 }
1302
1303 if self.pos < self.end {
1305 let ch = self.char_code_unchecked(self.pos);
1306 if ch == CharacterCodes::LOWER_E || ch == CharacterCodes::UPPER_E {
1307 self.pos += 1;
1308 self.token_flags |= TokenFlags::Scientific as u32;
1309 if self.pos < self.end {
1310 let sign = self.char_code_unchecked(self.pos);
1311 if sign == CharacterCodes::PLUS || sign == CharacterCodes::MINUS {
1312 self.pos += 1;
1313 }
1314 }
1315 self.scan_digits_with_separators(is_digit);
1316 }
1317 }
1318
1319 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LOWER_N {
1321 self.pos += 1;
1322 self.token_value = self.substring(start, self.pos);
1323 self.token = SyntaxKind::BigIntLiteral;
1324 return;
1325 }
1326
1327 self.set_numeric_token_value(start);
1330 self.token = SyntaxKind::NumericLiteral;
1331 }
1332
1333 fn scan_digits_with_separators(&mut self, is_valid_digit: fn(u32) -> bool) {
1334 let mut saw_digit = false;
1335 let mut prev_separator = false;
1336
1337 while self.pos < self.end {
1338 let ch = self.char_code_unchecked(self.pos);
1339 if ch == CharacterCodes::UNDERSCORE {
1340 self.token_flags |= TokenFlags::ContainsSeparator as u32;
1341 if !saw_digit || prev_separator {
1342 self.token_flags |= TokenFlags::ContainsInvalidSeparator as u32;
1343 if self.token_invalid_separator_pos.is_none() {
1344 self.token_invalid_separator_pos = Some(self.pos);
1345 self.token_invalid_separator_is_consecutive = prev_separator;
1346 }
1347 }
1348 prev_separator = true;
1349 self.pos += 1;
1350 continue;
1351 }
1352 if is_valid_digit(ch) {
1353 saw_digit = true;
1354 prev_separator = false;
1355 self.pos += 1;
1356 continue;
1357 }
1358 break;
1359 }
1360
1361 if prev_separator {
1362 self.token_flags |= TokenFlags::ContainsInvalidSeparator as u32;
1363 if self.token_invalid_separator_pos.is_none() {
1364 self.token_invalid_separator_pos = Some(self.pos.saturating_sub(1));
1365 self.token_invalid_separator_is_consecutive = false;
1366 }
1367 }
1368 }
1369
1370 fn scan_identifier(&mut self) {
1374 let start = self.pos;
1375 self.pos += self.char_len_at(self.pos);
1377
1378 while self.pos < self.end {
1379 let ch = self.char_code_unchecked(self.pos);
1380 if ch == CharacterCodes::BACKSLASH {
1381 if let Some(code_point) = self.peek_unicode_escape()
1383 && is_identifier_part(code_point)
1384 {
1385 self.continue_identifier_with_escapes(start);
1387 return;
1388 }
1389 break;
1391 }
1392 if !is_identifier_part(ch) {
1393 break;
1394 }
1395 self.pos += self.char_len_at(self.pos); }
1397
1398 let text_slice = &self.source[start..self.pos];
1400
1401 self.token = crate::text_to_keyword(text_slice).unwrap_or(SyntaxKind::Identifier);
1403
1404 self.token_atom = self.interner.intern(text_slice);
1406
1407 self.token_value.clear();
1410 }
1411
1412 fn continue_identifier_with_escapes(&mut self, start: usize) {
1416 let mut result = String::from(&self.source[start..self.pos]);
1418
1419 while self.pos < self.end {
1421 let ch = self.char_code_unchecked(self.pos);
1422 if ch == CharacterCodes::BACKSLASH {
1423 if let Some(code_point) = self.peek_unicode_escape()
1425 && is_identifier_part(code_point)
1426 {
1427 if let Some(c) = char::from_u32(self.scan_unicode_escape_value().unwrap_or(0)) {
1429 result.push(c);
1430 }
1431 continue;
1432 }
1433 break;
1435 }
1436 if !is_identifier_part(ch) {
1437 break;
1438 }
1439 if let Some(c) = char::from_u32(ch) {
1440 result.push(c);
1441 }
1442 self.pos += self.char_len_at(self.pos);
1443 }
1444
1445 self.token = crate::text_to_keyword(&result).unwrap_or(SyntaxKind::Identifier);
1446 self.token_atom = self.interner.intern(&result);
1447 self.token_value.clear();
1448 self.token_flags |= TokenFlags::UnicodeEscape as u32;
1449 }
1450
1451 fn peek_unicode_escape(&self) -> Option<u32> {
1454 if self.pos + 1 >= self.end {
1456 return None;
1457 }
1458 let bytes = self.source.as_bytes();
1459 if bytes.get(self.pos + 1).copied() != Some(b'u') {
1460 return None;
1461 }
1462 if bytes.get(self.pos + 2).copied() == Some(b'{') {
1464 let start = self.pos + 3;
1465 let mut end = start;
1466 while end < self.end && bytes.get(end).is_some_and(u8::is_ascii_hexdigit) {
1467 end += 1;
1468 }
1469 if end == start || bytes.get(end).copied() != Some(b'}') {
1470 return None;
1471 }
1472 let hex = &self.source[start..end];
1473 u32::from_str_radix(hex, 16)
1474 .ok()
1475 .filter(|&cp| cp <= 0x0010_FFFF)
1476 } else {
1477 if self.pos + 5 >= self.end {
1479 return None;
1480 }
1481 let hex = &self.source[self.pos + 2..self.pos + 6];
1482 if hex.len() == 4 && hex.bytes().all(|b| b.is_ascii_hexdigit()) {
1483 u32::from_str_radix(hex, 16).ok()
1484 } else {
1485 None
1486 }
1487 }
1488 }
1489
1490 fn scan_identifier_with_escapes(&mut self) {
1492 let mut result = String::new();
1493
1494 if let Some(ch) = self.scan_unicode_escape_value()
1496 && let Some(c) = char::from_u32(ch)
1497 {
1498 result.push(c);
1499 }
1500
1501 while self.pos < self.end {
1503 let ch = self.char_code_unchecked(self.pos);
1504 if ch == CharacterCodes::BACKSLASH {
1505 if let Some(code_point) = self.peek_unicode_escape()
1507 && is_identifier_part(code_point)
1508 {
1509 if let Some(c) = char::from_u32(self.scan_unicode_escape_value().unwrap_or(0)) {
1510 result.push(c);
1511 }
1512 continue;
1513 }
1514 break;
1515 }
1516 if !is_identifier_part(ch) {
1517 break;
1518 }
1519 if let Some(c) = char::from_u32(ch) {
1520 result.push(c);
1521 }
1522 self.pos += self.char_len_at(self.pos);
1523 }
1524
1525 self.token = crate::text_to_keyword(&result).unwrap_or(SyntaxKind::Identifier);
1526 self.token_atom = self.interner.intern(&result);
1527 self.token_value.clear();
1528 self.token_flags |= TokenFlags::UnicodeEscape as u32;
1529 }
1530
1531 fn scan_unicode_escape_value(&mut self) -> Option<u32> {
1534 self.pos += 1;
1536 if self.pos >= self.end || self.source.as_bytes()[self.pos] != b'u' {
1537 return None;
1538 }
1539 self.pos += 1; if self.pos < self.end && self.source.as_bytes()[self.pos] == b'{' {
1542 self.pos += 1;
1544 let start = self.pos;
1545 while self.pos < self.end
1546 && self
1547 .source
1548 .as_bytes()
1549 .get(self.pos)
1550 .is_some_and(u8::is_ascii_hexdigit)
1551 {
1552 self.pos += 1;
1553 }
1554 let result = u32::from_str_radix(&self.source[start..self.pos], 16).ok();
1555 if self.pos < self.end && self.source.as_bytes()[self.pos] == b'}' {
1556 self.pos += 1;
1557 }
1558 result
1559 } else {
1560 if self.pos + 4 > self.end {
1562 return None;
1563 }
1564 let hex = &self.source[self.pos..self.pos + 4];
1565 if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
1566 self.pos += 4;
1567 u32::from_str_radix(hex, 16).ok()
1568 } else {
1569 None
1570 }
1571 }
1572 }
1573
1574 #[wasm_bindgen(js_name = reScanGreaterToken)]
1581 pub fn re_scan_greater_token(&mut self) -> SyntaxKind {
1582 if self.token == SyntaxKind::GreaterThanToken {
1583 let next_char = self.char_code_unchecked(self.pos);
1584 if next_char == CharacterCodes::GREATER_THAN {
1585 let next_next = self.char_code_unchecked(self.pos + 1);
1586 if next_next == CharacterCodes::GREATER_THAN {
1587 let next_next_next = self.char_code_unchecked(self.pos + 2);
1589 if next_next_next == CharacterCodes::EQUALS {
1590 self.pos += 3;
1592 self.token = SyntaxKind::GreaterThanGreaterThanGreaterThanEqualsToken;
1593 return self.token;
1594 }
1595 self.pos += 2;
1596 self.token = SyntaxKind::GreaterThanGreaterThanGreaterThanToken;
1597 return self.token;
1598 }
1599 if next_next == CharacterCodes::EQUALS {
1600 self.pos += 2;
1602 self.token = SyntaxKind::GreaterThanGreaterThanEqualsToken;
1603 return self.token;
1604 }
1605 self.pos += 1;
1607 self.token = SyntaxKind::GreaterThanGreaterThanToken;
1608 return self.token;
1609 }
1610 if next_char == CharacterCodes::EQUALS {
1611 self.pos += 1;
1613 self.token = SyntaxKind::GreaterThanEqualsToken;
1614 return self.token;
1615 }
1616 }
1617 self.token
1618 }
1619
1620 #[wasm_bindgen(js_name = reScanSlashToken)]
1623 pub fn re_scan_slash_token(&mut self) -> SyntaxKind {
1624 if self.token == SyntaxKind::SlashToken || self.token == SyntaxKind::SlashEqualsToken {
1625 let start_of_regex_body = self.token_start + 1;
1627 self.pos = start_of_regex_body;
1628 let mut in_escape = false;
1629 let mut in_character_class = false;
1630
1631 while self.pos < self.end {
1633 let ch = self.char_code_unchecked(self.pos);
1634
1635 if is_line_break(ch) {
1637 self.token_flags |= TokenFlags::Unterminated as u32;
1638 break;
1639 }
1640
1641 if in_escape {
1642 in_escape = false;
1644 } else if ch == CharacterCodes::SLASH && !in_character_class {
1645 break;
1647 } else if ch == CharacterCodes::OPEN_BRACKET {
1648 in_character_class = true;
1649 } else if ch == CharacterCodes::BACKSLASH {
1650 in_escape = true;
1651 } else if ch == CharacterCodes::CLOSE_BRACKET {
1652 in_character_class = false;
1653 }
1654 self.pos += self.char_len_at(self.pos);
1656 }
1657
1658 if self.pos >= self.end && (self.token_flags & TokenFlags::Unterminated as u32) == 0 {
1660 self.token_flags |= TokenFlags::Unterminated as u32;
1661 }
1662
1663 if (self.token_flags & TokenFlags::Unterminated as u32) == 0 {
1664 self.pos += 1;
1666
1667 let mut seen_flags: u8 = 0;
1670 let mut has_u = false;
1671 let mut has_v = false;
1672
1673 while self.pos < self.end {
1674 let ch = self.char_code_unchecked(self.pos);
1675 if !is_regex_flag(ch) && !is_identifier_part(ch) {
1676 break;
1677 }
1678
1679 let flag_bit = match ch {
1681 CharacterCodes::LOWER_G => Some(0),
1682 CharacterCodes::LOWER_I => Some(1),
1683 CharacterCodes::LOWER_M => Some(2),
1684 CharacterCodes::LOWER_S => Some(3),
1685 CharacterCodes::LOWER_U => {
1686 has_u = true;
1687 Some(4)
1688 }
1689 CharacterCodes::LOWER_V => {
1690 has_v = true;
1691 Some(5)
1692 }
1693 CharacterCodes::LOWER_Y => Some(6),
1694 CharacterCodes::LOWER_D => Some(7),
1695 _ => None,
1696 };
1697
1698 if let Some(bit) = flag_bit {
1699 let mask = 1 << bit;
1700 if seen_flags & mask != 0 {
1701 self.regex_flag_errors.push(RegexFlagError {
1703 kind: RegexFlagErrorKind::Duplicate,
1704 pos: self.pos,
1705 });
1706 }
1707 seen_flags |= mask;
1708 } else if is_identifier_part(ch) {
1709 self.regex_flag_errors.push(RegexFlagError {
1711 kind: RegexFlagErrorKind::InvalidFlag,
1712 pos: self.pos,
1713 });
1714 }
1715
1716 self.pos += self.char_len_at(self.pos);
1718 }
1719
1720 if has_u && has_v {
1722 self.regex_flag_errors.push(RegexFlagError {
1724 kind: RegexFlagErrorKind::IncompatibleFlags,
1725 pos: self.pos,
1726 });
1727 }
1728 }
1729
1730 self.token_value = self.substring(self.token_start, self.pos);
1731 self.token = SyntaxKind::RegularExpressionLiteral;
1732 }
1733 self.token
1734 }
1735
1736 #[wasm_bindgen(js_name = reScanAsteriskEqualsToken)]
1739 pub fn re_scan_asterisk_equals_token(&mut self) -> SyntaxKind {
1740 if self.token == SyntaxKind::AsteriskEqualsToken {
1741 self.pos = self.token_start + 1;
1742 self.token = SyntaxKind::EqualsToken;
1743 }
1744 self.token
1745 }
1746
1747 #[wasm_bindgen(js_name = reScanTemplateToken)]
1755 pub fn re_scan_template_token(&mut self, _is_tagged_template: bool) -> SyntaxKind {
1756 if self.token_start >= self.end {
1759 self.token = SyntaxKind::EndOfFileToken;
1760 return self.token;
1761 }
1762 self.pos = self.token_start;
1763 self.token = self.scan_template_and_set_token_value(false);
1764 self.token
1765 }
1766
1767 #[wasm_bindgen(js_name = reScanTemplateHeadOrNoSubstitutionTemplate)]
1770 pub fn re_scan_template_head_or_no_substitution_template(&mut self) -> SyntaxKind {
1771 self.pos = self.token_start;
1772 self.token = self.scan_template_and_set_token_value(true);
1773 self.token
1774 }
1775
1776 fn scan_template_and_set_token_value(&mut self, started_with_backtick: bool) -> SyntaxKind {
1782 if self.pos >= self.end {
1785 self.token_flags |= TokenFlags::Unterminated as u32;
1786 self.token_value = String::new();
1787 return if started_with_backtick {
1788 SyntaxKind::NoSubstitutionTemplateLiteral
1789 } else {
1790 SyntaxKind::TemplateTail
1791 };
1792 }
1793 self.pos += 1;
1794 let mut start = self.pos;
1795 let mut contents = String::new();
1796
1797 while self.pos < self.end {
1798 let ch = self.char_code_unchecked(self.pos);
1799
1800 if ch == CharacterCodes::BACKTICK {
1802 contents.push_str(&self.substring(start, self.pos));
1803 self.pos += 1;
1804 self.token_value = contents;
1805 return if started_with_backtick {
1806 SyntaxKind::NoSubstitutionTemplateLiteral
1807 } else {
1808 SyntaxKind::TemplateTail
1809 };
1810 }
1811
1812 if ch == CharacterCodes::DOLLAR
1814 && self.pos + 1 < self.end
1815 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::OPEN_BRACE
1816 {
1817 contents.push_str(&self.substring(start, self.pos));
1818 self.pos += 2;
1819 self.token_value = contents;
1820 return if started_with_backtick {
1821 SyntaxKind::TemplateHead
1822 } else {
1823 SyntaxKind::TemplateMiddle
1824 };
1825 }
1826
1827 if ch == CharacterCodes::BACKSLASH {
1829 contents.push_str(&self.substring(start, self.pos));
1830 let escaped = self.scan_template_escape_sequence();
1831 contents.push_str(&escaped);
1832 start = self.pos;
1834 continue;
1835 }
1836
1837 if ch == CharacterCodes::CARRIAGE_RETURN {
1839 contents.push_str(&self.substring(start, self.pos));
1840 self.pos += 1;
1841 if self.pos < self.end
1842 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
1843 {
1844 self.pos += 1;
1845 }
1846 contents.push('\n');
1847 start = self.pos;
1849 continue;
1850 }
1851
1852 self.pos += self.char_len_at(self.pos);
1855 }
1856
1857 contents.push_str(&self.substring(start, self.pos));
1859 self.token_flags |= TokenFlags::Unterminated as u32;
1860 self.token_value = contents;
1861 if started_with_backtick {
1862 SyntaxKind::NoSubstitutionTemplateLiteral
1863 } else {
1864 SyntaxKind::TemplateTail
1865 }
1866 }
1867
1868 fn scan_template_escape_sequence(&mut self) -> String {
1873 if self.pos >= self.end {
1874 return String::from("\\");
1875 }
1876
1877 let ch = self.char_code_unchecked(self.pos);
1878 let ch_len = self.char_len_at(self.pos);
1880 self.pos += ch_len;
1881
1882 match ch {
1883 CharacterCodes::_0 => self.scan_template_escape_digit_zero(),
1884 CharacterCodes::_1
1885 | CharacterCodes::_2
1886 | CharacterCodes::_3
1887 | CharacterCodes::_4
1888 | CharacterCodes::_5
1889 | CharacterCodes::_6
1890 | CharacterCodes::_7
1891 | CharacterCodes::_8
1892 | CharacterCodes::_9 => self.scan_template_escape_octal_digit(ch),
1893 CharacterCodes::LOWER_N => String::from("\n"),
1894 CharacterCodes::LOWER_R => String::from("\r"),
1895 CharacterCodes::LOWER_T => String::from("\t"),
1896 CharacterCodes::LOWER_V => String::from("\x0B"),
1897 CharacterCodes::LOWER_B => String::from("\x08"),
1898 CharacterCodes::LOWER_F => String::from("\x0C"),
1899 CharacterCodes::SINGLE_QUOTE => String::from("'"),
1900 CharacterCodes::DOUBLE_QUOTE => String::from("\""),
1901 CharacterCodes::BACKTICK => String::from("`"),
1902 CharacterCodes::BACKSLASH => String::from("\\"),
1903 CharacterCodes::DOLLAR => String::from("$"),
1904 CharacterCodes::LINE_FEED
1905 | CharacterCodes::LINE_SEPARATOR
1906 | CharacterCodes::PARAGRAPH_SEPARATOR => String::new(),
1907 CharacterCodes::CARRIAGE_RETURN => self.scan_template_escape_cr(),
1908 CharacterCodes::LOWER_X => self.scan_template_hex_escape(),
1909 CharacterCodes::LOWER_U => self.scan_template_unicode_escape(),
1910 _ => Self::scan_template_unknown_escape(ch),
1911 }
1912 }
1913
1914 fn scan_template_escape_digit_zero(&mut self) -> String {
1915 if self.pos < self.end && is_digit(self.char_code_unchecked(self.pos)) {
1916 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1917 return String::from("\\0");
1918 }
1919 String::from("\0")
1920 }
1921
1922 fn scan_template_escape_octal_digit(&mut self, ch: u32) -> String {
1923 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1924 let digit = char::from_u32(ch).unwrap_or('?');
1925 format!("\\{digit}")
1926 }
1927
1928 fn scan_template_escape_cr(&mut self) -> String {
1929 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED {
1930 self.pos += 1;
1931 }
1932 String::new()
1933 }
1934
1935 fn scan_template_hex_escape(&mut self) -> String {
1936 if self.pos + 2 <= self.end {
1937 let hex = self.substring(self.pos, self.pos + 2);
1938 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1939 self.pos += 2;
1940 if let Some(c) = char::from_u32(code) {
1941 return c.to_string();
1942 }
1943 }
1944 }
1945 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1946 "\\x".to_string()
1947 }
1948
1949 fn scan_template_unicode_escape(&mut self) -> String {
1950 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::OPEN_BRACE {
1951 return self.scan_template_brace_unicode_escape();
1952 }
1953
1954 if self.pos + 4 <= self.end {
1955 let hex = self.substring(self.pos, self.pos + 4);
1956 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1957 self.pos += 4;
1958 if let Some(c) = char::from_u32(code) {
1959 return c.to_string();
1960 }
1961 }
1962 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1963 return String::from("\\u");
1964 }
1965
1966 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1967 String::from("\\u")
1968 }
1969
1970 fn scan_template_brace_unicode_escape(&mut self) -> String {
1971 self.pos += 1;
1972 let hex_start = self.pos;
1973 while self.pos < self.end && is_hex_digit(self.char_code_unchecked(self.pos)) {
1974 self.pos += 1;
1975 }
1976 if self.pos < self.end && self.char_code_unchecked(self.pos) == CharacterCodes::CLOSE_BRACE
1977 {
1978 let hex = self.substring(hex_start, self.pos);
1979 self.pos += 1;
1980 if let Ok(code) = u32::from_str_radix(&hex, 16)
1981 && let Some(c) = char::from_u32(code)
1982 {
1983 return c.to_string();
1984 }
1985 }
1986 self.token_flags |= TokenFlags::ContainsInvalidEscape as u32;
1987 String::from("\\u")
1988 }
1989
/// Cooks an escape with no special meaning: the escaped character stands for
/// itself. Returns an empty string when `ch` is not a valid `char`.
fn scan_template_unknown_escape(ch: u32) -> String {
    char::from_u32(ch).map(String::from).unwrap_or_default()
}
1997
1998 #[wasm_bindgen(js_name = scanJsxIdentifier)]
2005 pub fn scan_jsx_identifier(&mut self) -> SyntaxKind {
2006 if crate::token_is_identifier_or_keyword(self.token) {
2007 while self.pos < self.end {
2011 let ch = self.char_code_unchecked(self.pos);
2012 if ch == CharacterCodes::MINUS {
2013 self.pos += 1;
2015 if self.pos < self.end
2017 && is_identifier_start(self.char_code_unchecked(self.pos))
2018 {
2019 self.pos += self.char_len_at(self.pos); while self.pos < self.end
2021 && is_identifier_part(self.char_code_unchecked(self.pos))
2022 {
2023 self.pos += self.char_len_at(self.pos); }
2025 }
2026 } else {
2027 break;
2028 }
2029 }
2030 self.token_atom = self
2032 .interner
2033 .intern(&self.source[self.token_start..self.pos]);
2034 self.token_value.clear();
2035 self.token = SyntaxKind::Identifier;
2037 }
2038 self.token
2039 }
2040
2041 #[wasm_bindgen(js_name = reScanJsxToken)]
2047 pub fn re_scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> SyntaxKind {
2048 self.pos = self.full_start_pos;
2049 self.scan_jsx_token(allow_multiline_jsx_text)
2050 }
2051
2052 fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> SyntaxKind {
2054 self.full_start_pos = self.pos;
2055 self.token_start = self.pos;
2056 self.token_atom = Atom::NONE;
2059
2060 if self.pos >= self.end {
2061 self.token = SyntaxKind::EndOfFileToken;
2062 return self.token;
2063 }
2064
2065 let ch = self.char_code_unchecked(self.pos);
2066
2067 if ch == CharacterCodes::LESS_THAN {
2069 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::SLASH) {
2071 self.pos += 2;
2072 self.token = SyntaxKind::LessThanSlashToken;
2073 return self.token;
2074 }
2075 self.pos += 1;
2076 self.token = SyntaxKind::LessThanToken;
2077 return self.token;
2078 }
2079
2080 if ch == CharacterCodes::OPEN_BRACE {
2081 self.pos += 1;
2082 self.token = SyntaxKind::OpenBraceToken;
2083 return self.token;
2084 }
2085
2086 let mut text = String::new();
2088 while self.pos < self.end {
2089 let c = self.char_code_unchecked(self.pos);
2090
2091 if c == CharacterCodes::OPEN_BRACE || c == CharacterCodes::LESS_THAN {
2093 break;
2094 }
2095
2096 if is_line_break(c) {
2098 if !allow_multiline_jsx_text {
2099 break;
2100 }
2101 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
2102 }
2103
2104 if let Some(char) = char::from_u32(c) {
2105 text.push(char);
2106 }
2107 self.pos += self.char_len_at(self.pos); }
2109
2110 if !text.is_empty() {
2111 self.token_value = text;
2112 self.token = SyntaxKind::JsxText;
2113 return self.token;
2114 }
2115
2116 self.token = SyntaxKind::Unknown;
2117 self.token
2118 }
2119
2120 #[wasm_bindgen(js_name = scanJsxAttributeValue)]
2122 pub fn scan_jsx_attribute_value(&mut self) -> SyntaxKind {
2123 self.full_start_pos = self.pos;
2124 self.token_flags = 0;
2125
2126 while self.pos < self.end && is_white_space_single_line(self.char_code_unchecked(self.pos))
2128 {
2129 self.pos += 1;
2130 }
2131
2132 self.token_start = self.pos;
2133
2134 if self.pos >= self.end {
2135 self.token = SyntaxKind::EndOfFileToken;
2136 return self.token;
2137 }
2138
2139 let ch = self.char_code_unchecked(self.pos);
2140
2141 if ch == CharacterCodes::DOUBLE_QUOTE || ch == CharacterCodes::SINGLE_QUOTE {
2143 self.scan_jsx_string_literal(ch);
2144 return self.token;
2145 }
2146
2147 self.scan()
2148 }
2149
2150 fn scan_jsx_string_literal(&mut self, quote: u32) {
2153 self.pos += 1; let mut result = String::new();
2155
2156 while self.pos < self.end {
2157 let ch = self.char_code_unchecked(self.pos);
2158 if ch == quote {
2159 self.pos += 1; self.token_value = result;
2161 self.token = SyntaxKind::StringLiteral;
2162 return;
2163 }
2164 if let Some(c) = char::from_u32(ch) {
2166 result.push(c);
2167 }
2168 self.pos += 1;
2169 }
2170
2171 self.token_flags |= TokenFlags::Unterminated as u32;
2173 self.token_value = result;
2174 self.token = SyntaxKind::StringLiteral;
2175 }
2176
2177 #[wasm_bindgen(js_name = reScanJsxAttributeValue)]
2179 pub fn re_scan_jsx_attribute_value(&mut self) -> SyntaxKind {
2180 self.pos = self.token_start;
2181 self.scan_jsx_attribute_value()
2182 }
2183
2184 #[wasm_bindgen(js_name = reScanLessThanToken)]
2187 pub fn re_scan_less_than_token(&mut self) -> SyntaxKind {
2188 if self.token == SyntaxKind::LessThanToken
2189 && self.pos < self.end
2190 && self.char_code_unchecked(self.pos) == CharacterCodes::SLASH
2191 {
2192 self.pos += 1;
2193 self.token = SyntaxKind::LessThanSlashToken;
2194 }
2195 self.token
2196 }
2197
2198 #[wasm_bindgen(js_name = reScanHashToken)]
2200 pub fn re_scan_hash_token(&mut self) -> SyntaxKind {
2201 if self.token == SyntaxKind::HashToken
2202 && self.pos < self.end
2203 && is_identifier_start(self.char_code_unchecked(self.pos))
2204 {
2205 self.pos += self.char_len_at(self.pos);
2207 while self.pos < self.end && is_identifier_part(self.char_code_unchecked(self.pos)) {
2208 self.pos += self.char_len_at(self.pos);
2209 }
2210 self.token_value = self.substring(self.token_start, self.pos);
2211 self.token = SyntaxKind::PrivateIdentifier;
2212 }
2213 self.token
2214 }
2215
2216 #[wasm_bindgen(js_name = reScanQuestionToken)]
2218 pub fn re_scan_question_token(&mut self) -> SyntaxKind {
2219 if self.token == SyntaxKind::QuestionToken {
2220 let ch = self.char_code_at(self.pos);
2221 if ch == Some(CharacterCodes::DOT) {
2222 let next = self.char_code_at(self.pos + 1);
2224 if !next.is_some_and(is_digit) {
2225 self.pos += 1;
2226 self.token = SyntaxKind::QuestionDotToken;
2227 }
2228 } else if ch == Some(CharacterCodes::QUESTION) {
2229 if self.char_code_at(self.pos + 1) == Some(CharacterCodes::EQUALS) {
2230 self.pos += 2;
2231 self.token = SyntaxKind::QuestionQuestionEqualsToken;
2232 } else {
2233 self.pos += 1;
2234 self.token = SyntaxKind::QuestionQuestionToken;
2235 }
2236 }
2237 }
2238 self.token
2239 }
2240
2241 #[wasm_bindgen(js_name = scanJsDocToken)]
2248 pub fn scan_jsdoc_token(&mut self) -> SyntaxKind {
2249 self.full_start_pos = self.pos;
2250 self.token_flags = 0;
2251
2252 if self.pos >= self.end {
2253 self.token = SyntaxKind::EndOfFileToken;
2254 return self.token;
2255 }
2256
2257 self.token_start = self.pos;
2258 let ch = self.char_code_unchecked(self.pos);
2259
2260 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
2262 self.token_flags |= TokenFlags::PrecedingLineBreak as u32;
2263 self.pos += 1;
2264 if ch == CharacterCodes::CARRIAGE_RETURN
2265 && self.pos < self.end
2266 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
2267 {
2268 self.pos += 1;
2269 }
2270 self.token = SyntaxKind::NewLineTrivia;
2271 return self.token;
2272 }
2273
2274 if is_white_space_single_line(ch) {
2276 while self.pos < self.end
2277 && is_white_space_single_line(self.char_code_unchecked(self.pos))
2278 {
2279 self.pos += 1;
2280 }
2281 self.token = SyntaxKind::WhitespaceTrivia;
2282 return self.token;
2283 }
2284
2285 if self.scan_jsdoc_punctuation_token(ch) {
2286 return self.token;
2287 }
2288
2289 if is_identifier_start(ch) {
2291 return self.scan_jsdoc_identifier();
2292 }
2293
2294 self.scan_jsdoc_unknown_character();
2296 self.token
2297 }
2298
2299 fn scan_jsdoc_punctuation_token(&mut self, ch: u32) -> bool {
2300 match ch {
2301 CharacterCodes::AT => {
2302 self.pos += 1;
2303 self.token = SyntaxKind::AtToken;
2304 }
2305 CharacterCodes::ASTERISK => {
2306 self.pos += 1;
2307 self.token = SyntaxKind::AsteriskToken;
2308 }
2309 CharacterCodes::OPEN_BRACE => {
2310 self.pos += 1;
2311 self.token = SyntaxKind::OpenBraceToken;
2312 }
2313 CharacterCodes::CLOSE_BRACE => {
2314 self.pos += 1;
2315 self.token = SyntaxKind::CloseBraceToken;
2316 }
2317 CharacterCodes::OPEN_BRACKET => {
2318 self.pos += 1;
2319 self.token = SyntaxKind::OpenBracketToken;
2320 }
2321 CharacterCodes::CLOSE_BRACKET => {
2322 self.pos += 1;
2323 self.token = SyntaxKind::CloseBracketToken;
2324 }
2325 CharacterCodes::LESS_THAN => {
2326 self.pos += 1;
2327 self.token = SyntaxKind::LessThanToken;
2328 }
2329 CharacterCodes::GREATER_THAN => {
2330 self.pos += 1;
2331 self.token = SyntaxKind::GreaterThanToken;
2332 }
2333 CharacterCodes::EQUALS => {
2334 self.pos += 1;
2335 self.token = SyntaxKind::EqualsToken;
2336 }
2337 CharacterCodes::COMMA => {
2338 self.pos += 1;
2339 self.token = SyntaxKind::CommaToken;
2340 }
2341 CharacterCodes::DOT => {
2342 self.pos += 1;
2343 self.token = SyntaxKind::DotToken;
2344 }
2345 CharacterCodes::BACKTICK => {
2346 self.pos += 1;
2347 while self.pos < self.end
2348 && self.char_code_unchecked(self.pos) != CharacterCodes::BACKTICK
2349 {
2350 self.pos += 1;
2351 }
2352 if self.pos < self.end {
2353 self.pos += 1;
2354 }
2355 self.token_value = self.substring(self.token_start, self.pos);
2356 self.token = SyntaxKind::NoSubstitutionTemplateLiteral;
2357 }
2358 _ => return false,
2359 }
2360 true
2361 }
2362
2363 fn scan_jsdoc_identifier(&mut self) -> SyntaxKind {
2364 self.pos += self.char_len_at(self.pos);
2365 while self.pos < self.end && is_identifier_part(self.char_code_unchecked(self.pos)) {
2366 self.pos += self.char_len_at(self.pos);
2367 }
2368 self.token_value = self.substring(self.token_start, self.pos);
2369 self.token = crate::text_to_keyword(&self.token_value).unwrap_or(SyntaxKind::Identifier);
2370 self.token
2371 }
2372
2373 fn scan_jsdoc_unknown_character(&mut self) {
2374 self.pos += self.char_len_at(self.pos);
2375 self.token = SyntaxKind::Unknown;
2376 }
2377
2378 #[wasm_bindgen(js_name = scanJsDocCommentTextToken)]
2381 pub fn scan_jsdoc_comment_text_token(&mut self, in_backticks: bool) -> SyntaxKind {
2382 self.full_start_pos = self.pos;
2383 self.token_flags = 0;
2384 self.token_start = self.pos;
2385
2386 if self.pos >= self.end {
2387 self.token = SyntaxKind::EndOfFileToken;
2388 return self.token;
2389 }
2390
2391 while self.pos < self.end {
2393 let ch = self.char_code_unchecked(self.pos);
2394
2395 match ch {
2397 CharacterCodes::LINE_FEED | CharacterCodes::CARRIAGE_RETURN => {
2399 break;
2400 }
2401 CharacterCodes::AT | CharacterCodes::OPEN_BRACE | CharacterCodes::CLOSE_BRACE
2403 if !in_backticks =>
2404 {
2405 break;
2406 }
2407 CharacterCodes::BACKTICK => {
2409 if self.pos > self.token_start {
2410 break; }
2412 self.pos += 1;
2414 self.token = SyntaxKind::Unknown; return self.token;
2416 }
2417 _ => {
2418 self.pos += self.char_len_at(self.pos);
2420 }
2421 }
2422 }
2423
2424 if self.pos > self.token_start {
2425 self.token_value = self.substring(self.token_start, self.pos);
2426 self.token = SyntaxKind::Identifier;
2428 } else {
2429 self.token = SyntaxKind::EndOfFileToken;
2430 }
2431 self.token
2432 }
2433
2434 #[wasm_bindgen(js_name = scanShebangTrivia)]
2441 pub fn scan_shebang_trivia(&mut self) -> usize {
2442 if self.pos != 0 {
2444 return 0;
2445 }
2446
2447 if self.pos + 1 < self.end
2449 && self.char_code_unchecked(self.pos) == CharacterCodes::HASH
2450 && self.char_code_unchecked(self.pos + 1) == CharacterCodes::EXCLAMATION
2451 {
2452 let start = self.pos;
2453 self.pos += 2;
2454
2455 while self.pos < self.end {
2457 let ch = self.char_code_unchecked(self.pos);
2458 if ch == CharacterCodes::LINE_FEED || ch == CharacterCodes::CARRIAGE_RETURN {
2459 break;
2460 }
2461 self.pos += self.char_len_at(self.pos);
2463 }
2464
2465 if self.pos < self.end {
2467 let ch = self.char_code_unchecked(self.pos);
2468 if ch == CharacterCodes::CARRIAGE_RETURN {
2469 self.pos += 1;
2470 if self.pos < self.end
2471 && self.char_code_unchecked(self.pos) == CharacterCodes::LINE_FEED
2472 {
2473 self.pos += 1;
2474 }
2475 } else if ch == CharacterCodes::LINE_FEED {
2476 self.pos += 1;
2477 }
2478 }
2479
2480 return self.pos - start;
2481 }
2482
2483 0
2484 }
2485
2486 #[wasm_bindgen(js_name = reScanInvalidIdentifier)]
2488 pub fn re_scan_invalid_identifier(&mut self) -> SyntaxKind {
2489 if self.token == SyntaxKind::Unknown && !self.token_value.is_empty() {
2492 let chars: Vec<char> = self.token_value.chars().collect();
2494 if !chars.is_empty() && is_identifier_start(chars[0] as u32) {
2495 let mut all_valid = true;
2496 for c in chars.iter().skip(1) {
2497 if !is_identifier_part(*c as u32) {
2498 all_valid = false;
2499 break;
2500 }
2501 }
2502 if all_valid {
2503 self.token =
2504 crate::text_to_keyword(&self.token_value).unwrap_or(SyntaxKind::Identifier);
2505 }
2506 }
2507 }
2508 self.token
2509 }
2510}
2511
2512impl ScannerState {
2517 #[must_use]
2519 pub fn save_state(&self) -> ScannerSnapshot {
2520 ScannerSnapshot {
2521 pos: self.pos,
2522 full_start_pos: self.full_start_pos,
2523 token_start: self.token_start,
2524 token: self.token,
2525 token_value: self.token_value.clone(),
2526 token_flags: self.token_flags,
2527 token_atom: self.token_atom,
2528 token_invalid_separator_pos: self.token_invalid_separator_pos,
2529 token_invalid_separator_is_consecutive: self.token_invalid_separator_is_consecutive,
2530 regex_flag_errors: self.regex_flag_errors.clone(),
2531 }
2532 }
2533
2534 pub fn restore_state(&mut self, snapshot: ScannerSnapshot) {
2536 self.pos = snapshot.pos;
2537 self.full_start_pos = snapshot.full_start_pos;
2538 self.token_start = snapshot.token_start;
2539 self.token = snapshot.token;
2540 self.token_value = snapshot.token_value;
2541 self.token_flags = snapshot.token_flags;
2542 self.token_atom = snapshot.token_atom;
2543 self.token_invalid_separator_pos = snapshot.token_invalid_separator_pos;
2544 self.token_invalid_separator_is_consecutive =
2545 snapshot.token_invalid_separator_is_consecutive;
2546 self.regex_flag_errors = snapshot.regex_flag_errors;
2547 }
2548
2549 #[must_use]
2553 pub const fn get_token_atom(&self) -> Atom {
2554 self.token_atom
2555 }
2556
2557 #[must_use]
2558 pub const fn get_invalid_separator_pos(&self) -> Option<usize> {
2559 self.token_invalid_separator_pos
2560 }
2561
    /// Returns the stored `token_invalid_separator_is_consecutive` flag for the
    /// current token.
    ///
    /// NOTE(review): presumably distinguishes a doubled separator from one in a
    /// disallowed position — confirm against the code that sets the flag.
    #[must_use]
    pub const fn invalid_separator_is_consecutive(&self) -> bool {
        self.token_invalid_separator_is_consecutive
    }
2566
2567 #[must_use]
2569 pub fn get_regex_flag_errors(&self) -> &[RegexFlagError] {
2570 &self.regex_flag_errors
2571 }
2572
2573 #[must_use]
2575 pub fn get_scanner_diagnostics(&self) -> &[ScannerDiagnostic] {
2576 &self.scanner_diagnostics
2577 }
2578
    /// Number of identical consecutive characters that make up a merge-conflict
    /// marker; also used as the length of the reported diagnostic span.
    const MERGE_CONFLICT_MARKER_LENGTH: usize = 7;
2581
2582 fn is_conflict_marker_trivia(&self) -> bool {
2587 let pos = self.pos;
2588 if pos > 0 && !is_line_break(self.char_code_unchecked(pos - 1)) {
2590 return false;
2591 }
2592 if pos + Self::MERGE_CONFLICT_MARKER_LENGTH >= self.end {
2594 return false;
2595 }
2596 let ch = self.char_code_unchecked(pos);
2597 for i in 1..Self::MERGE_CONFLICT_MARKER_LENGTH {
2599 if self.char_code_unchecked(pos + i) != ch {
2600 return false;
2601 }
2602 }
2603 ch == CharacterCodes::EQUALS
2606 || (pos + Self::MERGE_CONFLICT_MARKER_LENGTH < self.end
2607 && self.char_code_unchecked(pos + Self::MERGE_CONFLICT_MARKER_LENGTH)
2608 == CharacterCodes::SPACE)
2609 }
2610
2611 fn scan_conflict_marker_trivia(&mut self) {
2615 self.scanner_diagnostics.push(ScannerDiagnostic {
2617 pos: self.pos,
2618 length: Self::MERGE_CONFLICT_MARKER_LENGTH,
2619 message: "Merge conflict marker encountered.",
2620 code: 1185,
2621 });
2622
2623 let ch = self.char_code_unchecked(self.pos);
2624 if ch == CharacterCodes::LESS_THAN || ch == CharacterCodes::GREATER_THAN {
2625 while self.pos < self.end && !is_line_break(self.char_code_unchecked(self.pos)) {
2627 self.pos += 1;
2628 }
2629 } else {
2630 while self.pos < self.end {
2632 let current_char = self.char_code_unchecked(self.pos);
2633 if (current_char == CharacterCodes::EQUALS
2634 || current_char == CharacterCodes::GREATER_THAN)
2635 && current_char != ch
2636 && self.is_conflict_marker_trivia()
2637 {
2638 break;
2639 }
2640 self.pos += 1;
2641 }
2642 }
2643 }
2644
    /// Looks up `atom` in the scanner's interner and returns the associated text.
    ///
    /// This delegates directly to `Interner::resolve`; behavior for atoms the
    /// interner does not know is whatever that method defines.
    #[must_use]
    pub fn resolve_atom(&self, atom: Atom) -> &str {
        self.interner.resolve(atom)
    }
2651
    /// Returns a shared reference to the scanner's interner.
    #[must_use]
    pub const fn interner(&self) -> &Interner {
        &self.interner
    }
2657
    /// Returns a mutable reference to the scanner's interner.
    pub const fn interner_mut(&mut self) -> &mut Interner {
        &mut self.interner
    }
2662
    /// Moves the interner out of the scanner, leaving `Interner::default()` in
    /// its place.
    ///
    /// NOTE(review): atoms handed out earlier presumably can no longer be
    /// resolved through this scanner afterwards — confirm with callers.
    pub fn take_interner(&mut self) -> Interner {
        std::mem::take(&mut self.interner)
    }
2668
2669 #[inline]
2674 #[must_use]
2675 pub fn get_token_value_ref(&self) -> &str {
2676 if self.token_atom != Atom::NONE {
2679 return self.interner.resolve(self.token_atom);
2680 }
2681
2682 if !self.token_value.is_empty()
2686 || super::token_is_template_literal(self.token)
2687 || self.token == SyntaxKind::StringLiteral
2688 {
2689 return &self.token_value;
2690 }
2691
2692 &self.source[self.token_start..self.pos]
2695 }
2696
2697 #[inline]
2700 #[must_use]
2701 pub fn get_token_text_ref(&self) -> &str {
2702 &self.source[self.token_start..self.pos]
2703 }
2704
2705 #[inline]
2707 #[must_use]
2708 pub fn source_slice(&self, start: usize, end: usize) -> &str {
2709 &self.source[start..end]
2710 }
2711
    /// Returns the complete source text held by the scanner as a borrowed `&str`.
    #[inline]
    #[must_use]
    pub fn source_text(&self) -> &str {
        &self.source
    }
2718}
2719
2720impl ScannerState {
2721 #[inline]
2723 #[must_use]
2724 pub fn source_text_arc(&self) -> Arc<str> {
2725 std::sync::Arc::clone(&self.source)
2726 }
2727}
2728
2729fn is_white_space_single_line(ch: u32) -> bool {
2734 ch == CharacterCodes::SPACE
2735 || ch == CharacterCodes::TAB
2736 || ch == CharacterCodes::VERTICAL_TAB
2737 || ch == CharacterCodes::FORM_FEED
2738 || ch == CharacterCodes::NON_BREAKING_SPACE
2739 || ch == CharacterCodes::NEXT_LINE || ch == CharacterCodes::OGHAM
2741 || (CharacterCodes::EN_QUAD..=CharacterCodes::ZERO_WIDTH_SPACE).contains(&ch)
2742 || ch == CharacterCodes::NARROW_NO_BREAK_SPACE
2743 || ch == CharacterCodes::MATHEMATICAL_SPACE
2744 || ch == CharacterCodes::IDEOGRAPHIC_SPACE
2745 || ch == CharacterCodes::BYTE_ORDER_MARK
2746}
2747
2748fn is_digit(ch: u32) -> bool {
2749 (CharacterCodes::_0..=CharacterCodes::_9).contains(&ch)
2750}
2751
2752const fn is_binary_digit(ch: u32) -> bool {
2753 ch == CharacterCodes::_0 || ch == CharacterCodes::_1
2754}
2755
2756fn is_octal_digit(ch: u32) -> bool {
2757 (CharacterCodes::_0..=CharacterCodes::_7).contains(&ch)
2758}
2759
2760fn is_hex_digit(ch: u32) -> bool {
2761 is_digit(ch)
2762 || (CharacterCodes::UPPER_A..=CharacterCodes::UPPER_F).contains(&ch)
2763 || (CharacterCodes::LOWER_A..=CharacterCodes::LOWER_F).contains(&ch)
2764}
2765
2766fn is_identifier_start(ch: u32) -> bool {
2767 if ch < 128 {
2769 return (CharacterCodes::UPPER_A..=CharacterCodes::UPPER_Z).contains(&ch)
2770 || (CharacterCodes::LOWER_A..=CharacterCodes::LOWER_Z).contains(&ch)
2771 || ch == CharacterCodes::UNDERSCORE
2772 || ch == CharacterCodes::DOLLAR;
2773 }
2774
2775 if let Some(c) = char::from_u32(ch) {
2780 return c.is_alphabetic();
2781 }
2782
2783 false
2784}
2785
2786fn is_identifier_part(ch: u32) -> bool {
2787 if ch < 128 {
2789 return is_identifier_start(ch) || is_digit(ch);
2790 }
2791
2792 if let Some(c) = char::from_u32(ch) {
2795 if c.is_alphanumeric() {
2797 return true;
2798 }
2799 }
2800
2801 if ch == 0x200C || ch == 0x200D {
2803 return true;
2804 }
2805
2806 is_unicode_combining_mark(ch)
2814}
2815
/// Returns `true` if `ch` falls inside one of the hard-coded code-point ranges
/// in the table below.
///
/// The table is a fixed list, not a lookup against Unicode general-category
/// data, so code points outside the listed ranges are always rejected.
fn is_unicode_combining_mark(ch: u32) -> bool {
    // Inclusive `(first, last)` pairs, in ascending order.
    const MARK_RANGES: [(u32, u32); 25] = [
        (0x0300, 0x036F),
        (0x0591, 0x05C7),
        (0x064B, 0x0652),
        (0x0670, 0x0670),
        (0x0900, 0x0903),
        (0x093A, 0x094F),
        (0x0951, 0x0957),
        (0x0962, 0x0963),
        (0x0981, 0x0983),
        (0x09BC, 0x09CD),
        (0x0B01, 0x0B03),
        (0x0B3C, 0x0B4D),
        (0x0B82, 0x0B83),
        (0x0BBE, 0x0BCD),
        (0x0C00, 0x0C04),
        (0x0C3E, 0x0C4D),
        (0x0C81, 0x0C83),
        (0x0CBC, 0x0CCD),
        (0x0D00, 0x0D03),
        (0x0D3B, 0x0D4D),
        (0x0E31, 0x0E3A),
        (0x0E47, 0x0E4E),
        (0x1AB0, 0x1AFF),
        (0x1DC0, 0x1DFF),
        (0x20D0, 0x20FF),
    ];

    MARK_RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&ch))
}
2870
2871const fn is_line_break(ch: u32) -> bool {
2872 ch == CharacterCodes::LINE_FEED
2873 || ch == CharacterCodes::CARRIAGE_RETURN
2874 || ch == CharacterCodes::LINE_SEPARATOR
2875 || ch == CharacterCodes::PARAGRAPH_SEPARATOR
2876}
2877
2878const fn is_regex_flag(ch: u32) -> bool {
2880 matches!(
2881 ch,
2882 CharacterCodes::LOWER_G | CharacterCodes::LOWER_I | CharacterCodes::LOWER_M | CharacterCodes::LOWER_S | CharacterCodes::LOWER_U | CharacterCodes::LOWER_V | CharacterCodes::LOWER_Y | CharacterCodes::LOWER_D )
2891}