1#![allow(clippy::as_conversions, clippy::cast_possible_truncation)]
6
7use super::{Span, TSXToken};
8use crate::{
9 errors::LexingErrors, html_tag_contains_literal_content, html_tag_is_self_closing, Comments,
10 Quoted,
11};
12use tokenizer_lib::{sized_tokens::TokenStart, Token, TokenSender};
13
14use derive_finite_automaton::{
15 FiniteAutomata, FiniteAutomataConstructor, GetAutomataStateForValue, GetNextResult,
16};
17
// Empty module: it declares no items.
mod html {}
19
/// Switches that control how `lex_script` tokenises its input.
// The options are independent boolean switches, hence the lint allowance.
#[allow(clippy::struct_excessive_bools)]
pub struct LexerOptions {
    /// Comment filter: `lex_script` calls `should_add_comment` on it with each
    /// comment's content to decide whether a comment token is emitted.
    pub comments: Comments,
    /// When `true`, a `<` met where an expression is expected starts JSX lexing.
    pub lex_jsx: bool,
    /// When `true`, the characters `@ : . [ ] + $ * %` are accepted inside JSX
    /// attribute keys in addition to alphanumerics and `-`.
    pub allow_unsupported_characters_in_jsx_attribute_keys: bool,
    /// When `true`, `{` opens an embedded expression in JSX content and directly
    /// after an attribute's `=`.
    pub allow_expressions_in_jsx: bool,
    /// When `true`, lexing starts in the JSX state (expecting `<`) and a leading
    /// `<!DOCTYPE html>` is recognised.
    pub top_level_html: bool,
}
32
33impl Default for LexerOptions {
34 fn default() -> Self {
35 Self {
36 comments: Comments::All,
37 lex_jsx: true,
38 allow_unsupported_characters_in_jsx_attribute_keys: true,
39 allow_expressions_in_jsx: true,
40 top_level_html: false,
41 }
42 }
43}
44
/// Returns `true` when `chr` is one of the characters that may directly follow
/// (and therefore terminate) a number literal.
///
/// Note that only space, `\n` and `\r` count as whitespace here; a tab does not.
fn is_number_delimiter(chr: char) -> bool {
    /// Every character accepted as a number-literal terminator.
    const DELIMITERS: [char; 29] = [
        ' ', ',', '\n', '\r', ';', '+', '-', '*', '/', '&', '|', '!', '^', '(', '{', '[', ')',
        '}', ']', '%', '=', ':', '<', '>', '?', '"', '\'', '`', '#',
    ];
    DELIMITERS.contains(&chr)
}
65
66#[doc(hidden)]
73pub fn lex_script(
74 script: &str,
75 sender: &mut impl TokenSender<TSXToken, crate::TokenStart>,
76 options: &LexerOptions,
77 offset: Option<u32>,
78) -> Result<(), (LexingErrors, Span)> {
79 #[derive(PartialEq, Debug)]
80 enum JSXAttributeValueDelimiter {
81 None,
82 SingleQuote,
83 DoubleQuote,
84 }
85
86 #[derive(PartialEq, Debug, Eq)]
87 enum JSXTagNameDirection {
88 Opening,
89 Closing,
90 }
91
92 #[derive(PartialEq, Debug)]
93 enum JSXLexingState {
94 ExpectingOpenChevron,
96 TagName {
97 direction: JSXTagNameDirection,
98 lexed_start: bool,
99 },
100 SelfClosingTagClose,
102 AttributeKey,
103 AttributeEqual,
104 AttributeValue(JSXAttributeValueDelimiter),
105 Comment,
106 Content,
107 LiteralContent {
109 last_char_was_open_chevron: bool,
110 },
111 }
112
113 #[derive(PartialEq, Debug)]
114 enum NumberLiteralType {
115 BinaryLiteral,
116 OctalLiteral,
118 HexadecimalLiteral,
119 Decimal {
121 fractional: bool,
123 },
124 BigInt,
125 Exponent,
126 }
127
128 impl Default for NumberLiteralType {
129 fn default() -> Self {
130 Self::Decimal { fractional: false }
131 }
132 }
133
134 #[derive(PartialEq, Debug)]
136 enum LexingState {
137 None,
138 Identifier,
139 Symbol(GetAutomataStateForValue<TSXToken>),
140 Number(NumberLiteralType),
142 String {
143 double_quoted: bool,
144 escaped: bool,
145 },
146 TemplateLiteral {
147 interpolation_depth: u16,
148 last_char_was_dollar: bool,
149 escaped: bool,
150 },
151 JSXLiteral {
152 state: JSXLexingState,
153 interpolation_depth: u16,
154 tag_depth: u16,
155 no_inner_tags_or_expressions: bool,
158 is_self_closing_tag: bool,
159 },
160 SingleLineComment,
161 MultiLineComment {
162 last_char_was_star: bool,
163 },
164 RegexLiteral {
165 escaped: bool,
166 after_last_slash: bool,
168 in_set: bool,
170 },
171 }
172
173 const DEFAULT_JSX_LEXING_STATE: LexingState = LexingState::JSXLiteral {
175 interpolation_depth: 0,
176 tag_depth: 0,
177 state: JSXLexingState::ExpectingOpenChevron,
178 no_inner_tags_or_expressions: false,
179 is_self_closing_tag: false,
180 };
181 const FIRST_CHEVRON_JSX_LEXING_STATE: LexingState = LexingState::JSXLiteral {
182 interpolation_depth: 0,
183 tag_depth: 0,
184 state: JSXLexingState::TagName {
185 direction: JSXTagNameDirection::Opening,
186 lexed_start: false,
187 },
188 no_inner_tags_or_expressions: false,
189 is_self_closing_tag: false,
190 };
191
192 if script.len() > u32::MAX as usize {
193 return Err((LexingErrors::CannotLoadLargeFile(script.len()), source_map::Nullable::NULL));
194 }
195
196 let mut state: LexingState =
197 if options.top_level_html { DEFAULT_JSX_LEXING_STATE } else { LexingState::None };
198
199 let mut state_stack: Vec<LexingState> = Vec::new();
201
202 let mut start: usize = 0;
204 let offset = offset.unwrap_or_default();
205
206 let mut expect_expression = true;
213
214 macro_rules! return_err {
215 ($err:expr) => {{
216 sender.push(Token(TSXToken::EOS, TokenStart::new(script.len() as u32)));
217 return Err((
218 $err,
219 Span {
220 start: start as u32 + offset,
221 end: start as u32 + offset,
223 source: (),
224 },
225 ));
226 }};
227 }
228
229 let mut characters = script.char_indices();
230 if script.starts_with("#!") {
231 for (idx, c) in characters.by_ref() {
232 if c == '\n' {
233 sender.push(Token(
234 TSXToken::HashBangComment(script[2..idx].to_owned()),
235 TokenStart::new(0),
236 ));
237 break;
238 }
239 }
240 }
241
242 if options.top_level_html && script.starts_with("<!DOCTYPE html>") {
243 for (_idx, c) in characters.by_ref() {
244 if c == '>' {
245 sender.push(Token(TSXToken::DocTypeHTML, TokenStart::new(0)));
246 break;
247 }
248 }
249 }
250
251 for (idx, chr) in characters {
252 macro_rules! set_state {
256 ($s:expr) => {{
257 start = idx;
258 state = $s;
259 expect_expression = false;
260 }};
261
262 ($s:expr, EXPECT_EXPRESSION: $v:expr) => {{
263 start = idx;
264 state = $s;
265 expect_expression = $v;
266 }};
267 }
268
269 macro_rules! push_token {
271 ($t:expr $(,)?) => {{
272 let res = sender.push(Token($t, TokenStart::new(start as u32 + offset)));
273 if !res {
274 return Ok(());
275 }
276 }};
277 }
278
279 match state {
280 LexingState::Number(ref mut literal_type) => {
281 match chr {
282 _ if matches!(literal_type, NumberLiteralType::BigInt) => {
283 if is_number_delimiter(chr) {
284 push_token!(TSXToken::NumberLiteral(script[start..idx].to_owned()));
286 set_state!(LexingState::None);
287 } else {
288 return_err!(LexingErrors::UnexpectedEndToNumberLiteral)
289 }
290 }
291 'b' | 'B' | 'x' | 'X' | 'o' | 'O' if start + 1 == idx => {
293 if script[start..].starts_with('0') {
294 *literal_type = match chr {
295 'b' | 'B' => NumberLiteralType::BinaryLiteral,
296 'o' | 'O' => NumberLiteralType::OctalLiteral,
297 'x' | 'X' => NumberLiteralType::HexadecimalLiteral,
298 _ => unreachable!(),
299 }
300 } else {
301 return_err!(
302 LexingErrors::NumberLiteralBaseSpecifierMustPrecededWithZero
303 );
304 }
305 }
306 '0'..='9' | 'a'..='f' | 'A'..='F' => match literal_type {
307 NumberLiteralType::BinaryLiteral => {
308 if !matches!(chr, '0' | '1') {
309 return_err!(LexingErrors::InvalidNumeralItemBecauseOfLiteralKind)
310 }
311 }
312 NumberLiteralType::OctalLiteral => {
313 if !matches!(chr, '0'..='7') {
314 return_err!(LexingErrors::InvalidNumeralItemBecauseOfLiteralKind)
315 }
316 }
317 NumberLiteralType::Decimal { fractional } => {
319 if matches!(chr, 'e' | 'E')
320 && !(*fractional || script[..idx].ends_with('_'))
321 {
322 *literal_type = NumberLiteralType::Exponent;
323 } else if !chr.is_ascii_digit() {
324 return_err!(LexingErrors::InvalidNumeralItemBecauseOfLiteralKind)
325 }
326 }
327 NumberLiteralType::Exponent => {
328 if !chr.is_ascii_digit() {
329 return_err!(LexingErrors::InvalidNumeralItemBecauseOfLiteralKind)
330 }
331 }
332 NumberLiteralType::HexadecimalLiteral => {}
334 NumberLiteralType::BigInt => unreachable!(),
335 },
336 '.' => {
337 if let NumberLiteralType::Decimal { fractional } = literal_type {
338 if script[..idx].ends_with(['_']) {
339 return_err!(LexingErrors::InvalidUnderscore)
340 } else if *fractional {
341 if start + 1 == idx {
343 let automaton = TSXToken::new_automaton();
344 let derive_finite_automaton::GetNextResult::NewState(
345 dot_state_one,
346 ) = automaton.get_next('.')
347 else {
348 unreachable!()
349 };
350 let derive_finite_automaton::GetNextResult::NewState(
351 dot_state_two,
352 ) = dot_state_one.get_next('.')
353 else {
354 unreachable!()
355 };
356 state = LexingState::Symbol(dot_state_two);
357 } else {
358 return_err!(LexingErrors::SecondDecimalPoint);
359 }
360 } else {
361 *fractional = true;
362 }
363 } else {
364 return_err!(LexingErrors::NumberLiteralCannotHaveDecimalPoint);
365 }
366 }
367 '_' => {
368 let invalid = match literal_type {
369 NumberLiteralType::BinaryLiteral |
370 NumberLiteralType::OctalLiteral |
371 NumberLiteralType::HexadecimalLiteral => {
373 if start + 2 == idx {
374 script[..idx].ends_with(['b', 'B', 'x', 'X', 'o' , 'O'])
375 } else {
376 false
377 }
378 },
379 NumberLiteralType::Decimal { .. } => script[..idx].ends_with('.') || &script[start..idx] == "0",
380 NumberLiteralType::Exponent => script[..idx].ends_with(['e', 'E']),
381 NumberLiteralType::BigInt => false
382 };
383 if invalid {
384 return_err!(LexingErrors::InvalidUnderscore);
385 }
386 }
387 'n' if matches!(
388 literal_type,
389 NumberLiteralType::Decimal { fractional: false }
390 ) =>
391 {
392 *literal_type = NumberLiteralType::BigInt;
393 }
394 '-' if matches!(literal_type, NumberLiteralType::Exponent if script[..idx].ends_with(['e', 'E'])) =>
396 {}
397 chr => {
398 if is_number_delimiter(chr) {
399 let num_slice = &script[start..idx];
401 if num_slice.trim_end() == "."
402 || num_slice.ends_with(['x', 'X', 'o', 'O', '_', '-'])
403 || (!matches!(literal_type, NumberLiteralType::HexadecimalLiteral)
404 && num_slice.ends_with(['e', 'E', 'b', 'B']))
405 {
406 return_err!(LexingErrors::UnexpectedEndToNumberLiteral)
407 }
408 push_token!(TSXToken::NumberLiteral(num_slice.to_owned()));
409 set_state!(LexingState::None);
410 } else {
411 return_err!(LexingErrors::UnexpectedEndToNumberLiteral)
412 }
413 }
414 }
415 }
416 LexingState::Symbol(symbol_state) => {
417 match symbol_state.get_next(chr) {
420 GetNextResult::Result { result, ate_character } => {
421 match result {
423 TSXToken::Comment(_) => {
424 state = LexingState::SingleLineComment;
425 continue;
426 }
427 TSXToken::MultiLineComment(_) => {
428 state = LexingState::MultiLineComment { last_char_was_star: false };
429 continue;
430 }
431 _ => {}
432 }
433 state = LexingState::None;
434 expect_expression = result.is_expression_prefix();
435 if ate_character {
436 push_token!(result);
437 start = idx + chr.len_utf8();
438 continue;
439 }
440
441 push_token!(result);
442 start = idx;
443 }
444 GetNextResult::NewState(new_state) => {
445 state = LexingState::Symbol(new_state);
446 }
447 GetNextResult::InvalidCharacter(err) => {
448 return_err!(LexingErrors::UnexpectedCharacter(err));
449 }
450 }
451 }
452 LexingState::Identifier => match chr {
453 'A'..='Z' | 'a'..='z' | '0'..='9' | '_' | '$' => {}
454 _ => {
455 let token = TSXToken::from_slice(&script[start..idx]);
456 let is_expression_prefix = token.is_expression_prefix();
457 push_token!(token);
458 set_state!(LexingState::None, EXPECT_EXPRESSION: is_expression_prefix);
459 }
460 },
461 LexingState::String { ref mut double_quoted, ref mut escaped } => match chr {
462 '\n' => {
463 return_err!(LexingErrors::NewLineInStringLiteral);
464 }
465 '\'' if !*double_quoted && !*escaped => {
466 push_token!(TSXToken::StringLiteral(
467 script[(start + 1)..idx].to_owned(),
468 Quoted::Single
469 ));
470 state = LexingState::None;
471 start = idx + 1;
472 expect_expression = false;
473 continue;
474 }
475 '"' if *double_quoted && !*escaped => {
476 push_token!(TSXToken::StringLiteral(
477 script[(start + 1)..idx].to_owned(),
478 Quoted::Double
479 ));
480 state = LexingState::None;
481 start = idx + 1;
482 expect_expression = false;
483 continue;
484 }
485 '\\' if !*escaped => {
486 *escaped = true;
487 }
488 _ => {
489 *escaped = false;
490 }
491 },
492 LexingState::SingleLineComment => {
493 if let '\n' = chr {
494 let content = &script[(start + 2)..idx];
495 if options.comments.should_add_comment(content) {
496 push_token!(TSXToken::Comment(content.trim_end().to_owned()));
497 }
498 set_state!(LexingState::None);
499 continue;
500 }
501 }
502 LexingState::MultiLineComment { ref mut last_char_was_star } => match chr {
503 '/' if *last_char_was_star => {
504 let content = &script[(start + 2)..(idx - 1)];
505 if options.comments.should_add_comment(content) {
506 push_token!(TSXToken::MultiLineComment(content.to_owned()));
507 }
508 set_state!(LexingState::None);
509 continue;
510 }
511 chr => {
512 *last_char_was_star = chr == '*';
513 }
514 },
515 LexingState::RegexLiteral {
516 ref mut escaped,
517 ref mut after_last_slash,
518 ref mut in_set,
519 } => {
520 if *after_last_slash {
521 if !chr.is_alphabetic() {
522 if start != idx {
523 push_token!(TSXToken::RegexFlagLiteral(script[start..idx].to_owned()));
524 }
525 set_state!(LexingState::None);
526 }
527 } else {
528 match chr {
529 '/' if start + 1 == idx => {
530 state = LexingState::SingleLineComment;
531 continue;
532 }
533 '*' if start + 1 == idx => {
534 state = LexingState::MultiLineComment { last_char_was_star: false };
535 continue;
536 }
537 '/' if !*escaped && !*in_set => {
538 push_token!(TSXToken::RegexLiteral(
539 script[(start + 1)..idx].to_owned()
540 ));
541 *after_last_slash = true;
542 start = idx + 1;
543 }
544 '\\' if !*escaped => {
545 *escaped = true;
546 }
547 '[' => {
548 *in_set = true;
549 }
550 ']' if *in_set => {
551 *in_set = false;
552 }
553 '\n' => {
554 return_err!(LexingErrors::ExpectedEndToRegexLiteral);
555 }
556 _ => {
557 *escaped = false;
558 }
559 }
560 }
561 }
562 LexingState::TemplateLiteral {
563 ref mut last_char_was_dollar,
564 ref mut interpolation_depth,
565 ref mut escaped,
566 } => match chr {
567 '$' if !*escaped => *last_char_was_dollar = true,
568 '{' if *last_char_was_dollar => {
569 if idx > start + 1 {
570 push_token!(TSXToken::TemplateLiteralChunk(
571 script[start..(idx - 1)].to_owned()
572 ));
573 }
574 start = idx - 1;
575 push_token!(TSXToken::TemplateLiteralExpressionStart);
576 *interpolation_depth += 1;
577 *last_char_was_dollar = false;
578 state_stack.push(state);
579
580 start = idx + 1;
581 state = LexingState::None;
582 expect_expression = true;
583 continue;
584 }
585 '`' if !*escaped => {
586 if idx > start {
587 push_token!(TSXToken::TemplateLiteralChunk(script[start..idx].to_owned()));
588 }
589 start = idx;
590 push_token!(TSXToken::TemplateLiteralEnd);
591 start = idx + 1;
592 state = LexingState::None;
593 expect_expression = false;
594 continue;
595 }
596 '\\' => {
597 *last_char_was_dollar = false;
598 *escaped = true;
599 }
600 _ => {
601 *last_char_was_dollar = false;
602 *escaped = false;
603 }
604 },
605 LexingState::JSXLiteral {
606 ref mut interpolation_depth,
607 ref mut tag_depth,
608 ref mut no_inner_tags_or_expressions,
609 ref mut is_self_closing_tag,
610 state: ref mut jsx_state,
611 } => {
612 match jsx_state {
613 JSXLexingState::ExpectingOpenChevron => {
614 if chr == '<' {
615 set_state!(FIRST_CHEVRON_JSX_LEXING_STATE);
616 } else if !chr.is_whitespace() {
617 dbg!(chr);
618 return_err!(LexingErrors::ExpectedOpenChevron);
619 }
620 }
621 JSXLexingState::TagName { ref mut direction, ref mut lexed_start } => match chr
622 {
623 '>' if *direction == JSXTagNameDirection::Closing => {
625 *tag_depth = match tag_depth.checked_sub(1) {
626 Some(value) => value,
627 None => {
628 return_err!(LexingErrors::UnbalancedJSXClosingTags);
629 }
630 };
631 if *lexed_start {
632 push_token!(TSXToken::JSXClosingTagName(
633 script[start..idx].trim().to_owned()
634 ));
635 } else {
636 push_token!(TSXToken::JSXFragmentEnd);
637 }
638 if *tag_depth == 0 {
640 set_state!(LexingState::None);
641 continue;
642 }
643
644 start = idx + 1;
645 *jsx_state = JSXLexingState::Content;
646 }
647 '>' if !*lexed_start => {
649 push_token!(TSXToken::JSXFragmentStart);
650 *jsx_state = JSXLexingState::Content;
651 start = idx + 1;
652 *tag_depth += 1;
653 continue;
654 }
655 'A'..='Z' | 'a'..='z' | '0'..='9' => {
657 if !*lexed_start {
659 match direction {
660 JSXTagNameDirection::Opening => {
661 push_token!(TSXToken::JSXOpeningTagStart);
662 start += 1;
663 }
664 JSXTagNameDirection::Closing => {
665 push_token!(TSXToken::JSXClosingTagStart);
666 start += 2;
667 }
668 }
669 *lexed_start = true;
670 }
671 }
672 '-' => {
673 if start + 1 == idx {
674 return_err!(LexingErrors::InvalidCharacterInJSXTag('-'))
676 }
677 }
678 '/' if start + 1 == idx => {
680 *direction = JSXTagNameDirection::Closing;
681 }
682 '!' if start + 1 == idx => {
684 *jsx_state = JSXLexingState::Comment;
685 }
686 chr => {
688 if *direction == JSXTagNameDirection::Closing {
689 return_err!(LexingErrors::ExpectedJSXEndTag);
690 }
691 let tag_name = script[start..idx].trim();
692 *is_self_closing_tag = html_tag_is_self_closing(tag_name);
693 *no_inner_tags_or_expressions =
694 html_tag_contains_literal_content(tag_name);
695 push_token!(TSXToken::JSXTagName(tag_name.to_owned()));
696 start = idx;
697 *tag_depth += 1;
698 match chr {
699 '/' if *is_self_closing_tag => {
700 *jsx_state = JSXLexingState::SelfClosingTagClose;
701 }
702 '>' => {
703 push_token!(TSXToken::JSXOpeningTagEnd);
704 start = idx + 1;
705 *jsx_state = if *no_inner_tags_or_expressions {
706 JSXLexingState::LiteralContent {
707 last_char_was_open_chevron: false,
708 }
709 } else {
710 JSXLexingState::Content
711 };
712 continue;
713 }
714 chr if chr.is_whitespace() => {
715 *jsx_state = JSXLexingState::AttributeKey;
716 }
717 chr => {
718 return_err!(LexingErrors::InvalidCharacterInJSXTag(chr));
719 }
720 }
721 start = idx + chr.len_utf8();
722 }
723 },
724 JSXLexingState::SelfClosingTagClose => {
725 if chr == '>' {
726 *tag_depth = match tag_depth.checked_sub(1) {
727 Some(value) => value,
728 None => {
729 return_err!(LexingErrors::UnbalancedJSXClosingTags);
730 }
731 };
732 push_token!(TSXToken::JSXSelfClosingTag);
733 start = idx + 1;
734 if *tag_depth == 0 {
736 set_state!(LexingState::None);
737 } else {
738 *jsx_state = JSXLexingState::Content;
739 }
740 continue;
741 }
742 return_err!(LexingErrors::ExpectedClosingChevronAtEndOfSelfClosingTag);
743 }
744 JSXLexingState::AttributeKey => match chr {
745 '=' => {
746 if start >= idx {
747 return_err!(LexingErrors::EmptyAttributeName);
748 }
749 let key_slice = script[start..idx].trim();
750 if !key_slice.is_empty() {
751 push_token!(TSXToken::JSXAttributeKey(key_slice.to_owned()));
752 }
753 start = idx;
754 push_token!(TSXToken::JSXAttributeAssign);
755 *jsx_state = JSXLexingState::AttributeEqual;
756 start = idx + 1;
757 }
758 '{' => {
759 push_token!(TSXToken::JSXExpressionStart);
760 *interpolation_depth += 1;
761 state_stack.push(state);
762 set_state!(LexingState::None, EXPECT_EXPRESSION: true);
763 continue;
764 }
765 '/' => {
766 *jsx_state = JSXLexingState::SelfClosingTagClose;
767 }
768 '>' => {
769 if start < idx {
771 push_token!(TSXToken::JSXAttributeKey(
772 script[start..idx].to_owned()
773 ));
774 }
775 if *is_self_closing_tag {
776 *tag_depth = match tag_depth.checked_sub(1) {
777 Some(value) => value,
778 None => {
779 return_err!(LexingErrors::UnbalancedJSXClosingTags);
780 }
781 };
782 push_token!(TSXToken::JSXSelfClosingTag);
783 start = idx + 1;
784 if *tag_depth == 0 {
786 set_state!(LexingState::None);
787 } else {
788 *jsx_state = JSXLexingState::Content;
789 *is_self_closing_tag = false;
790 }
791 } else {
792 push_token!(TSXToken::JSXOpeningTagEnd);
793 start = idx + 1;
794 *jsx_state = if *no_inner_tags_or_expressions {
795 JSXLexingState::LiteralContent {
796 last_char_was_open_chevron: false,
797 }
798 } else {
799 JSXLexingState::Content
800 };
801 }
802 continue;
803 }
804 chr if chr.is_whitespace() => {
805 if start < idx {
806 push_token!(TSXToken::JSXAttributeKey(
807 script[start..idx].to_owned()
808 ));
809 }
810 start = idx + chr.len_utf8();
811 }
812 chr => {
813 let character_allowed = chr.is_alphanumeric()
814 || chr == '-' || (options
815 .allow_unsupported_characters_in_jsx_attribute_keys
816 && matches!(
817 chr,
818 '@' | ':' | '.' | '[' | ']' | '+' | '$' | '*' | '%'
819 ));
820 if !character_allowed {
821 return_err!(LexingErrors::InvalidCharacterInAttributeKey(chr));
822 }
823 }
824 },
825 JSXLexingState::AttributeEqual => {
826 let delimiter = match chr {
827 '{' if options.allow_expressions_in_jsx => {
828 push_token!(TSXToken::JSXExpressionStart);
829 *interpolation_depth += 1;
830 *jsx_state = JSXLexingState::AttributeKey;
831 state_stack.push(state);
832 set_state!(LexingState::None, EXPECT_EXPRESSION: true);
833 continue;
834 }
835 '"' => JSXAttributeValueDelimiter::DoubleQuote,
836 '\'' => JSXAttributeValueDelimiter::SingleQuote,
837 '>' => {
838 return_err!(LexingErrors::EmptyAttributeName);
839 }
840 _ => JSXAttributeValueDelimiter::None,
841 };
842 *jsx_state = JSXLexingState::AttributeValue(delimiter);
843 }
844 JSXLexingState::AttributeValue(delimiter) => match (delimiter, chr) {
845 (JSXAttributeValueDelimiter::DoubleQuote, '"')
846 | (JSXAttributeValueDelimiter::SingleQuote, '\'') => {
847 push_token!(TSXToken::JSXAttributeValue(
848 script[(start + 1)..idx].to_owned()
849 ));
850 *jsx_state = JSXLexingState::AttributeKey;
851 start = idx + 1;
852 continue;
853 }
854 (JSXAttributeValueDelimiter::None, ' ') => {
855 push_token!(TSXToken::JSXAttributeValue(script[start..idx].to_owned()));
856 *jsx_state = JSXLexingState::AttributeKey;
857 start = idx;
858 }
859 (JSXAttributeValueDelimiter::None, '>') => {
860 push_token!(TSXToken::JSXAttributeValue(script[start..idx].to_owned()));
861 if *is_self_closing_tag {
862 *tag_depth = match tag_depth.checked_sub(1) {
863 Some(value) => value,
864 None => {
865 return_err!(LexingErrors::UnbalancedJSXClosingTags);
866 }
867 };
868 push_token!(TSXToken::JSXSelfClosingTag);
869 start = idx + 1;
870 if *tag_depth == 0 {
872 set_state!(LexingState::None);
873 } else {
874 *jsx_state = JSXLexingState::Content;
875 *is_self_closing_tag = false;
876 }
877 } else {
878 push_token!(TSXToken::JSXOpeningTagEnd);
879 start = idx + 1;
880 *jsx_state = if *no_inner_tags_or_expressions {
881 JSXLexingState::LiteralContent {
882 last_char_was_open_chevron: false,
883 }
884 } else {
885 JSXLexingState::Content
886 };
887 }
888 continue;
889 }
890 _ => {}
891 },
892 JSXLexingState::Content => {
893 match chr {
894 '<' => {
895 let content_slice = &script[start..idx];
896 if !content_slice.trim().is_empty() {
897 push_token!(TSXToken::JSXContent(content_slice.to_owned()));
898 }
899 *jsx_state = JSXLexingState::TagName {
900 direction: JSXTagNameDirection::Opening,
901 lexed_start: false,
902 };
903 start = idx;
904 }
905 '{' if options.allow_expressions_in_jsx => {
906 let content_slice = &script[start..idx];
907 if !content_slice.trim().is_empty() {
908 push_token!(TSXToken::JSXContent(content_slice.to_owned()));
909 }
910 push_token!(TSXToken::JSXExpressionStart);
911 *interpolation_depth += 1;
912 state_stack.push(state);
913 set_state!(LexingState::None, EXPECT_EXPRESSION: true);
914 continue;
915 }
916 '\n' => {
917 let source = script[start..idx].trim();
918 if !source.is_empty() {
919 push_token!(TSXToken::JSXContent(source.to_owned()));
920 start = idx;
921 }
922 push_token!(TSXToken::JSXContentLineBreak);
923 start = idx + 1;
924 }
925 _ => {}
927 }
928 }
929 JSXLexingState::LiteralContent { ref mut last_char_was_open_chevron } => {
930 match chr {
931 '<' => {
932 *last_char_was_open_chevron = true;
933 }
934 '/' if *last_char_was_open_chevron => {
935 let end = idx - '<'.len_utf8();
936 let source = script[start..end].trim();
937 if !source.is_empty() {
938 push_token!(TSXToken::JSXContent(source.to_owned()));
939 }
940 start = end;
941 push_token!(TSXToken::JSXClosingTagStart);
942 start = idx + '/'.len_utf8();
943 *jsx_state = JSXLexingState::TagName {
944 direction: JSXTagNameDirection::Closing,
945 lexed_start: true,
946 };
947 *no_inner_tags_or_expressions = false;
948 }
949 _ => {
950 *last_char_was_open_chevron = false;
951 }
952 }
953 }
954 JSXLexingState::Comment => {
956 if idx - start < 4 {
957 if chr != '-' {
958 return_err!(LexingErrors::ExpectedDashInComment);
959 }
960 } else if chr == '>' && script[..idx].ends_with("--") {
961 push_token!(TSXToken::JSXComment(
962 script[(start + 4)..(idx - 2)].to_owned()
963 ));
964 start = idx + 1;
965 if *tag_depth == 0 {
966 set_state!(if options.top_level_html {
967 DEFAULT_JSX_LEXING_STATE
968 } else {
969 LexingState::None
970 });
971 } else {
972 *jsx_state = JSXLexingState::Content;
973 }
974 continue;
975 }
976 }
977 }
978 }
979 LexingState::None => {}
980 }
981
982 if state == LexingState::None {
984 match chr {
985 '0' if matches!(script.as_bytes().get(idx + 1), Some(b'0'..=b'7')) => {
986 set_state!(LexingState::Number(NumberLiteralType::OctalLiteral));
988 }
989 '0'..='9' => set_state!(LexingState::Number(Default::default())),
990 '"' => set_state!(LexingState::String { double_quoted: true, escaped: false }),
991 '\'' => set_state!(LexingState::String { double_quoted: false, escaped: false }),
992 '_' | '$' => {
993 set_state!(LexingState::Identifier);
994 }
995 chr if chr.is_alphabetic() => {
996 set_state!(LexingState::Identifier);
997 }
998 chr if chr.is_whitespace() => {
999 continue;
1000 }
1001 chr => {
1002 match (chr, state_stack.last_mut()) {
1004 (
1005 '}',
1006 Some(LexingState::TemplateLiteral {
1007 ref mut interpolation_depth, ..
1008 }),
1009 ) => {
1010 *interpolation_depth -= 1;
1011 if *interpolation_depth == 0 {
1012 push_token!(TSXToken::TemplateLiteralExpressionEnd);
1013 start = idx + '}'.len_utf8();
1014 state = state_stack.pop().unwrap();
1015 continue;
1016 }
1017 }
1018 (
1019 '}',
1020 Some(LexingState::JSXLiteral { ref mut interpolation_depth, .. }),
1021 ) => {
1022 *interpolation_depth -= 1;
1023 if *interpolation_depth == 0 {
1024 push_token!(TSXToken::JSXExpressionEnd);
1025 start = idx + '}'.len_utf8();
1026 state = state_stack.pop().unwrap();
1027 continue;
1028 }
1029 }
1030 (
1031 '{',
1032 Some(
1033 LexingState::JSXLiteral { ref mut interpolation_depth, .. }
1034 | LexingState::TemplateLiteral {
1035 ref mut interpolation_depth, ..
1036 },
1037 ),
1038 ) => {
1039 *interpolation_depth += 1;
1041 }
1042 (_, _) => {}
1043 }
1044
1045 start = idx;
1046
1047 match (expect_expression, chr) {
1049 (_, '`') => {
1050 push_token!(TSXToken::TemplateLiteralStart);
1051 start = idx + 1;
1052 state = LexingState::TemplateLiteral {
1053 interpolation_depth: 0,
1054 last_char_was_dollar: false,
1055 escaped: false,
1056 };
1057 }
1058 (true, '<') if options.lex_jsx => {
1059 set_state!(FIRST_CHEVRON_JSX_LEXING_STATE);
1060 }
1061 (true, '/') => {
1062 state = LexingState::RegexLiteral {
1063 escaped: false,
1064 after_last_slash: false,
1065 in_set: false,
1066 };
1067 }
1068 (true, '.') => {
1069 state = LexingState::Number(NumberLiteralType::Decimal {
1070 fractional: true,
1071 });
1072 }
1073 (_, _) => {
1074 let automaton = TSXToken::new_automaton();
1076 match automaton.get_next(chr) {
1077 GetNextResult::Result {
1078 result,
1079 ate_character: _, } => {
1081 expect_expression = result.is_expression_prefix();
1082 push_token!(result);
1083 }
1084 GetNextResult::NewState(new_state) => {
1085 state = LexingState::Symbol(new_state);
1086 }
1087 GetNextResult::InvalidCharacter(err) => {
1088 return_err!(LexingErrors::UnexpectedCharacter(err));
1089 }
1090 }
1091 }
1092 }
1093 }
1094 }
1095 }
1096 }
1097
1098 match state {
1100 LexingState::Number(literal_type) => {
1101 if script[start..].trim_end() == "."
1103 || script.ends_with(['x', 'X', 'o', 'O', '_', '-'])
1104 || (!matches!(literal_type, NumberLiteralType::HexadecimalLiteral)
1105 && script.ends_with(['e', 'E', 'b', 'B']))
1106 {
1107 return_err!(LexingErrors::UnexpectedEndToNumberLiteral)
1108 }
1109 sender.push(Token(
1110 TSXToken::NumberLiteral(script[start..].to_owned()),
1111 TokenStart::new(start as u32 + offset),
1112 ));
1113 }
1114 LexingState::Identifier => {
1115 sender.push(Token(
1116 TSXToken::from_slice(&script[start..]),
1117 TokenStart::new(start as u32 + offset),
1118 ));
1119 }
1120 LexingState::Symbol(symbol_state) => {
1121 match symbol_state.get_next(0 as char) {
1124 GetNextResult::Result {
1125 result,
1126 ate_character: _, } => {
1128 sender.push(Token(result, TokenStart::new(start as u32 + offset)));
1129 }
1130 GetNextResult::NewState(_new_state) => unreachable!(),
1131 GetNextResult::InvalidCharacter(err) => {
1132 return_err!(LexingErrors::UnexpectedCharacter(err));
1133 }
1134 }
1135 }
1136 LexingState::SingleLineComment => {
1137 let content = &script[(start + 2)..];
1138 if options.comments.should_add_comment(content) {
1139 sender.push(Token(
1140 TSXToken::Comment(content.trim_end().to_owned()),
1141 TokenStart::new(start as u32 + offset),
1142 ));
1143 }
1144 }
1145 LexingState::MultiLineComment { .. } => {
1146 return_err!(LexingErrors::ExpectedEndToMultilineComment);
1147 }
1148 LexingState::String { .. } => {
1149 return_err!(LexingErrors::ExpectedEndToStringLiteral);
1150 }
1151 LexingState::RegexLiteral { after_last_slash, .. } => {
1153 if after_last_slash {
1154 sender.push(Token(
1155 TSXToken::RegexFlagLiteral(script[start..].to_owned()),
1156 TokenStart::new(start as u32 + offset),
1157 ));
1158 sender.push(Token(TSXToken::EOS, TokenStart::new(script.len() as u32)));
1159 } else {
1160 sender.push(Token(TSXToken::EOS, TokenStart::new(script.len() as u32)));
1161 return_err!(LexingErrors::ExpectedEndToRegexLiteral);
1162 }
1163 }
1164 LexingState::JSXLiteral { state, .. } => {
1165 if !matches!(state, JSXLexingState::ExpectingOpenChevron) {
1166 return_err!(LexingErrors::ExpectedEndToJSXLiteral);
1167 }
1168 }
1169 LexingState::TemplateLiteral { .. } => {
1170 return_err!(LexingErrors::ExpectedEndToTemplateLiteral);
1171 }
1172 LexingState::None => {}
1173 }
1174
1175 sender.push(Token(TSXToken::EOS, TokenStart::new(script.len() as u32)));
1176
1177 Ok(())
1178}