1use crate::char::CharFilter;
2use crate::char::DIGIT;
3use crate::char::DIGIT_BIN;
4use crate::char::DIGIT_HEX;
5use crate::char::DIGIT_OCT;
6use crate::char::ID_CONTINUE;
7use crate::char::ID_CONTINUE_JSX;
8use crate::char::ID_START;
9use crate::char::ID_START_CHARSTR;
10use crate::char::WHITESPACE;
11use crate::error::SyntaxError;
12use crate::error::SyntaxErrorType;
13use crate::error::SyntaxResult;
14use crate::loc::Loc;
15use crate::token::Token;
16use crate::token::TokenType;
17use ahash::AHashMap;
18use aho_corasick::AhoCorasick;
19use aho_corasick::AhoCorasickBuilder;
20use aho_corasick::AhoCorasickKind;
21use aho_corasick::Anchored;
22use aho_corasick::Input;
23use aho_corasick::MatchKind;
24use aho_corasick::StartKind;
25use core::ops::Index;
26use memchr::memchr;
27use memchr::memchr2;
28use memchr::memchr3;
29use once_cell::sync::Lazy;
30
31mod tests;
32
/// Controls context-sensitive lexing decisions; JS cannot be tokenized
/// without context (e.g. whether `/` starts a regex or is division).
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum LexMode {
  /// Inside a JSX tag: identifiers use the JSX continue charset.
  JsxTag,
  /// Between JSX tags: raw text up to the next `{` or `<`.
  JsxTextContent,
  /// A `/` (or `/=`) at the cursor starts a regex literal.
  SlashIsRegex,
  /// Ordinary JS tokenization.
  Standard,
}
40
/// A saved lexer position; restore it with `Lexer::apply_checkpoint` to
/// backtrack, or turn it into a `Loc` with `Lexer::since_checkpoint`.
#[derive(Copy, Clone)]
pub struct LexerCheckpoint {
  /// Byte offset into the source to restore.
  next: usize,
}
45
/// A span of bytes matched immediately ahead of (or just consumed at) the
/// lexer's current position; only the length is stored, the position is
/// implied by the lexer's cursor.
#[derive(Copy, Clone)]
struct Match {
  len: usize,
}
50
impl Match {
  /// Length of the matched span in bytes.
  pub fn len(&self) -> usize {
    self.len
  }

  /// A match covering only the first `n` bytes of this one.
  pub fn prefix(&self, n: usize) -> Match {
    debug_assert!(n <= self.len);
    Match { len: n }
  }

  /// True if the match covers zero bytes.
  pub fn is_empty(&self) -> bool {
    self.len == 0
  }
}
65
/// Result of running an Aho-Corasick automaton from the cursor: which
/// pattern matched (`id`, an index into the pattern list the automaton was
/// built from) and how far the match reached (`mat`).
#[derive(Copy, Clone)]
struct AhoCorasickMatch {
  id: usize,
  mat: Match,
}
71
/// A byte-oriented lexer over JS/JSX source.
pub struct Lexer<'a> {
  /// The entire source being lexed.
  source: &'a [u8],
  /// Byte offset of the next unconsumed byte.
  next: usize,
}
76
77impl<'a> Lexer<'a> {
78 pub fn new(code: &'a [u8]) -> Lexer<'a> {
79 Lexer {
80 source: code,
81 next: 0,
82 }
83 }
84
85 fn end(&self) -> usize {
86 self.source.len()
87 }
88
89 fn remaining(&self) -> usize {
90 self.end() - self.next
91 }
92
93 pub fn source_range(&self) -> Loc {
94 Loc(0, self.end())
95 }
96
97 fn eof_range(&self) -> Loc {
98 Loc(self.end(), self.end())
99 }
100
101 fn error(&self, typ: SyntaxErrorType) -> SyntaxError {
102 Loc(self.next, self.end()).error(typ, None)
103 }
104
105 fn at_end(&self) -> bool {
106 self.next >= self.end()
107 }
108
109 fn peek(&self, n: usize) -> SyntaxResult<u8> {
110 self
111 .peek_or_eof(n)
112 .ok_or_else(|| self.error(SyntaxErrorType::UnexpectedEnd))
113 }
114
115 fn peek_or_eof(&self, n: usize) -> Option<u8> {
116 self.source.get(self.next + n).copied()
117 }
118
119 pub fn checkpoint(&self) -> LexerCheckpoint {
120 LexerCheckpoint { next: self.next }
121 }
122
123 pub fn since_checkpoint(&self, checkpoint: LexerCheckpoint) -> Loc {
124 Loc(checkpoint.next, self.next)
125 }
126
127 pub fn apply_checkpoint(&mut self, checkpoint: LexerCheckpoint) {
128 self.next = checkpoint.next;
129 }
130
131 fn n(&self, n: usize) -> SyntaxResult<Match> {
132 if self.next + n > self.end() {
133 return Err(self.error(SyntaxErrorType::UnexpectedEnd));
134 };
135 Ok(Match { len: n })
136 }
137
138 fn if_char(&self, c: u8) -> Match {
139 Match {
140 len: (!self.at_end() && self.source[self.next] == c) as usize,
141 }
142 }
143
144 fn through_char_or_end(&self, c: u8) -> Match {
145 memchr(c, &self.source[self.next..])
146 .map(|pos| Match { len: pos + 1 })
147 .unwrap_or_else(|| Match {
148 len: self.remaining(),
149 })
150 }
151
152 fn through_char(&self, c: u8) -> SyntaxResult<Match> {
153 memchr(c, &self.source[self.next..])
154 .map(|pos| Match { len: pos + 1 })
155 .ok_or_else(|| self.error(SyntaxErrorType::UnexpectedEnd))
156 }
157
158 fn while_not_char(&self, a: u8) -> Match {
159 Match {
160 len: memchr(a, &self.source[self.next..]).unwrap_or(self.remaining()),
161 }
162 }
163
164 fn while_not_2_chars(&self, a: u8, b: u8) -> Match {
165 Match {
166 len: memchr2(a, b, &self.source[self.next..]).unwrap_or(self.remaining()),
167 }
168 }
169
170 fn while_not_3_chars(&self, a: u8, b: u8, c: u8) -> Match {
171 Match {
172 len: memchr3(a, b, c, &self.source[self.next..]).unwrap_or(self.remaining()),
173 }
174 }
175
176 fn while_chars(&self, chars: &CharFilter) -> Match {
177 let mut len = 0;
178 while len < self.remaining() && chars.has(self.source[self.next + len]) {
179 len += 1;
180 }
181 Match { len }
182 }
183
184 fn aho_corasick(&self, ac: &AhoCorasick, anchored: Anchored) -> SyntaxResult<AhoCorasickMatch> {
185 ac.find(Input::new(&self.source[self.next..]).anchored(anchored))
186 .map(|m| AhoCorasickMatch {
187 id: m.pattern().as_usize(),
188 mat: Match { len: m.end() },
189 })
190 .ok_or_else(|| self.error(SyntaxErrorType::ExpectedNotFound))
191 }
192
193 fn range(&self, m: Match) -> Loc {
194 Loc(self.next, self.next + m.len)
195 }
196
197 fn consume(&mut self, m: Match) -> Match {
198 self.next += m.len;
199 m
200 }
201
202 fn consume_next(&mut self) -> SyntaxResult<u8> {
203 let c = self.peek(0)?;
204 self.next += 1;
205 Ok(c)
206 }
207
208 fn skip_expect(&mut self, n: usize) {
209 debug_assert!(self.next + n <= self.end());
210 self.next += n;
211 }
212}
213
impl<'a> Index<Loc> for Lexer<'a> {
  type Output = [u8];

  /// Returns the source bytes covered by `index` (absolute byte offsets).
  fn index(&self, index: Loc) -> &Self::Output {
    &self.source[index.0..index.1]
  }
}
221
impl<'a> Index<Match> for Lexer<'a> {
  type Output = [u8];

  /// Returns the bytes of a match that has just been consumed: the
  /// `index.len` bytes ending at the current cursor position.
  fn index(&self, index: Match) -> &Self::Output {
    &self.source[self.next - index.len..self.next]
  }
}
229
230#[rustfmt::skip]
231pub static OPERATORS_MAPPING: Lazy<AHashMap<TokenType, &'static [u8]>> = Lazy::new(|| {
232 let mut map = AHashMap::<TokenType, &'static [u8]>::new();
233 map.insert(TokenType::Ampersand, b"&");
234 map.insert(TokenType::AmpersandAmpersand, b"&&");
235 map.insert(TokenType::AmpersandAmpersandEquals, b"&&=");
236 map.insert(TokenType::AmpersandEquals, b"&=");
237 map.insert(TokenType::Asterisk, b"*");
238 map.insert(TokenType::AsteriskAsterisk, b"**");
239 map.insert(TokenType::AsteriskAsteriskEquals, b"**=");
240 map.insert(TokenType::AsteriskEquals, b"*=");
241 map.insert(TokenType::Bar, b"|");
242 map.insert(TokenType::BarBar, b"||");
243 map.insert(TokenType::BarBarEquals, b"||=");
244 map.insert(TokenType::BarEquals, b"|=");
245 map.insert(TokenType::BraceClose, b"}");
246 map.insert(TokenType::BraceOpen, b"{");
247 map.insert(TokenType::BracketClose, b"]");
248 map.insert(TokenType::BracketOpen, b"[");
249 map.insert(TokenType::Caret, b"^");
250 map.insert(TokenType::CaretEquals, b"^=");
251 map.insert(TokenType::ChevronLeft, b"<");
252 map.insert(TokenType::ChevronLeftChevronLeft, b"<<");
253 map.insert(TokenType::ChevronLeftChevronLeftEquals, b"<<=");
254 map.insert(TokenType::ChevronLeftEquals, b"<=");
255 map.insert(TokenType::ChevronRight, b">");
256 map.insert(TokenType::ChevronRightChevronRight, b">>");
257 map.insert(TokenType::ChevronRightChevronRightChevronRight, b">>>");
258 map.insert(TokenType::ChevronRightChevronRightChevronRightEquals, b">>>=");
259 map.insert(TokenType::ChevronRightChevronRightEquals, b">>=");
260 map.insert(TokenType::ChevronRightEquals, b">=");
261 map.insert(TokenType::Colon, b":");
262 map.insert(TokenType::Comma, b",");
263 map.insert(TokenType::Dot, b".");
264 map.insert(TokenType::DotDotDot, b"...");
265 map.insert(TokenType::Equals, b"=");
266 map.insert(TokenType::EqualsChevronRight, b"=>");
267 map.insert(TokenType::EqualsEquals, b"==");
268 map.insert(TokenType::EqualsEqualsEquals, b"===");
269 map.insert(TokenType::Exclamation, b"!");
270 map.insert(TokenType::ExclamationEquals, b"!=");
271 map.insert(TokenType::ExclamationEqualsEquals, b"!==");
272 map.insert(TokenType::Hyphen, b"-");
273 map.insert(TokenType::HyphenEquals, b"-=");
274 map.insert(TokenType::HyphenHyphen, b"--");
275 map.insert(TokenType::ParenthesisClose, b")");
276 map.insert(TokenType::ParenthesisOpen, b"(");
277 map.insert(TokenType::Percent, b"%");
278 map.insert(TokenType::PercentEquals, b"%=");
279 map.insert(TokenType::Plus, b"+");
280 map.insert(TokenType::PlusEquals, b"+=");
281 map.insert(TokenType::PlusPlus, b"++");
282 map.insert(TokenType::PrivateMember, b"#");
283 map.insert(TokenType::Question, b"?");
284 map.insert(TokenType::QuestionDot, b"?.");
285 map.insert(TokenType::QuestionDotBracketOpen, b"?.[");
286 map.insert(TokenType::QuestionDotParenthesisOpen, b"?.(");
287 map.insert(TokenType::QuestionQuestion, b"??");
288 map.insert(TokenType::QuestionQuestionEquals, b"??=");
289 map.insert(TokenType::Semicolon, b";");
290 map.insert(TokenType::Slash, b"/");
291 map.insert(TokenType::SlashEquals, b"/=");
292 map.insert(TokenType::Tilde, b"~");
293 map
294});
295
296pub static KEYWORDS_MAPPING: Lazy<AHashMap<TokenType, &'static [u8]>> = Lazy::new(|| {
297 let mut map = AHashMap::<TokenType, &'static [u8]>::new();
298 map.insert(TokenType::KeywordAs, b"as");
299 map.insert(TokenType::KeywordAsync, b"async");
300 map.insert(TokenType::KeywordAwait, b"await");
301 map.insert(TokenType::KeywordBreak, b"break");
302 map.insert(TokenType::KeywordCase, b"case");
303 map.insert(TokenType::KeywordCatch, b"catch");
304 map.insert(TokenType::KeywordClass, b"class");
305 map.insert(TokenType::KeywordConst, b"const");
306 map.insert(TokenType::KeywordConstructor, b"constructor");
307 map.insert(TokenType::KeywordContinue, b"continue");
308 map.insert(TokenType::KeywordDebugger, b"debugger");
309 map.insert(TokenType::KeywordDefault, b"default");
310 map.insert(TokenType::KeywordDelete, b"delete");
311 map.insert(TokenType::KeywordDo, b"do");
312 map.insert(TokenType::KeywordElse, b"else");
313 map.insert(TokenType::KeywordEnum, b"enum");
314 map.insert(TokenType::KeywordExport, b"export");
315 map.insert(TokenType::KeywordExtends, b"extends");
316 map.insert(TokenType::KeywordFinally, b"finally");
317 map.insert(TokenType::KeywordFor, b"for");
318 map.insert(TokenType::KeywordFrom, b"from");
319 map.insert(TokenType::KeywordFunction, b"function");
320 map.insert(TokenType::KeywordGet, b"get");
321 map.insert(TokenType::KeywordIf, b"if");
322 map.insert(TokenType::KeywordImport, b"import");
323 map.insert(TokenType::KeywordIn, b"in");
324 map.insert(TokenType::KeywordInstanceof, b"instanceof");
325 map.insert(TokenType::KeywordLet, b"let");
326 map.insert(TokenType::KeywordNew, b"new");
327 map.insert(TokenType::KeywordOf, b"of");
328 map.insert(TokenType::KeywordReturn, b"return");
329 map.insert(TokenType::KeywordSet, b"set");
330 map.insert(TokenType::KeywordStatic, b"static");
331 map.insert(TokenType::KeywordSuper, b"super");
332 map.insert(TokenType::KeywordSwitch, b"switch");
333 map.insert(TokenType::KeywordThis, b"this");
334 map.insert(TokenType::KeywordThrow, b"throw");
335 map.insert(TokenType::KeywordTry, b"try");
336 map.insert(TokenType::KeywordTypeof, b"typeof");
337 map.insert(TokenType::KeywordVar, b"var");
338 map.insert(TokenType::KeywordVoid, b"void");
339 map.insert(TokenType::KeywordWhile, b"while");
340 map.insert(TokenType::KeywordWith, b"with");
341 map.insert(TokenType::KeywordYield, b"yield");
342 map.insert(TokenType::LiteralFalse, b"false");
343 map.insert(TokenType::LiteralNull, b"null");
344 map.insert(TokenType::LiteralTrue, b"true");
345 map
346});
347
348pub static KEYWORD_STRS: Lazy<AHashMap<&'static [u8], usize>> = Lazy::new(|| {
349 AHashMap::<&'static [u8], usize>::from_iter(
350 KEYWORDS_MAPPING.values().enumerate().map(|(i, v)| (*v, i)),
351 )
352});
353
354static PATTERNS: Lazy<Vec<(TokenType, &'static [u8])>> = Lazy::new(|| {
356 let mut patterns: Vec<(TokenType, &'static [u8])> = Vec::new();
357 for (&k, &v) in OPERATORS_MAPPING.iter() {
358 patterns.push((k, v));
359 }
360 for (&k, &v) in KEYWORDS_MAPPING.iter() {
361 patterns.push((k, &v));
362 }
363 patterns.push((TokenType::ChevronLeftSlash, b"</"));
364 patterns.push((TokenType::CommentMultiple, b"/*"));
365 patterns.push((TokenType::CommentSingle, b"//"));
366 for c in ID_START_CHARSTR.chunks(1) {
367 patterns.push((TokenType::Identifier, c));
368 }
369 for c in b"0123456789".chunks(1) {
370 patterns.push((TokenType::LiteralNumber, c));
371 }
372 patterns.push((TokenType::LiteralNumberBin, b"0b"));
373 patterns.push((TokenType::LiteralNumberBin, b"0B"));
374 patterns.push((TokenType::LiteralNumberHex, b"0x"));
375 patterns.push((TokenType::LiteralNumberHex, b"0X"));
376 patterns.push((TokenType::LiteralNumberOct, b"0o"));
377 patterns.push((TokenType::LiteralNumberOct, b"0O"));
378 for c in b".0.1.2.3.4.5.6.7.8.9".chunks(2) {
380 patterns.push((TokenType::LiteralNumber, c));
381 }
382 for c in b"?.0?.1?.2?.3?.4?.5?.6?.7?.8?.9".chunks(3) {
384 patterns.push((TokenType::Question, c));
385 }
386 patterns.push((TokenType::LiteralString, b"\""));
387 patterns.push((TokenType::LiteralString, b"'"));
388 patterns.push((TokenType::LiteralTemplatePartString, b"`"));
389 patterns
390});
391
/// Anchored DFA over all of PATTERNS; a match's pattern ID indexes back into
/// PATTERNS to recover the token type. LeftmostLongest makes e.g. "===" win
/// over "==" and "=".
static MATCHER: Lazy<AhoCorasick> = Lazy::new(|| {
  AhoCorasickBuilder::new()
    .start_kind(StartKind::Anchored)
    .kind(Some(AhoCorasickKind::DFA))
    .match_kind(MatchKind::LeftmostLongest)
    .build(PATTERNS.iter().map(|(_, pat)| pat))
    .unwrap()
});

/// Finds the "*/" that terminates a multi-line comment.
static COMMENT_END: Lazy<AhoCorasick> = Lazy::new(|| AhoCorasick::new([b"*/"]).unwrap());
402
403fn lex_multiple_comment(lexer: &mut Lexer<'_>) -> SyntaxResult<()> {
404 lexer.skip_expect(2);
406 lexer.consume(lexer.aho_corasick(&COMMENT_END, Anchored::No)?.mat);
407 Ok(())
408}
409
410fn lex_single_comment(lexer: &mut Lexer<'_>) -> SyntaxResult<()> {
411 lexer.skip_expect(2);
413 lexer.consume(lexer.through_char_or_end(b'\n'));
415 Ok(())
416}
417
418fn lex_identifier(
419 lexer: &mut Lexer<'_>,
420 mode: LexMode,
421 preceded_by_line_terminator: bool,
422) -> SyntaxResult<Token> {
423 let cp = lexer.checkpoint();
424 lexer.skip_expect(1);
426 loop {
427 lexer.consume(lexer.while_chars(if mode == LexMode::JsxTag {
428 &ID_CONTINUE_JSX
429 } else {
430 &ID_CONTINUE
431 }));
432 if lexer.peek_or_eof(0).filter(|c| !c.is_ascii()).is_none() {
434 break;
435 };
436 lexer.skip_expect(1);
437 }
438 Ok(Token::new(
439 lexer.since_checkpoint(cp),
440 TokenType::Identifier,
441 preceded_by_line_terminator,
442 ))
443}
444
445fn lex_bigint_or_number(
446 lexer: &mut Lexer<'_>,
447 preceded_by_line_terminator: bool,
448) -> SyntaxResult<Token> {
449 let cp = lexer.checkpoint();
450 lexer.consume(lexer.while_chars(&DIGIT));
452 if !lexer.consume(lexer.if_char(b'n')).is_empty() {
453 return Ok(Token::new(
454 lexer.since_checkpoint(cp),
455 TokenType::LiteralBigInt,
456 preceded_by_line_terminator,
457 ));
458 }
459 lexer.consume(lexer.if_char(b'.'));
460 lexer.consume(lexer.while_chars(&DIGIT));
461 if lexer
462 .peek_or_eof(0)
463 .filter(|&c| c == b'e' || c == b'E')
464 .is_some()
465 {
466 lexer.skip_expect(1);
467 match lexer.peek(0)? {
468 b'+' | b'-' => lexer.skip_expect(1),
469 _ => {}
470 };
471 lexer.consume(lexer.while_chars(&DIGIT));
472 }
473 Ok(Token::new(
474 lexer.since_checkpoint(cp),
475 TokenType::LiteralNumber,
476 preceded_by_line_terminator,
477 ))
478}
479
480fn lex_bigint_or_number_bin(
481 lexer: &mut Lexer<'_>,
482 preceded_by_line_terminator: bool,
483) -> SyntaxResult<Token> {
484 let cp = lexer.checkpoint();
485 lexer.skip_expect(2);
486 lexer.consume(lexer.while_chars(&DIGIT_BIN));
487 if !lexer.consume(lexer.if_char(b'n')).is_empty() {
488 return Ok(Token::new(
489 lexer.since_checkpoint(cp),
490 TokenType::LiteralBigInt,
491 preceded_by_line_terminator,
492 ));
493 }
494 Ok(Token::new(
495 lexer.since_checkpoint(cp),
496 TokenType::LiteralNumber,
497 preceded_by_line_terminator,
498 ))
499}
500
501fn lex_bigint_or_number_hex(
502 lexer: &mut Lexer<'_>,
503 preceded_by_line_terminator: bool,
504) -> SyntaxResult<Token> {
505 let cp = lexer.checkpoint();
506 lexer.skip_expect(2);
507 lexer.consume(lexer.while_chars(&DIGIT_HEX));
508 if !lexer.consume(lexer.if_char(b'n')).is_empty() {
509 return Ok(Token::new(
510 lexer.since_checkpoint(cp),
511 TokenType::LiteralBigInt,
512 preceded_by_line_terminator,
513 ));
514 }
515 Ok(Token::new(
516 lexer.since_checkpoint(cp),
517 TokenType::LiteralNumber,
518 preceded_by_line_terminator,
519 ))
520}
521
522fn lex_bigint_or_number_oct(
523 lexer: &mut Lexer<'_>,
524 preceded_by_line_terminator: bool,
525) -> SyntaxResult<Token> {
526 let cp = lexer.checkpoint();
527 lexer.skip_expect(2);
528 lexer.consume(lexer.while_chars(&DIGIT_OCT));
529 if !lexer.consume(lexer.if_char(b'n')).is_empty() {
530 return Ok(Token::new(
531 lexer.since_checkpoint(cp),
532 TokenType::LiteralBigInt,
533 preceded_by_line_terminator,
534 ));
535 }
536 Ok(Token::new(
537 lexer.since_checkpoint(cp),
538 TokenType::LiteralNumber,
539 preceded_by_line_terminator,
540 ))
541}
542
543fn lex_private_member(
544 lexer: &mut Lexer<'_>,
545 preceded_by_line_terminator: bool,
546) -> SyntaxResult<Token> {
547 let cp = lexer.checkpoint();
548 lexer.skip_expect(1);
550 if !ID_START.has(lexer.peek(0)?) {
551 return Err(lexer.error(SyntaxErrorType::ExpectedSyntax("private member")));
552 };
553 lexer.skip_expect(1);
554 loop {
556 lexer.consume(lexer.while_chars(&ID_CONTINUE));
557 if lexer.peek_or_eof(0).filter(|c| !c.is_ascii()).is_none() {
559 break;
560 };
561 lexer.skip_expect(1);
562 }
563 Ok(Token::new(
564 lexer.since_checkpoint(cp),
565 TokenType::PrivateMember,
566 preceded_by_line_terminator,
567 ))
568}
569
570fn lex_regex(lexer: &mut Lexer<'_>, preceded_by_line_terminator: bool) -> SyntaxResult<Token> {
572 let cp = lexer.checkpoint();
573 lexer.consume(lexer.n(1)?);
575 let mut in_charset = false;
576 loop {
577 match lexer.consume_next()? {
579 b'\\' => {
580 if lexer.peek(1)? == b'\n' {
583 return Err(lexer.error(SyntaxErrorType::LineTerminatorInRegex));
584 };
585 lexer.skip_expect(1);
586 }
587 b'/' if !in_charset => {
588 break;
589 }
590 b'[' => {
591 in_charset = true;
592 }
593 b']' if in_charset => {
594 in_charset = false;
595 }
596 b'\n' => {
597 return Err(lexer.error(SyntaxErrorType::LineTerminatorInRegex));
598 }
599 _ => {}
600 };
601 }
602 lexer.consume(lexer.while_chars(&ID_CONTINUE));
603 Ok(Token::new(
604 lexer.since_checkpoint(cp),
605 TokenType::LiteralRegex,
606 preceded_by_line_terminator,
607 ))
608}
609
610fn lex_string(lexer: &mut Lexer<'_>, preceded_by_line_terminator: bool) -> SyntaxResult<Token> {
612 let cp = lexer.checkpoint();
613 let quote = lexer.peek(0)?;
614 lexer.skip_expect(1);
615 loop {
616 lexer.consume(lexer.while_not_3_chars(b'\\', b'\n', quote));
618 match lexer.peek(0)? {
619 b'\\' => {
620 lexer.consume(lexer.n(2)?);
621 }
622 b'\n' => {
623 return Err(lexer.error(SyntaxErrorType::LineTerminatorInString));
624 }
625 c if c == quote => {
626 lexer.skip_expect(1);
627 break;
628 }
629 _ => unreachable!(),
630 };
631 }
632 Ok(Token::new(
633 lexer.since_checkpoint(cp),
634 TokenType::LiteralString,
635 preceded_by_line_terminator,
636 ))
637}
638
639pub fn lex_template_string_continue(
640 lexer: &mut Lexer<'_>,
641 preceded_by_line_terminator: bool,
642) -> SyntaxResult<Token> {
643 let cp = lexer.checkpoint();
644 let mut ended = false;
645 let loc = loop {
646 lexer.consume(lexer.while_not_3_chars(b'\\', b'`', b'$'));
647 match lexer.peek(0)? {
648 b'\\' => {
649 lexer.consume(lexer.n(2)?);
650 }
651 b'`' => {
652 ended = true;
653 let loc = Some(lexer.since_checkpoint(cp));
654 lexer.skip_expect(1);
655 break loc;
656 }
657 b'$' => {
658 if lexer.peek(1)? == b'{' {
659 let loc = Some(lexer.since_checkpoint(cp));
660 lexer.skip_expect(2);
661 break loc;
662 } else {
663 lexer.skip_expect(1);
664 }
665 }
666 _ => unreachable!(),
667 };
668 };
669 Ok(Token::new(
670 loc.unwrap(),
671 if ended {
672 TokenType::LiteralTemplatePartStringEnd
673 } else {
674 TokenType::LiteralTemplatePartString
675 },
676 preceded_by_line_terminator,
677 ))
678}
679
/// Lexes the first part of a template literal; the cursor must be on the
/// opening backtick, which is skipped before delegating.
fn lex_template(lexer: &mut Lexer<'_>, preceded_by_line_terminator: bool) -> SyntaxResult<Token> {
  lexer.skip_expect(1);
  lex_template_string_continue(lexer, preceded_by_line_terminator)
}
686
/// Lexes and returns the next token, applying `mode`'s context-sensitive
/// rules. Whitespace and comments are skipped first; whether a line
/// terminator was crossed while skipping is recorded on the returned token.
pub fn lex_next(lexer: &mut Lexer<'_>, mode: LexMode) -> SyntaxResult<Token> {
  let mut preceded_by_line_terminator = false;
  loop {
    // In JSX text mode everything up to the next `{` or `<` is a single raw
    // text token (possibly empty); whitespace is significant, so none of the
    // skipping below applies.
    if mode == LexMode::JsxTextContent {
      let cp = lexer.checkpoint();
      lexer.consume(lexer.while_not_2_chars(b'{', b'<'));
      return Ok(Token::new(
        lexer.since_checkpoint(cp),
        TokenType::JsxTextContent,
        false,
      ));
    };

    // Skip whitespace. `lexer[ws]` re-reads the span just consumed to check
    // whether it contained a newline.
    let ws = lexer.while_chars(&WHITESPACE);
    lexer.consume(ws);
    preceded_by_line_terminator =
      preceded_by_line_terminator || memchr(b'\n', &lexer[ws]).is_some();

    if lexer.at_end() {
      return Ok(Token::new(
        lexer.eof_range(),
        TokenType::EOF,
        preceded_by_line_terminator,
      ));
    };

    // A UTF-8 lead byte (of a 2-, 3-, or 4-byte sequence) can only begin an
    // identifier, since every pattern in MATCHER is pure ASCII.
    let is_utf8_start = if let Some(c) = lexer.peek_or_eof(0) {
      c >> 5 == 0b110 || c >> 4 == 0b1110 || c >> 3 == 0b11110
    } else {
      false
    };

    if is_utf8_start {
      return lex_identifier(lexer, mode, preceded_by_line_terminator);
    };

    // Longest anchored pattern at the cursor; `id` indexes into PATTERNS.
    // The match is not consumed yet — most arms delegate to a lex_* function
    // that re-lexes from the cursor.
    let AhoCorasickMatch { id, mut mat } = lexer.aho_corasick(&MATCHER, Anchored::Yes)?;
    match PATTERNS[id].0 {
      // Comments are skipped and the loop restarts. A single-line comment is
      // treated as a line terminator.
      TokenType::CommentMultiple => lex_multiple_comment(lexer)?,
      TokenType::CommentSingle => {
        preceded_by_line_terminator = true;
        lex_single_comment(lexer)?
      }
      pat => {
        return match pat {
          TokenType::Identifier => lex_identifier(lexer, mode, preceded_by_line_terminator),
          TokenType::LiteralNumber => lex_bigint_or_number(lexer, preceded_by_line_terminator),
          TokenType::LiteralNumberBin => {
            lex_bigint_or_number_bin(lexer, preceded_by_line_terminator)
          }
          TokenType::LiteralNumberHex => {
            lex_bigint_or_number_hex(lexer, preceded_by_line_terminator)
          }
          TokenType::LiteralNumberOct => {
            lex_bigint_or_number_oct(lexer, preceded_by_line_terminator)
          }
          TokenType::LiteralString => lex_string(lexer, preceded_by_line_terminator),
          TokenType::LiteralTemplatePartString => lex_template(lexer, preceded_by_line_terminator),
          TokenType::PrivateMember => lex_private_member(lexer, preceded_by_line_terminator),
          // In SlashIsRegex mode a slash starts a regex literal instead of a
          // division (or `/=`) operator.
          TokenType::Slash | TokenType::SlashEquals if mode == LexMode::SlashIsRegex => {
            lex_regex(lexer, preceded_by_line_terminator)
          }
          typ => {
            if typ == TokenType::Question && mat.len() != 1 {
              // A "?.<digit>" pattern matched: that is a conditional `?`
              // followed by a number like `.5`, so keep only the `?`.
              mat = mat.prefix(1);
            } else if KEYWORDS_MAPPING.contains_key(&typ)
              && lexer
                .peek_or_eof(mat.len())
                .filter(|c| ID_CONTINUE.has(*c))
                .is_some()
            {
              // The keyword is immediately followed by more identifier
              // characters, so it is really a plain identifier (e.g. `lets`).
              return lex_identifier(lexer, mode, preceded_by_line_terminator);
            };
            let loc = lexer.range(mat);
            lexer.consume(mat);
            Ok(Token::new(loc, typ, preceded_by_line_terminator))
          }
        };
      }
    };
  }
}