1#![doc = include_str!("../examples/parser.rs")]
41#![doc = include_str!("../examples/parser_tsx.rs")]
46#![warn(missing_docs)]
68
69mod context;
70mod cursor;
71mod error_handler;
72mod modifiers;
73mod module_record;
74mod state;
75
76mod js;
77mod jsx;
78mod ts;
79
80mod diagnostics;
81
82#[cfg(not(feature = "benchmarking"))]
84mod lexer;
85#[cfg(feature = "benchmarking")]
86#[doc(hidden)]
87pub mod lexer;
88
89use oxc_allocator::{Allocator, Box as ArenaBox, Dummy};
90use oxc_ast::{
91 AstBuilder,
92 ast::{Expression, Program},
93};
94use oxc_diagnostics::OxcDiagnostic;
95use oxc_span::{ModuleKind, SourceType, Span};
96use oxc_syntax::module_record::ModuleRecord;
97
98use crate::{
99 context::{Context, StatementContext},
100 error_handler::FatalError,
101 lexer::{Lexer, Token},
102 module_record::ModuleRecordBuilder,
103 state::ParserState,
104};
105
/// Largest source text length, in bytes, that the parser accepts.
///
/// Resolves to `u32::MAX` when `usize` is at least 8 bytes wide and to
/// `isize::MAX` on narrower targets. `ParserImpl::parse` replaces all other
/// diagnostics with a single "overlong source" error when the input is longer
/// than this.
pub(crate) const MAX_LEN: usize = {
    let pointer_width_bytes = size_of::<usize>();
    if pointer_width_bytes < 8 {
        isize::MAX as usize
    } else {
        u32::MAX as usize
    }
};
119
/// Everything produced by one call to [`Parser::parse`].
///
/// A value is returned for every input, valid or not. Inspect
/// [`errors`](Self::errors) and [`panicked`](Self::panicked) before relying on
/// [`program`](Self::program).
#[non_exhaustive]
pub struct ParserReturn<'a> {
    /// The parsed AST.
    ///
    /// When parsing recorded a fatal error, this is a dummy [`Program`] that
    /// only carries the original source type and source text.
    pub program: Program<'a>,

    /// Module record produced by the parser's module record builder.
    pub module_record: ModuleRecord<'a>,

    /// Diagnostics collected while parsing.
    ///
    /// Normally this is the lexer errors, followed by the parser errors,
    /// followed (for non-TypeScript sources only) by the module record errors.
    /// When the source is detected as Flow, it holds just the single Flow
    /// diagnostic instead.
    pub errors: Vec<OxcDiagnostic>,

    /// Irregular whitespace spans recorded by the lexer's trivia builder.
    pub irregular_whitespaces: Box<[Span]>,

    /// `true` when parsing recorded a fatal error, or when the source text is
    /// longer than the maximum supported length.
    pub panicked: bool,

    /// `true` when parsing produced at least one error and the first comment
    /// of a JavaScript source contains `@flow`.
    pub is_flow_language: bool,
}
184
/// Switches that adjust how [`Parser`] treats a source text.
///
/// Start from [`ParseOptions::default`], override individual fields with
/// struct update syntax, and hand the result to [`Parser::with_options`].
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
    /// Only present when the `regular_expression` feature is enabled.
    ///
    /// NOTE(review): presumably controls whether regular expression literals
    /// are parsed into a pattern AST — confirm against the code that reads it.
    ///
    /// Default: `false`
    #[cfg(feature = "regular_expression")]
    pub parse_regular_expression: bool,

    /// When set, the parser's initial context starts with its "return" flag
    /// enabled, so `return` is allowed outside of a function body.
    ///
    /// Default: `false`
    pub allow_return_outside_function: bool,

    /// NOTE(review): presumably keeps parenthesized expressions as their own
    /// AST nodes — confirm against the expression parsing code.
    ///
    /// Default: `true`
    pub preserve_parens: bool,

    /// Accept V8 runtime calls such as `%DebugPrint(x)`.
    ///
    /// When this is `false`, such input is reported as `Unexpected token`.
    ///
    /// Default: `false`
    pub allow_v8_intrinsics: bool,
}

impl Default for ParseOptions {
    /// Every switch starts disabled except `preserve_parens`.
    fn default() -> Self {
        Self {
            allow_return_outside_function: false,
            allow_v8_intrinsics: false,
            preserve_parens: true,
            #[cfg(feature = "regular_expression")]
            parse_regular_expression: false,
        }
    }
}
237
/// Public entry point for parsing a source text.
///
/// Create one with [`Parser::new`], optionally adjust it with
/// [`Parser::with_options`], then consume it with [`Parser::parse`] or
/// [`Parser::parse_expression`].
pub struct Parser<'a> {
    /// Arena allocator passed on to the lexer, the AST builder, and the
    /// module record builder.
    allocator: &'a Allocator,
    /// The source text to parse.
    source_text: &'a str,
    /// Source type the text is parsed as.
    source_type: SourceType,
    /// Parsing options; [`ParseOptions::default`] unless replaced via
    /// [`Parser::with_options`].
    options: ParseOptions,
}
247
248impl<'a> Parser<'a> {
249 pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
256 let options = ParseOptions::default();
257 Self { allocator, source_text, source_type, options }
258 }
259
260 #[must_use]
262 pub fn with_options(mut self, options: ParseOptions) -> Self {
263 self.options = options;
264 self
265 }
266}
267
268mod parser_parse {
269 use super::*;
270
/// Zero-sized token whose ordinary constructor is private to this module.
///
/// The `Parser` methods in this module create one and pass it to
/// `ParserImpl::new`, which forwards it to the lexer's constructor.
pub struct UniquePromise(());

impl UniquePromise {
    /// Create the token. Callable only from within this module.
    #[inline]
    fn new() -> Self {
        Self(())
    }

    /// Public constructor that is compiled only for tests and for builds with
    /// the `benchmarking` feature.
    #[cfg(any(test, feature = "benchmarking"))]
    pub fn new_for_tests_and_benchmarks() -> Self {
        Self::new()
    }
}
301
302 impl<'a> Parser<'a> {
303 pub fn parse(self) -> ParserReturn<'a> {
310 let unique = UniquePromise::new();
311 let parser = ParserImpl::new(
312 self.allocator,
313 self.source_text,
314 self.source_type,
315 self.options,
316 unique,
317 );
318 parser.parse()
319 }
320
321 pub fn parse_expression(self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
341 let unique = UniquePromise::new();
342 let parser = ParserImpl::new(
343 self.allocator,
344 self.source_text,
345 self.source_type,
346 self.options,
347 unique,
348 );
349 parser.parse_expression()
350 }
351 }
352}
353use parser_parse::UniquePromise;
354
/// Implementation behind [`Parser`]: holds the lexer, the collected
/// diagnostics, and the AST builder for a single parse.
///
/// Several methods used by this type's `parse` and `parse_expression`
/// (for example `parse_expr` and `parse_hashbang`) are defined elsewhere.
struct ParserImpl<'a> {
    /// Options supplied by the caller.
    options: ParseOptions,

    /// Lexer over the source text. It keeps its own error list and a trivia
    /// builder holding comments and irregular whitespace spans.
    pub(crate) lexer: Lexer<'a>,

    /// Source type the text is parsed as.
    source_type: SourceType,

    /// The source text being parsed.
    source_text: &'a str,

    /// Diagnostics reported by the parser itself; lexer errors are kept
    /// separately in `lexer.errors`.
    errors: Vec<OxcDiagnostic>,

    /// Fatal error, if one was recorded. Taken out by `parse` and
    /// `parse_expression`.
    fatal_error: Option<FatalError>,

    /// Starts as `Token::default()`; `parse_program` assigns the lexer's first
    /// token to it.
    /// NOTE(review): presumably the token the parser is currently positioned
    /// on — confirm in the cursor code.
    token: Token,

    /// Starts at `0`.
    /// NOTE(review): presumably the end offset of the previously consumed
    /// token — confirm in the cursor code.
    prev_token_end: u32,

    /// Auxiliary parser state; `check_unfinished_errors` reads its
    /// `cover_initialized_name` collection.
    state: ParserState<'a>,

    /// Parsing context; its initial value comes from `default_context`.
    ctx: Context,

    /// Builder used to construct AST nodes in the allocator.
    ast: AstBuilder<'a>,

    /// Builder for the module record returned from `parse`.
    module_record_builder: ModuleRecordBuilder<'a>,

    /// Cached result of `source_type.is_typescript()`.
    is_ts: bool,
}
395
396impl<'a> ParserImpl<'a> {
397 #[inline]
402 pub fn new(
403 allocator: &'a Allocator,
404 source_text: &'a str,
405 source_type: SourceType,
406 options: ParseOptions,
407 unique: UniquePromise,
408 ) -> Self {
409 Self {
410 options,
411 lexer: Lexer::new(allocator, source_text, source_type, unique),
412 source_type,
413 source_text,
414 errors: vec![],
415 fatal_error: None,
416 token: Token::default(),
417 prev_token_end: 0,
418 state: ParserState::new(),
419 ctx: Self::default_context(source_type, options),
420 ast: AstBuilder::new(allocator),
421 module_record_builder: ModuleRecordBuilder::new(allocator),
422 is_ts: source_type.is_typescript(),
423 }
424 }
425
426 #[inline]
431 pub fn parse(mut self) -> ParserReturn<'a> {
432 let mut program = self.parse_program();
433 let mut panicked = false;
434
435 if let Some(fatal_error) = self.fatal_error.take() {
436 panicked = true;
437 self.errors.truncate(fatal_error.errors_len);
438 if !self.lexer.errors.is_empty() && self.cur_kind().is_eof() {
439 } else {
441 self.error(fatal_error.error);
442 }
443
444 program = Program::dummy(self.ast.allocator);
445 program.source_type = self.source_type;
446 program.source_text = self.source_text;
447 }
448
449 self.check_unfinished_errors();
450
451 if let Some(overlong_error) = self.overlong_error() {
452 panicked = true;
453 self.lexer.errors.clear();
454 self.errors.clear();
455 self.error(overlong_error);
456 }
457
458 let mut is_flow_language = false;
459 let mut errors = vec![];
460 if (!self.lexer.errors.is_empty() || !self.errors.is_empty())
462 && let Some(error) = self.flow_error()
463 {
464 is_flow_language = true;
465 errors.push(error);
466 }
467 let (module_record, module_record_errors) = self.module_record_builder.build();
468 if errors.len() != 1 {
469 errors.reserve(self.lexer.errors.len() + self.errors.len());
470 errors.extend(self.lexer.errors);
471 errors.extend(self.errors);
472 if !self.source_type.is_typescript() {
474 errors.extend(module_record_errors);
475 }
476 }
477 let irregular_whitespaces =
478 self.lexer.trivia_builder.irregular_whitespaces.into_boxed_slice();
479
480 let source_type = program.source_type;
481 if source_type.is_unambiguous() {
482 program.source_type = if module_record.has_module_syntax {
483 source_type.with_module(true)
484 } else {
485 source_type.with_script(true)
486 };
487 }
488
489 ParserReturn {
490 program,
491 module_record,
492 errors,
493 irregular_whitespaces,
494 panicked,
495 is_flow_language,
496 }
497 }
498
499 pub fn parse_expression(mut self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
500 self.bump_any();
502 let expr = self.parse_expr();
503 if let Some(FatalError { error, .. }) = self.fatal_error.take() {
504 return Err(vec![error]);
505 }
506 self.check_unfinished_errors();
507 let errors = self.lexer.errors.into_iter().chain(self.errors).collect::<Vec<_>>();
508 if !errors.is_empty() {
509 return Err(errors);
510 }
511 Ok(expr)
512 }
513
514 #[expect(clippy::cast_possible_truncation)]
515 fn parse_program(&mut self) -> Program<'a> {
516 self.token = self.lexer.first_token();
519
520 let hashbang = self.parse_hashbang();
521 let (directives, statements) =
522 self.parse_directives_and_statements(true);
523
524 let span = Span::new(0, self.source_text.len() as u32);
525 let comments = self.ast.vec_from_iter(self.lexer.trivia_builder.comments.iter().copied());
526 self.ast.program(
527 span,
528 self.source_type,
529 self.source_text,
530 comments,
531 hashbang,
532 directives,
533 statements,
534 )
535 }
536
537 fn default_context(source_type: SourceType, options: ParseOptions) -> Context {
538 let mut ctx = Context::default().and_ambient(source_type.is_typescript_definition());
539 if source_type.module_kind() == ModuleKind::Module {
540 ctx = ctx.and_await(true);
542 }
543 if options.allow_return_outside_function {
544 ctx = ctx.and_return(true);
545 }
546 ctx
547 }
548
549 fn flow_error(&mut self) -> Option<OxcDiagnostic> {
552 if !self.source_type.is_javascript() {
553 return None;
554 }
555 let span = self.lexer.trivia_builder.comments.first()?.span;
556 if span.source_text(self.source_text).contains("@flow") {
557 self.errors.clear();
558 Some(diagnostics::flow(span))
559 } else {
560 None
561 }
562 }
563
564 fn check_unfinished_errors(&mut self) {
565 use oxc_span::GetSpan;
566 for expr in self.state.cover_initialized_name.values() {
569 self.errors.push(diagnostics::cover_initialized_name(expr.span()));
570 }
571 }
572
573 #[cold]
576 fn overlong_error(&self) -> Option<OxcDiagnostic> {
577 if self.source_text.len() > MAX_LEN {
578 return Some(diagnostics::overlong_source());
579 }
580 None
581 }
582
/// Allocate `value` through the AST builder and return it as an arena box.
#[inline]
fn alloc<T>(&self, value: T) -> ArenaBox<'a, T> {
    self.ast.alloc(value)
}
587}
588
589#[cfg(test)]
590mod test {
591 use std::path::Path;
592
593 use oxc_ast::ast::{CommentKind, Expression, Statement};
594 use oxc_span::GetSpan;
595
596 use super::*;
597
// An empty source parses to an empty program with no diagnostics.
#[test]
fn parse_program_smoke_test() {
    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, "", SourceType::default()).parse();
    assert!(parsed.program.is_empty());
    assert!(parsed.errors.is_empty());
    assert!(!parsed.is_flow_language);
}
608
// A lone identifier parses as an identifier expression.
#[test]
fn parse_expression_smoke_test() {
    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, "a", SourceType::default()).parse_expression();
    let expression = parsed.unwrap();
    assert!(matches!(expression, Expression::Identifier(_)));
}
617
// Sources whose first comment contains `@flow` are expected to be reported as
// Flow, with the Flow diagnostic as the only error.
#[test]
fn flow_error() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    // Line comment, block comment, multi-line block comment, and a block
    // comment followed by `super;`.
    let sources = [
        "// @flow\nasdf adsf",
        "/* @flow */\n asdf asdf",
        "/**
 * @flow
 */
 asdf asdf
 ",
        "/* @flow */ super;",
    ];
    for source in sources {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(ret.is_flow_language);
        // All other diagnostics are replaced by the single Flow error.
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(ret.errors.first().unwrap().to_string(), "Flow is not supported");
    }
}
639
// A body-less `declare module` in a `.ts` file parses without errors.
#[test]
fn ts_module_declaration() {
    let allocator = Allocator::default();
    let ts = SourceType::from_path(Path::new("module.ts")).unwrap();
    let parsed = Parser::new(&allocator, "declare module 'test'\n", ts).parse();
    assert_eq!(parsed.errors.len(), 0);
}
648
// A `'use strict'` that follows another statement is a body statement, not a
// directive.
#[test]
fn directives() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    // (source, expected number of body statements)
    let cases = [
        ("import x from 'foo'; 'use strict';", 2),
        ("export {x} from 'foo'; 'use strict';", 2),
        (";'use strict';", 2),
    ];
    for (source, expected_body_len) in cases {
        let program = Parser::new(&allocator, source, source_type).parse().program;
        assert!(program.directives.is_empty(), "{source}");
        assert_eq!(program.body.len(), expected_body_len, "{source}");
    }
}
664
// V8 runtime calls (`%Name(...)`) parse only when `allow_v8_intrinsics` is set.
#[test]
fn v8_intrinsics() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    let intrinsics_on = ParseOptions { allow_v8_intrinsics: true, ..ParseOptions::default() };

    // Enabled: the call becomes a `V8IntrinsicExpression` spanning the whole source.
    {
        let source = "%DebugPrint('Raging against the Dying Light')";
        let ret =
            Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert!(ret.errors.is_empty());

        let Some(Statement::ExpressionStatement(expr_stmt)) = ret.program.body.first() else {
            panic!("Expected ExpressionStatement");
        };
        let Expression::V8IntrinsicExpression(expr) = &expr_stmt.expression else {
            panic!("Expected V8IntrinsicExpression");
        };
        assert_eq!(expr.span().source_text(source), source);
    }

    // Enabled: spread arguments are rejected.
    {
        let source = "%DebugPrint(...illegalSpread)";
        let ret =
            Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(
            ret.errors[0].to_string(),
            "V8 runtime calls cannot have spread elements as arguments"
        );
    }

    // Disabled (default options): `%` is an unexpected token.
    {
        let source = "%DebugPrint('~~')";
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(ret.errors[0].to_string(), "Unexpected token");
    }

    // TypeScript `extends` clause: exactly one error, with or without the option.
    {
        let source = "interface Props extends %enuProps {}";
        let source_type = SourceType::default().with_typescript(true);

        let with_option =
            Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert_eq!(with_option.errors.len(), 1);

        let without_option = Parser::new(&allocator, source, source_type).parse();
        assert_eq!(without_option.errors.len(), 1);
    }
}
713
// Each source contains exactly one comment of the expected kind.
#[test]
fn comments() {
    let allocator = Allocator::default();
    let source_type = SourceType::default().with_typescript(true);
    // (source, expected kind of its only comment)
    let cases = [
        ("// line comment", CommentKind::Line),
        ("/* line comment */", CommentKind::Block),
        (
            "type Foo = ( /* Require properties which are not generated automatically. */ 'bar')",
            CommentKind::Block,
        ),
    ];
    for (source, expected_kind) in cases {
        let program = Parser::new(&allocator, source, source_type).parse().program;
        assert_eq!(program.comments.len(), 1, "{source}");
        assert_eq!(program.comments.first().unwrap().kind, expected_kind, "{source}");
    }
}
733
// The hashbang value excludes the leading `#!`.
#[test]
fn hashbang() {
    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, "#!/usr/bin/node\n;", SourceType::default()).parse();
    let hashbang = parsed.program.hashbang.unwrap();
    assert_eq!(hashbang.value.as_str(), "/usr/bin/node");
}
742
// An unambiguous source type resolves to module when module syntax is
// present, and to script otherwise.
#[test]
fn unambiguous() {
    let allocator = Allocator::default();
    let source_type = SourceType::unambiguous();
    assert!(source_type.is_unambiguous());

    let module_sources = ["import x from 'foo';", "export {x} from 'foo';", "import.meta"];
    for source in module_sources {
        let resolved = Parser::new(&allocator, source, source_type).parse().program.source_type;
        assert!(resolved.is_module());
    }

    let script_sources = ["", "import('foo')"];
    for source in script_sources {
        let resolved = Parser::new(&allocator, source, source_type).parse().program.source_type;
        assert!(resolved.is_script());
    }
}
760
// Parses a BigInt literal and a long string literal; each must produce a
// non-empty program body.
#[test]
fn memory_leak() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    for source in ["2n", ";'1234567890123456789012345678901234567890'"] {
        let program = Parser::new(&allocator, source, source_type).parse().program;
        assert!(!program.body.is_empty());
    }
}
771
// A source one byte longer than `MAX_LEN` yields an empty program, a panicked
// result, and a single "overlong" diagnostic.
#[cfg(target_pointer_width = "64")]
#[cfg(not(miri))]
#[test]
fn overlong_source() {
    let target_len = MAX_LEN + 1;
    let mut source = String::with_capacity(target_len);

    // Fill in large chunks first, then single lines, then pad with newlines.
    let line = "var x = 123456;\n";
    let chunk = line.repeat(1024);
    while source.len() < target_len - chunk.len() {
        source.push_str(&chunk);
    }
    while source.len() < target_len - line.len() {
        source.push_str(line);
    }
    while source.len() < target_len {
        source.push('\n');
    }
    assert_eq!(source.len(), MAX_LEN + 1);

    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, &source, SourceType::default()).parse();
    assert!(parsed.program.is_empty());
    assert!(parsed.panicked);
    assert_eq!(parsed.errors.len(), 1);
    assert_eq!(parsed.errors.first().unwrap().to_string(), "Source length exceeds 4 GiB limit");
}
801
// A source of exactly `MAX_LEN` bytes parses normally: two statements with
// one huge block comment between them.
#[cfg(not(debug_assertions))]
#[cfg(not(miri))]
#[test]
fn legal_length_source() {
    let prefix = "const x = 1;\n/*";
    let suffix = "*/\nconst y = 2;\n";

    // Start with filler, then overwrite both ends in place so the length stays fixed.
    let mut source = "x".repeat(MAX_LEN);
    let suffix_start = MAX_LEN - suffix.len();
    source.replace_range(..prefix.len(), prefix);
    source.replace_range(suffix_start.., suffix);
    assert_eq!(source.len(), MAX_LEN);

    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, &source, SourceType::default()).parse();
    assert!(!parsed.panicked);
    assert!(parsed.errors.is_empty());
    assert_eq!(parsed.program.body.len(), 2);
}
824}