//! Oxc Parser for JavaScript and TypeScript
//!
//! Oxc's [`Parser`] has full support for
//! - The latest stable ECMAScript syntax
//! - TypeScript
//! - JSX and TSX
//! - [Stage 3 Decorators](https://github.com/tc39/proposal-decorator-metadata)
//!
//! # Usage
//!
//! The parser has a minimal API with three inputs (a [memory arena](oxc_allocator::Allocator), a
//! source string, and a [`SourceType`]) and one return struct (a [`ParserReturn`]).
//!
//! ```rust,ignore
//! let parser_return = Parser::new(&allocator, &source_text, source_type).parse();
//! ```
//!
//! # Abstract Syntax Tree (AST)
//! Oxc's AST is located in a separate [`oxc_ast`] crate. You can find type definitions for AST
//! nodes [here][`oxc_ast::ast`].
//!
//! # Performance
//!
//! The following optimization techniques are used:
//! * AST is allocated in a memory arena ([oxc_allocator](https://docs.rs/oxc_allocator)) for fast AST drop
//! * [`oxc_span::Span`] offsets use `u32` instead of `usize`
//! * Scope binding, symbol resolution and complicated syntax errors are not done in the parser,
//! they are delegated to the [semantic analyzer](https://docs.rs/oxc_semantic)
//!
//! <div class="warning">
//! Because [`oxc_span::Span`] uses `u32` instead of `usize`, Oxc can only parse files up
//! to 4 GiB in size. This shouldn't be a limitation in almost all cases.
//! </div>
//!
//! # Examples
//!
//! <https://github.com/oxc-project/oxc/blob/main/crates/oxc_parser/examples/parser.rs>
//!
//! ```rust,ignore
#![doc = include_str!("../examples/parser.rs")]
//! ```
//!
//! ### Parsing TSX
//! ```rust,ignore
#![doc = include_str!("../examples/parser_tsx.rs")]
//! ```
//!
//! # Visitor
//!
//! See [`Visit`](https://docs.rs/oxc_ast_visit) and [`VisitMut`](https://docs.rs/oxc_ast_visit).
//!
//! # Visiting without a visitor
//!
//! For ad-hoc tasks, the semantic analyzer can be used to get a parent pointing tree with untyped nodes,
//! the nodes can be iterated through a sequential loop.
//!
//! ```rust,ignore
//! for node in semantic.nodes().iter() {
//!     match node.kind() {
//!         // check node
//!     }
//! }
//! ```
//!
//! See [full linter example](https://github.com/Boshen/oxc/blob/ab2ef4f89ba3ca50c68abb2ca43e36b7793f3673/crates/oxc_linter/examples/linter.rs#L38-L39)

67pub mod config;
68mod context;
69mod cursor;
70mod error_handler;
71mod modifiers;
72mod module_record;
73mod state;
74
75mod js;
76mod jsx;
77mod ts;
78
79mod diagnostics;
80
81// Expose lexer only in benchmarks
82#[cfg(not(feature = "benchmarking"))]
83mod lexer;
84#[cfg(feature = "benchmarking")]
85#[doc(hidden)]
86pub mod lexer;
87
88use oxc_allocator::{Allocator, Box as ArenaBox, Dummy, Vec as ArenaVec};
89use oxc_ast::{
90    AstBuilder,
91    ast::{Expression, Program},
92};
93use oxc_diagnostics::OxcDiagnostic;
94use oxc_span::{SourceType, Span};
95use oxc_syntax::module_record::ModuleRecord;
96
97pub use crate::lexer::{Kind, Token};
98use crate::{
99    config::{LexerConfig, NoTokensParserConfig, ParserConfig},
100    context::{Context, StatementContext},
101    error_handler::FatalError,
102    lexer::Lexer,
103    module_record::ModuleRecordBuilder,
104    state::ParserState,
105};
106
/// Maximum length of source which can be parsed (in bytes).
/// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
// Length is constrained by 2 factors:
// 1. `Span`'s `start` and `end` are `u32`s, which limits length to `u32::MAX` bytes.
// 2. Rust's allocator APIs limit allocations to `isize::MAX`.
// https://doc.rust-lang.org/std/alloc/struct.Layout.html#method.from_size_align
pub(crate) const MAX_LEN: usize = if size_of::<usize>() >= 8 {
    // 64-bit systems: factor 1 is the binding constraint (`u32::MAX` < `isize::MAX`)
    u32::MAX as usize
} else {
    // 32-bit or 16-bit systems: factor 2 is the binding constraint (`isize::MAX` < `u32::MAX`)
    isize::MAX as usize
};
120
/// Return value of [`Parser::parse`] consisting of AST, errors and comments
///
/// ## AST Validity
///
/// [`program`] will always contain a structurally valid AST, even if there are syntax errors.
/// However, the AST may be semantically invalid. To ensure a valid AST,
/// 1. Check that [`errors`] is empty
/// 2. Run semantic analysis with [syntax error checking
///    enabled](https://docs.rs/oxc_semantic/latest/oxc_semantic/struct.SemanticBuilder.html#method.with_check_syntax_error)
///
/// ## Errors
/// Oxc's [`Parser`] is able to recover from some syntax errors and continue parsing. When this
/// happens,
/// 1. [`errors`] will be non-empty
/// 2. [`program`] will contain a full AST
/// 3. [`panicked`] will be `false`
///
/// When the parser cannot recover, it will abort and terminate parsing early. [`program`] will
/// be empty and [`panicked`] will be `true`.
///
/// [`program`]: ParserReturn::program
/// [`errors`]: ParserReturn::errors
/// [`panicked`]: ParserReturn::panicked
#[non_exhaustive]
pub struct ParserReturn<'a> {
    /// The parsed AST.
    ///
    /// Will be empty (e.g. no statements, directives, etc) if the parser panicked.
    ///
    /// ## Validity
    /// It is possible for the AST to be present and semantically invalid. This will happen if
    /// 1. The [`Parser`] encounters a recoverable syntax error
    /// 2. The logic for checking the violation is in the semantic analyzer
    ///
    /// To ensure a valid AST, check that [`errors`](ParserReturn::errors) is empty. Then, run
    /// semantic analysis with syntax error checking enabled.
    pub program: Program<'a>,

    /// The module record, describing imports/exports of the file.
    ///
    /// See <https://tc39.es/ecma262/#sec-abstract-module-records>
    pub module_record: ModuleRecord<'a>,

    /// Syntax errors encountered while parsing.
    ///
    /// This list is not comprehensive. Oxc offloads more-expensive checks to [semantic
    /// analysis](https://docs.rs/oxc_semantic), which can be enabled using
    /// [`SemanticBuilder::with_check_syntax_error`](https://docs.rs/oxc_semantic/latest/oxc_semantic/struct.SemanticBuilder.html#method.with_check_syntax_error).
    pub errors: Vec<OxcDiagnostic>,

    /// Irregular whitespaces for `Oxlint`
    pub irregular_whitespaces: Box<[Span]>,

    /// Lexed tokens in source order.
    ///
    /// Tokens are only collected when tokens are enabled in [`ParserConfig`].
    /// Empty if the parser panicked.
    pub tokens: oxc_allocator::Vec<'a, Token>,

    /// Whether the parser panicked and terminated early.
    ///
    /// This will be `false` if parsing was successful, or if parsing was able to recover from a
    /// syntax error. When `true`, [`program`] will be empty and [`errors`] will contain at least
    /// one error.
    ///
    /// [`program`]: ParserReturn::program
    /// [`errors`]: ParserReturn::errors
    pub panicked: bool,

    /// Whether the file is [flow](https://flow.org).
    pub is_flow_language: bool,
}
190
/// Parse options
///
/// You may provide options to the [`Parser`] using [`Parser::with_options`].
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
    /// Whether to parse regular expressions or not.
    ///
    /// Default: `false`
    #[cfg(feature = "regular_expression")]
    pub parse_regular_expression: bool,

    /// Allow [`return`] statements outside of functions.
    ///
    /// By default, a return statement at the top level raises an error (`false`).
    ///
    /// Default: `false`
    ///
    /// [`return`]: oxc_ast::ast::ReturnStatement
    pub allow_return_outside_function: bool,

    /// Emit [`ParenthesizedExpression`]s and [`TSParenthesizedType`] in AST.
    ///
    /// If this option is `true`, parenthesized expressions are represented by
    /// (non-standard) [`ParenthesizedExpression`] and [`TSParenthesizedType`] nodes
    /// that have a single `expression` property containing the expression inside parentheses.
    ///
    /// Default: `true`
    ///
    /// [`ParenthesizedExpression`]: oxc_ast::ast::ParenthesizedExpression
    /// [`TSParenthesizedType`]: oxc_ast::ast::TSParenthesizedType
    pub preserve_parens: bool,

    /// Allow V8 runtime calls (e.g. `%DebugPrint(foo)`) in the AST, represented as
    /// (non-standard) [`V8IntrinsicExpression`] nodes.
    /// See: [V8's Parser::ParseV8Intrinsic](https://chromium.googlesource.com/v8/v8/+/35a14c75e397302655d7b3fbe648f9490ae84b7d/src/parsing/parser.cc#4811).
    ///
    /// Default: `false`
    ///
    /// [`V8IntrinsicExpression`]: oxc_ast::ast::V8IntrinsicExpression
    pub allow_v8_intrinsics: bool,
}
231
impl Default for ParseOptions {
    /// All options default to `false`, except `preserve_parens` which defaults to `true`.
    fn default() -> Self {
        Self {
            // Field only exists when the `regular_expression` feature is enabled.
            #[cfg(feature = "regular_expression")]
            parse_regular_expression: false,
            allow_return_outside_function: false,
            preserve_parens: true,
            allow_v8_intrinsics: false,
        }
    }
}
243
/// Recursive Descent Parser for ECMAScript and TypeScript
///
/// See [`Parser::parse`] for entry function.
pub struct Parser<'a, C: ParserConfig = NoTokensParserConfig> {
    /// Memory arena that all AST nodes are allocated into
    allocator: &'a Allocator,
    /// Source code to parse
    source_text: &'a str,
    /// JavaScript / TypeScript, Script / Module, JSX, etc.
    source_type: SourceType,
    /// Parse options, set via [`Parser::with_options`]
    options: ParseOptions,
    /// Parser config (e.g. whether tokens are collected), set via [`Parser::with_config`]
    config: C,
}
254
255impl<'a> Parser<'a> {
256    /// Create a new [`Parser`]
257    ///
258    /// # Parameters
259    /// - `allocator`: [Memory arena](oxc_allocator::Allocator) for allocating AST nodes
260    /// - `source_text`: Source code to parse
261    /// - `source_type`: Source type (e.g. JavaScript, TypeScript, JSX, ESM Module, Script)
262    pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
263        let options = ParseOptions::default();
264        Self { allocator, source_text, source_type, options, config: NoTokensParserConfig }
265    }
266}
267
268impl<'a, C: ParserConfig> Parser<'a, C> {
269    /// Set parse options
270    #[must_use]
271    pub fn with_options(mut self, options: ParseOptions) -> Self {
272        self.options = options;
273        self
274    }
275
276    /// Set parser config.
277    ///
278    /// See [`ParserConfig`] for more details.
279    #[must_use]
280    pub fn with_config<Config: ParserConfig>(self, config: Config) -> Parser<'a, Config> {
281        Parser {
282            allocator: self.allocator,
283            source_text: self.source_text,
284            source_type: self.source_type,
285            options: self.options,
286            config,
287        }
288    }
289}
290
291mod parser_parse {
292    use super::*;
293
294    /// `UniquePromise` is a way to use the type system to enforce the invariant that only
295    /// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread.
296    /// This constraint is required to guarantee the soundness of some methods of these types
297    /// e.g. `Source::set_position`.
298    ///
299    /// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise`
300    /// to be provided to them. `UniquePromise::new` is not visible outside this module, so only
301    /// `Parser::parse` can create one, and it only calls `ParserImpl::new` once.
302    /// This enforces the invariant throughout the entire parser.
303    ///
304    /// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker.
305    ///
306    /// `UniquePromise::new_for_tests_and_benchmarks` is a backdoor for tests/benchmarks, so they can
307    /// create a `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes.
308    pub struct UniquePromise(());
309
310    impl UniquePromise {
311        #[inline]
312        fn new() -> Self {
313            Self(())
314        }
315
316        /// Backdoor for tests/benchmarks to create a `UniquePromise` (see above).
317        /// This function must NOT be exposed outside of tests and benchmarks,
318        /// as it allows circumventing safety invariants of the parser.
319        #[cfg(any(test, feature = "benchmarking"))]
320        pub fn new_for_tests_and_benchmarks() -> Self {
321            Self(())
322        }
323    }
324
325    impl<'a, C: ParserConfig> Parser<'a, C> {
326        /// Main entry point
327        ///
328        /// Returns an empty `Program` on unrecoverable error,
329        /// Recoverable errors are stored inside `errors`.
330        ///
331        /// See the [module-level documentation](crate) for examples and more information.
332        pub fn parse(self) -> ParserReturn<'a> {
333            let unique = UniquePromise::new();
334            let parser = ParserImpl::new(
335                self.allocator,
336                self.source_text,
337                self.source_type,
338                self.options,
339                self.config,
340                unique,
341            );
342            parser.parse()
343        }
344
345        /// Parse a single [`Expression`].
346        ///
347        /// # Example
348        ///
349        /// ```rust
350        /// use oxc_allocator::Allocator;
351        /// use oxc_ast::ast::Expression;
352        /// use oxc_parser::Parser;
353        /// use oxc_span::SourceType;
354        ///
355        /// let src = "let x = 1 + 2;";
356        /// let allocator = Allocator::new();
357        /// let source_type = SourceType::default();
358        ///
359        /// let expr: Expression<'_> = Parser::new(&allocator, src, source_type).parse_expression().unwrap();
360        /// ```
361        ///
362        /// # Errors
363        /// If the source code being parsed has syntax errors.
364        pub fn parse_expression(self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
365            let unique = UniquePromise::new();
366            let parser = ParserImpl::new(
367                self.allocator,
368                self.source_text,
369                self.source_type,
370                self.options,
371                self.config,
372                unique,
373            );
374            parser.parse_expression()
375        }
376    }
377}
378use parser_parse::UniquePromise;
379
/// Implementation of parser.
/// `Parser` is just a public wrapper, the guts of the implementation is in this type.
struct ParserImpl<'a, C: ParserConfig> {
    /// Options
    options: ParseOptions,

    /// The lexer producing tokens from `source_text`.
    /// Crate-visible so parser internals can drive it directly.
    pub(crate) lexer: Lexer<'a, C::LexerConfig>,

    /// SourceType: JavaScript or TypeScript, Script or Module, jsx support?
    source_type: SourceType,

    /// Source Code
    source_text: &'a str,

    /// All syntax errors from parser and lexer
    /// Note: favor adding to `Diagnostics` instead of raising Err
    errors: Vec<OxcDiagnostic>,

    /// Errors that are only valid if the file is determined to be a Script (not a Module).
    /// For `ModuleKind::Unambiguous`, we defer ESM-only errors (like top-level await)
    /// until we know whether the file is ESM or Script.
    /// If resolved to Module → discard these errors.
    /// If resolved to Script → emit these errors.
    deferred_script_errors: Vec<OxcDiagnostic>,

    /// Unrecoverable error, if one occurred.
    /// When set, `parse` returns an empty `Program` with `panicked: true`.
    fatal_error: Option<FatalError>,

    /// The current parsing token
    token: Token,

    /// The end range of the previous token
    prev_token_end: u32,

    /// Parser state
    state: ParserState<'a>,

    /// Parsing context
    ctx: Context,

    /// Ast builder for creating AST nodes
    ast: AstBuilder<'a>,

    /// Module Record Builder
    module_record_builder: ModuleRecordBuilder<'a>,

    /// Precomputed typescript detection
    is_ts: bool,
}
428
impl<'a, C: ParserConfig> ParserImpl<'a, C> {
    /// Create a new `ParserImpl`.
    ///
    /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist
    /// on a single thread at one time.
    #[inline]
    #[expect(clippy::needless_pass_by_value)]
    pub fn new(
        allocator: &'a Allocator,
        source_text: &'a str,
        source_type: SourceType,
        options: ParseOptions,
        config: C,
        unique: UniquePromise,
    ) -> Self {
        Self {
            options,
            // `unique` is forwarded to the lexer, which also requires proof of uniqueness
            lexer: Lexer::new(allocator, source_text, source_type, config.lexer_config(), unique),
            source_type,
            source_text,
            errors: vec![],
            deferred_script_errors: vec![],
            fatal_error: None,
            token: Token::default(),
            prev_token_end: 0,
            state: ParserState::new(),
            ctx: Self::default_context(source_type, options),
            ast: AstBuilder::new(allocator),
            module_record_builder: ModuleRecordBuilder::new(allocator, source_type),
            is_ts: source_type.is_typescript(),
        }
    }

    /// Main entry point
    ///
    /// Returns an empty `Program` on unrecoverable error,
    /// Recoverable errors are stored inside `errors`.
    #[inline]
    pub fn parse(mut self) -> ParserReturn<'a> {
        let mut program = self.parse_program();
        let mut panicked = false;

        if let Some(fatal_error) = self.fatal_error.take() {
            panicked = true;
            // Discard recoverable errors recorded after the point of the fatal error
            self.errors.truncate(fatal_error.errors_len);
            if !self.lexer.errors.is_empty() && self.cur_kind().is_eof() {
                // Noop
                // Lexer already reported an error and we stopped at EOF;
                // reporting the fatal parser error as well would be redundant.
            } else {
                self.error(fatal_error.error);
            }

            // Replace the partially-parsed AST with an empty `Program`
            program = Program::dummy(self.ast.allocator);
            program.source_type = self.source_type;
            program.source_text = self.source_text;
        }

        self.check_unfinished_errors();

        if let Some(overlong_error) = self.overlong_error() {
            panicked = true;
            // All previously-collected errors are bogus (the lexer was fed "\0" instead of
            // the real source — see `overlong_error`), so report only this one.
            self.lexer.errors.clear();
            self.errors.clear();
            self.error(overlong_error);
        }

        let mut is_flow_language = false;
        let mut errors = vec![];
        // only check for `@flow` if the file failed to parse.
        if (!self.lexer.errors.is_empty() || !self.errors.is_empty())
            && let Some(error) = self.flow_error()
        {
            is_flow_language = true;
            errors.push(error);
        }
        let (module_record, mut module_record_errors) = self.module_record_builder.build();
        // If a Flow diagnostic was emitted, `errors` has exactly 1 entry and all other
        // diagnostics are suppressed (they stem from Flow syntax Oxc does not support).
        if errors.len() != 1 {
            errors
                .reserve(self.lexer.errors.len() + self.errors.len() + module_record_errors.len());
            errors.append(&mut self.lexer.errors);
            errors.append(&mut self.errors);
            errors.append(&mut module_record_errors);
        }
        let irregular_whitespaces =
            std::mem::take(&mut self.lexer.trivia_builder.irregular_whitespaces).into_boxed_slice();

        let source_type = program.source_type;
        if source_type.is_unambiguous() {
            if module_record.has_module_syntax {
                // Resolved to Module - discard deferred script errors (TLA is valid in ESM)
                // but emit deferred module errors (HTML comments are invalid in ESM)
                program.source_type = source_type.with_module(true);
                errors.append(&mut self.lexer.deferred_module_errors);
            } else {
                // Resolved to Script - emit deferred script errors
                // discard deferred module errors (HTML comments are valid in scripts)
                program.source_type = source_type.with_script(true);
                errors.extend(self.deferred_script_errors);
            }
        }

        // Return no tokens when the parser panicked (matches `ParserReturn::tokens` docs)
        let tokens = if panicked {
            ArenaVec::new_in(self.ast.allocator)
        } else {
            self.lexer.finalize_tokens()
        };

        ParserReturn {
            program,
            module_record,
            errors,
            irregular_whitespaces,
            tokens,
            panicked,
            is_flow_language,
        }
    }

    /// Parse a single expression.
    ///
    /// Returns `Err` with all collected diagnostics if any fatal, lexer or parser
    /// error occurred.
    pub fn parse_expression(mut self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
        // initialize cur_token and prev_token by moving onto the first token
        self.bump_any();
        let expr = self.parse_expr();
        if let Some(FatalError { error, .. }) = self.fatal_error.take() {
            return Err(vec![error]);
        }
        self.check_unfinished_errors();
        let errors = self.lexer.errors.into_iter().chain(self.errors).collect::<Vec<_>>();
        if !errors.is_empty() {
            return Err(errors);
        }
        Ok(expr)
    }

    /// Parse the entire source text into a [`Program`].
    #[expect(clippy::cast_possible_truncation)]
    fn parse_program(&mut self) -> Program<'a> {
        // Initialize by moving onto the first token.
        // Checks for hashbang comment.
        self.token = self.lexer.first_token();

        let hashbang = self.parse_hashbang();
        self.ctx |= Context::TopLevel;
        let (directives, mut statements) = self.parse_directives_and_statements();

        // In unambiguous mode, if ESM syntax was detected (import/export/import.meta),
        // we need to reparse statements that were originally parsed with `await` as identifier.
        // TypeScript's behavior: initially parse `await /x/` as division, then reparse as
        // await expression with regex when ESM is detected.
        if self.source_type.is_unambiguous()
            && self.module_record_builder.has_module_syntax()
            && !self.state.potential_await_reparse.is_empty()
        {
            self.reparse_potential_top_level_awaits(&mut statements);
        }

        // `as u32` can only truncate for sources longer than `MAX_LEN`;
        // those are reported separately via `overlong_error` in `parse`.
        let span = Span::new(0, self.source_text.len() as u32);
        let comments = self.ast.vec_from_iter(self.lexer.trivia_builder.comments.iter().copied());
        self.ast.program(
            span,
            self.source_type,
            self.source_text,
            comments,
            hashbang,
            directives,
            statements,
        )
    }

    /// Reparse statements that may contain top-level await expressions.
    ///
    /// In unambiguous mode, statements like `await /x/u` are initially parsed as
    /// `await / x / u` (identifier with divisions). If ESM syntax is detected,
    /// we need to reparse them with the await context enabled.
    fn reparse_potential_top_level_awaits(
        &mut self,
        statements: &mut oxc_allocator::Vec<'a, oxc_ast::ast::Statement<'a>>,
    ) {
        // Token stream is already complete from the first parse.
        // Reparsing here is only to patch AST nodes, so keep the original token stream.
        let original_tokens =
            if self.lexer.config.tokens() { Some(self.lexer.take_tokens()) } else { None };

        let checkpoints = std::mem::take(&mut self.state.potential_await_reparse);
        for (stmt_index, checkpoint) in checkpoints {
            // Rewind to the checkpoint
            self.rewind(checkpoint);

            // Parse the statement with await context enabled (TopLevel context is already set)
            let stmt = self.context_add(Context::Await, |p| {
                p.parse_statement_list_item(StatementContext::StatementList)
            });

            // Replace the statement if the index is valid
            if stmt_index < statements.len() {
                statements[stmt_index] = stmt;
            }
        }

        if let Some(original_tokens) = original_tokens {
            self.lexer.set_tokens(original_tokens);
        }
    }

    /// Compute the initial parsing [`Context`] from source type and options.
    fn default_context(source_type: SourceType, options: ParseOptions) -> Context {
        let mut ctx = Context::default().and_ambient(source_type.is_typescript_definition());
        if source_type.is_module() {
            // for [top-level-await](https://tc39.es/proposal-top-level-await/)
            ctx = ctx.and_await(true);
        }
        // CommonJS files are wrapped in a function, so return is allowed at top-level
        if options.allow_return_outside_function || source_type.is_commonjs() {
            ctx = ctx.and_return(true);
        }
        ctx
    }

    /// Check for Flow declaration if the file cannot be parsed.
    /// The declaration must be [on the first line before any code](https://flow.org/en/docs/usage/#toc-prepare-your-code-for-flow)
    ///
    /// Returns the Flow diagnostic and clears all parser errors if the first
    /// comment contains `@flow`; otherwise returns `None`.
    fn flow_error(&mut self) -> Option<OxcDiagnostic> {
        if !self.source_type.is_javascript() {
            return None;
        }
        let span = self.lexer.trivia_builder.comments.first()?.span;
        if span.source_text(self.source_text).contains("@flow") {
            self.errors.clear();
            Some(diagnostics::flow(span))
        } else {
            None
        }
    }

    /// Report errors for constructs that were provisionally accepted during parsing
    /// but never resolved into a valid form by the end of the parse.
    fn check_unfinished_errors(&mut self) {
        use oxc_span::GetSpan;
        // PropertyDefinition : cover_initialized_name
        // It is a Syntax Error if any source text is matched by this production.
        for expr in self.state.cover_initialized_name.values() {
            self.errors.push(diagnostics::cover_initialized_name(expr.span()));
        }
    }

    /// Check if source length exceeds MAX_LEN, if the file cannot be parsed.
    /// Original parsing error is not real - `Lexer::new` substituted "\0" as the source text.
    #[cold]
    fn overlong_error(&self) -> Option<OxcDiagnostic> {
        if self.source_text.len() > MAX_LEN {
            return Some(diagnostics::overlong_source());
        }
        None
    }

    /// Allocate `value` in the memory arena, returning an [`ArenaBox`].
    #[inline]
    fn alloc<T>(&self, value: T) -> ArenaBox<'a, T> {
        self.ast.alloc(value)
    }
}
682
683#[cfg(test)]
684mod test {
685    use std::path::Path;
686
687    use oxc_ast::ast::{CommentKind, Expression, Statement};
688    use oxc_span::GetSpan;
689
690    use super::*;
691
692    #[test]
693    fn parse_program_smoke_test() {
694        let allocator = Allocator::default();
695        let source_type = SourceType::default();
696        let source = "";
697        let ret = Parser::new(&allocator, source, source_type).parse();
698        assert!(ret.program.is_empty());
699        assert!(ret.errors.is_empty());
700        assert!(!ret.is_flow_language);
701    }
702
703    #[test]
704    fn parse_expression_smoke_test() {
705        let allocator = Allocator::default();
706        let source_type = SourceType::default();
707        let source = "a";
708        let expr = Parser::new(&allocator, source, source_type).parse_expression().unwrap();
709        assert!(matches!(expr, Expression::Identifier(_)));
710    }
711
712    #[test]
713    fn flow_error() {
714        let allocator = Allocator::default();
715        let source_type = SourceType::default();
716        let sources = [
717            "// @flow\nasdf adsf",
718            "/* @flow */\n asdf asdf",
719            "/**
720             * @flow
721             */
722             asdf asdf
723             ",
724            "/* @flow */ super;",
725        ];
726        for source in sources {
727            let ret = Parser::new(&allocator, source, source_type).parse();
728            assert!(ret.is_flow_language);
729            assert_eq!(ret.errors.len(), 1);
730            assert_eq!(ret.errors.first().unwrap().to_string(), "Flow is not supported");
731        }
732    }
733
734    #[test]
735    fn ts_module_declaration() {
736        let allocator = Allocator::default();
737        let source_type = SourceType::from_path(Path::new("module.ts")).unwrap();
738        let source = "declare module 'test'\n";
739        let ret = Parser::new(&allocator, source, source_type).parse();
740        assert_eq!(ret.errors.len(), 0);
741    }
742
743    #[test]
744    fn directives() {
745        let allocator = Allocator::default();
746        let source_type = SourceType::default();
747        let sources = [
748            ("import x from 'foo'; 'use strict';", 2),
749            ("export {x} from 'foo'; 'use strict';", 2),
750            (";'use strict';", 2),
751        ];
752        for (source, body_length) in sources {
753            let ret = Parser::new(&allocator, source, source_type).parse();
754            assert!(ret.program.directives.is_empty(), "{source}");
755            assert_eq!(ret.program.body.len(), body_length, "{source}");
756        }
757    }
758
759    #[test]
760    fn v8_intrinsics() {
761        let allocator = Allocator::default();
762        let source_type = SourceType::default();
763        {
764            let source = "%DebugPrint('Raging against the Dying Light')";
765            let opts = ParseOptions { allow_v8_intrinsics: true, ..ParseOptions::default() };
766            let ret = Parser::new(&allocator, source, source_type).with_options(opts).parse();
767            assert!(ret.errors.is_empty());
768
769            if let Some(Statement::ExpressionStatement(expr_stmt)) = ret.program.body.first() {
770                if let Expression::V8IntrinsicExpression(expr) = &expr_stmt.expression {
771                    assert_eq!(expr.span().source_text(source), source);
772                } else {
773                    panic!("Expected V8IntrinsicExpression");
774                }
775            } else {
776                panic!("Expected ExpressionStatement");
777            }
778        }
779        {
780            let source = "%DebugPrint(...illegalSpread)";
781            let opts = ParseOptions { allow_v8_intrinsics: true, ..ParseOptions::default() };
782            let ret = Parser::new(&allocator, source, source_type).with_options(opts).parse();
783            assert_eq!(ret.errors.len(), 1);
784            assert_eq!(
785                ret.errors[0].to_string(),
786                "V8 runtime calls cannot have spread elements as arguments"
787            );
788        }
789        {
790            let source = "%DebugPrint('~~')";
791            let ret = Parser::new(&allocator, source, source_type).parse();
792            assert_eq!(ret.errors.len(), 1);
793            assert_eq!(ret.errors[0].to_string(), "Unexpected token");
794        }
795        {
796            // https://github.com/oxc-project/oxc/issues/12121
797            let source = "interface Props extends %enuProps {}";
798            let source_type = SourceType::default().with_typescript(true);
799            // Should not panic whether `allow_v8_intrinsics` is set or not.
800            let opts = ParseOptions { allow_v8_intrinsics: true, ..ParseOptions::default() };
801            let ret = Parser::new(&allocator, source, source_type).with_options(opts).parse();
802            assert_eq!(ret.errors.len(), 1);
803            let ret = Parser::new(&allocator, source, source_type).parse();
804            assert_eq!(ret.errors.len(), 1);
805        }
806    }
807
808    #[test]
809    fn comments() {
810        let allocator = Allocator::default();
811        let source_type = SourceType::default().with_typescript(true);
812        let sources = [
813            ("// line comment", CommentKind::Line),
814            ("/* line comment */", CommentKind::SingleLineBlock),
815            (
816                "type Foo = ( /* Require properties which are not generated automatically. */ 'bar')",
817                CommentKind::SingleLineBlock,
818            ),
819        ];
820        for (source, kind) in sources {
821            let ret = Parser::new(&allocator, source, source_type).parse();
822            let comments = &ret.program.comments;
823            assert_eq!(comments.len(), 1, "{source}");
824            assert_eq!(comments.first().unwrap().kind, kind, "{source}");
825        }
826    }
827
828    #[test]
829    fn hashbang() {
830        let allocator = Allocator::default();
831        let source_type = SourceType::default();
832        let source = "#!/usr/bin/node\n;";
833        let ret = Parser::new(&allocator, source, source_type).parse();
834        assert_eq!(ret.program.hashbang.unwrap().value.as_str(), "/usr/bin/node");
835    }
836
837    #[test]
838    fn unambiguous() {
839        let allocator = Allocator::default();
840        let source_type = SourceType::unambiguous();
841        assert!(source_type.is_unambiguous());
842        let sources = ["import x from 'foo';", "export {x} from 'foo';", "import.meta"];
843        for source in sources {
844            let ret = Parser::new(&allocator, source, source_type).parse();
845            assert!(ret.program.source_type.is_module());
846        }
847
848        let sources = ["", "import('foo')"];
849        for source in sources {
850            let ret = Parser::new(&allocator, source, source_type).parse();
851            assert!(ret.program.source_type.is_script());
852        }
853    }
854
855    #[test]
856    fn binary_file() {
857        let allocator = Allocator::default();
858        let source_type = SourceType::default();
859
860        // U+FFFD as a standalone token — file appears to be binary
861        let ret = Parser::new(&allocator, "\u{FFFD}", source_type).parse();
862        assert!(ret.program.is_empty());
863        assert_eq!(ret.errors.len(), 1);
864        assert_eq!(ret.errors[0].to_string(), "File appears to be binary.");
865
866        // U+FFFD inside string literals — should parse fine
867        let ret = Parser::new(&allocator, "\"oops \u{FFFD} oops\";", source_type).parse();
868        assert!(!ret.program.is_empty());
869        assert!(ret.errors.is_empty());
870    }
871
872    #[test]
873    fn memory_leak() {
874        let allocator = Allocator::default();
875        let source_type = SourceType::default();
876        let sources = ["2n", ";'1234567890123456789012345678901234567890'"];
877        for source in sources {
878            let ret = Parser::new(&allocator, source, source_type).parse();
879            assert!(!ret.program.body.is_empty());
880        }
881    }
882
    // Source with length MAX_LEN + 1 fails to parse.
    // Skip this test on 32-bit systems as impossible to allocate a string longer than `isize::MAX`.
    // Also skip running under Miri since it takes so long.
    #[cfg(target_pointer_width = "64")]
    #[cfg(not(miri))]
    #[test]
    fn overlong_source() {
        use std::{
            alloc::{self, Layout},
            ptr::NonNull,
            slice, str,
        };

        /// A string that has a length of `MAX_LEN + 1`, and is entirely zeros.
        ///
        /// We need to create a `&str` with `MAX_LEN + 1` length, but don't want to write 4 GiB of data,
        /// as it's too slow. This type uses `alloc_zeroed` which on most platforms will just create zeroed pages
        /// without actually writing any data, and so is much faster.
        struct ZeroedString {
            // Start of the zeroed allocation; owned by this struct and freed in `Drop`.
            ptr: NonNull<u8>,
        }

        impl ZeroedString {
            const LEN: usize = MAX_LEN + 1;
            const PAGE_SIZE: usize = 4096;
            // Evaluated at compile time: an invalid size/align combination is a
            // const-eval error, not a runtime panic.
            const LAYOUT: Layout = match Layout::from_size_align(Self::LEN, Self::PAGE_SIZE) {
                Ok(layout) => layout,
                Err(_) => panic!("Failed to create layout"),
            };

            /// Allocate `LEN` zeroed bytes. Panics if the allocator returns null (OOM).
            fn new() -> Self {
                // SAFETY: `LAYOUT` is valid and non-zero size.
                let ptr = unsafe { alloc::alloc_zeroed(Self::LAYOUT) };
                let Some(ptr) = NonNull::new(ptr) else {
                    panic!("Failed to allocate {} bytes", Self::LEN);
                };
                Self { ptr }
            }

            /// View the allocation as a `&str` of length `LEN`.
            fn as_str(&self) -> &str {
                // SAFETY: `self.ptr` is pointer to start of `LEN` initialized and zeroed bytes.
                // A slice consisting entirely of zeros is valid UTF-8.
                unsafe {
                    str::from_utf8_unchecked(slice::from_raw_parts(self.ptr.as_ptr(), Self::LEN))
                }
            }
        }

        impl Drop for ZeroedString {
            fn drop(&mut self) {
                // SAFETY: `self.ptr` is address of an allocation made with `LAYOUT`
                unsafe { alloc::dealloc(self.ptr.as_ptr(), Self::LAYOUT) };
            }
        }

        // Create long source text (MAX_LEN + 1 bytes)
        let zeroed_string = ZeroedString::new();
        let source_text = zeroed_string.as_str();

        // Attempt to parse the source text
        let allocator = Allocator::default();
        let ret = Parser::new(&allocator, source_text, SourceType::default()).parse();

        // Parsing should fail: one error, empty program, parser marked as panicked.
        assert!(ret.program.is_empty());
        assert!(ret.panicked);
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(ret.errors.first().unwrap().to_string(), "Source length exceeds 4 GiB limit");
    }
952
953    // Source with length MAX_LEN parses OK.
954    // This test takes over 1 minute on an M1 Macbook Pro unless compiled in release mode.
955    // `not(debug_assertions)` is a proxy for detecting release mode.
956    // Also skip running under Miri since it takes so long.
957    #[cfg(not(debug_assertions))]
958    #[cfg(not(miri))]
959    #[test]
960    fn legal_length_source() {
961        // Build a string MAX_LEN bytes long which doesn't take too long to parse
962        let head = "const x = 1;\n/*";
963        let foot = "*/\nconst y = 2;\n";
964        let mut source = "x".repeat(MAX_LEN);
965        source.replace_range(..head.len(), head);
966        source.replace_range(MAX_LEN - foot.len().., foot);
967        assert_eq!(source.len(), MAX_LEN);
968
969        let allocator = Allocator::default();
970        let ret = Parser::new(&allocator, &source, SourceType::default()).parse();
971        assert!(!ret.panicked);
972        assert!(ret.errors.is_empty());
973        assert_eq!(ret.program.body.len(), 2);
974    }
975}