#![doc = include_str!("../examples/parser.rs")]
#![doc = include_str!("../examples/parser_tsx.rs")]
pub mod config;
mod context;
mod cursor;
mod error_handler;
mod modifiers;
mod module_record;
mod state;
mod js;
mod jsx;
mod ts;
mod diagnostics;
#[cfg(not(feature = "benchmarking"))]
mod lexer;
#[cfg(feature = "benchmarking")]
#[doc(hidden)]
pub mod lexer;
use oxc_allocator::{Allocator, Box as ArenaBox, Dummy, Vec as ArenaVec};
use oxc_ast::{
AstBuilder,
ast::{Expression, Program},
};
use oxc_diagnostics::OxcDiagnostic;
use oxc_span::{SourceType, Span};
use oxc_syntax::module_record::ModuleRecord;
pub use crate::lexer::{Kind, Token};
use crate::{
config::{LexerConfig, NoTokensParserConfig, ParserConfig},
context::{Context, StatementContext},
error_handler::FatalError,
lexer::Lexer,
module_record::ModuleRecordBuilder,
state::ParserState,
};
/// Largest source text length, in bytes, that the parser accepts.
///
/// `ParserImpl::overlong_error` reports a diagnostic when `source_text.len()` exceeds this.
/// Where `usize` is at least 8 bytes wide the limit is `u32::MAX` (the program span is built
/// from `u32` offsets); on narrower targets it is `isize::MAX`.
pub(crate) const MAX_LEN: usize = {
    let usize_is_at_least_64_bits = size_of::<usize>() >= 8;
    if usize_is_at_least_64_bits { u32::MAX as usize } else { isize::MAX as usize }
};
/// Everything produced by one parser run; this is the value assembled at the end of
/// `ParserImpl::parse`.
///
/// Marked `#[non_exhaustive]`, so code outside this crate cannot build it with a struct
/// literal or destructure it exhaustively.
#[non_exhaustive]
pub struct ParserReturn<'a> {
/// The parsed program.
///
/// When a fatal error was recorded this is replaced by `Program::dummy(..)` with only
/// `source_type` and `source_text` copied over from the parser. For an unambiguous source
/// type, `program.source_type` is resolved to module or script depending on whether the
/// module record reports module syntax.
pub program: Program<'a>,
/// The module record returned by `ModuleRecordBuilder::build`.
pub module_record: ModuleRecord<'a>,
/// Diagnostics gathered during the run.
///
/// Normally lexer errors, then parser errors, then module-record errors, in that order.
/// If the source is detected as Flow, only the single Flow diagnostic is placed here
/// instead. For an unambiguous source type, the deferred module errors (module syntax
/// present) or deferred script errors (otherwise) are appended afterwards.
pub errors: Vec<OxcDiagnostic>,
/// Spans taken from the lexer trivia builder's `irregular_whitespaces` list.
pub irregular_whitespaces: Box<[Span]>,
/// Tokens returned by `Lexer::finalize_tokens`; always empty when `panicked` is `true`.
///
/// NOTE(review): presumably also empty unless the parser config enables token
/// collection — confirm against `ParserConfig`.
pub tokens: oxc_allocator::Vec<'a, Token>,
/// `true` when a fatal error was recorded, or when the source text is longer than
/// `MAX_LEN`.
pub panicked: bool,
/// `true` when the run produced errors and the first comment of a JavaScript source
/// contains `@flow`.
pub is_flow_language: bool,
}
/// Switches that adjust parser behaviour; applied to a [`Parser`] with
/// `Parser::with_options`.
///
/// The `Default` implementation turns every switch off except `preserve_parens`.
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
/// Only present when the `regular_expression` cargo feature is enabled. Default: `false`.
///
/// NOTE(review): presumably controls whether regular expression literals are parsed into
/// a pattern AST; the code that reads this flag is not in this file — confirm.
#[cfg(feature = "regular_expression")]
pub parse_regular_expression: bool,
/// When `true`, the initial parsing context gets its "return" flag set, exactly as it
/// does for CommonJS sources (see `ParserImpl::default_context`). Default: `false`.
pub allow_return_outside_function: bool,
/// Default: `true`.
///
/// NOTE(review): presumably keeps parenthesized expressions as their own AST nodes
/// instead of unwrapping them; not verifiable from this file — confirm.
pub preserve_parens: bool,
/// Accept V8 runtime intrinsic calls such as `%DebugPrint(x)`. Default: `false`.
///
/// The unit tests in this file show such a call yielding a `V8IntrinsicExpression` when
/// enabled and an "Unexpected token" error when disabled.
pub allow_v8_intrinsics: bool,
}
impl Default for ParseOptions {
fn default() -> Self {
Self {
#[cfg(feature = "regular_expression")]
parse_regular_expression: false,
allow_return_outside_function: false,
preserve_parens: true,
allow_v8_intrinsics: false,
}
}
}
/// Public entry point of the parser: holds the inputs and settings for one parse.
///
/// Build one with `Parser::new`, optionally adjust it with `with_options` /
/// `with_config`, then consume it with `parse` or `parse_expression`.
///
/// `C` selects the parser configuration and defaults to `NoTokensParserConfig`.
pub struct Parser<'a, C: ParserConfig = NoTokensParserConfig> {
// Allocator handed to the lexer, the AST builder and the module record builder.
allocator: &'a Allocator,
// The text to parse.
source_text: &'a str,
// Source type the text is parsed as.
source_type: SourceType,
// Behaviour switches; `ParseOptions::default()` unless replaced via `with_options`.
options: ParseOptions,
// Parser configuration; its `lexer_config()` is passed on to the lexer.
config: C,
}
impl<'a> Parser<'a> {
    /// Creates a parser for `source_text`, interpreted as `source_type`.
    ///
    /// The parser starts out with `ParseOptions::default()` and the
    /// `NoTokensParserConfig` configuration.
    pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
        Self {
            allocator,
            source_text,
            source_type,
            options: ParseOptions::default(),
            config: NoTokensParserConfig,
        }
    }
}
impl<'a, C: ParserConfig> Parser<'a, C> {
    /// Returns this parser with its options replaced by `options`.
    #[must_use]
    pub fn with_options(self, options: ParseOptions) -> Self {
        Self { options, ..self }
    }

    /// Returns a parser with the same allocator, source text, source type and options,
    /// but using `config` as its configuration (which may change the parser's type).
    #[must_use]
    pub fn with_config<Config: ParserConfig>(self, config: Config) -> Parser<'a, Config> {
        // The previous configuration is not carried over.
        let Self { allocator, source_text, source_type, options, .. } = self;
        Parser { allocator, source_text, source_type, options, config }
    }
}
mod parser_parse {
use super::*;
pub struct UniquePromise(());
impl UniquePromise {
#[inline]
fn new() -> Self {
Self(())
}
#[cfg(any(test, feature = "benchmarking"))]
pub fn new_for_tests_and_benchmarks() -> Self {
Self(())
}
}
impl<'a, C: ParserConfig> Parser<'a, C> {
pub fn parse(self) -> ParserReturn<'a> {
let unique = UniquePromise::new();
let parser = ParserImpl::new(
self.allocator,
self.source_text,
self.source_type,
self.options,
self.config,
unique,
);
parser.parse()
}
pub fn parse_expression(self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
let unique = UniquePromise::new();
let parser = ParserImpl::new(
self.allocator,
self.source_text,
self.source_type,
self.options,
self.config,
unique,
);
parser.parse_expression()
}
}
}
use parser_parse::UniquePromise;
/// Internal parser state; created by `Parser::parse` / `Parser::parse_expression` and
/// consumed by the method of the same name on this type.
struct ParserImpl<'a, C: ParserConfig> {
// Behaviour switches copied from the public `Parser`.
options: ParseOptions,
// Token source. Also owns the lexer's own error list, the deferred module errors and the
// trivia builder (comments, irregular whitespace) that `parse` reads.
pub(crate) lexer: Lexer<'a, C::LexerConfig>,
// Source type the text is being parsed as.
source_type: SourceType,
// The text being parsed.
source_text: &'a str,
// Diagnostics reported by the parser itself (as opposed to the lexer).
errors: Vec<OxcDiagnostic>,
// Diagnostics that are only reported when an unambiguous source resolves to a script.
deferred_script_errors: Vec<OxcDiagnostic>,
// Set when parsing hit a fatal error; `parse` then discards the program.
fatal_error: Option<FatalError>,
// Set from `lexer.first_token()` at the start of `parse_program`.
// NOTE(review): presumably always the current token — confirm in the cursor module.
token: Token,
// Starts at 0. NOTE(review): presumably the end offset of the previous token — confirm.
prev_token_end: u32,
// Parser bookkeeping; this file reads its `potential_await_reparse` and
// `cover_initialized_name` members.
state: ParserState<'a>,
// Current parsing context flags; initialised by `default_context`.
ctx: Context,
// Builder used to create AST nodes in the allocator.
ast: AstBuilder<'a>,
// Produces the `ModuleRecord` (and its errors) returned from `parse`.
module_record_builder: ModuleRecordBuilder<'a>,
// Cached `source_type.is_typescript()`.
is_ts: bool,
}
impl<'a, C: ParserConfig> ParserImpl<'a, C> {
/// Builds the internal parser state for one parse.
///
/// `unique` is forwarded to `Lexer::new`, and `config` supplies the lexer configuration.
/// Error lists start empty, `prev_token_end` starts at 0 and the initial context comes
/// from `default_context`.
#[inline]
#[expect(clippy::needless_pass_by_value)]
pub fn new(
    allocator: &'a Allocator,
    source_text: &'a str,
    source_type: SourceType,
    options: ParseOptions,
    config: C,
    unique: UniquePromise,
) -> Self {
    let lexer = Lexer::new(allocator, source_text, source_type, config.lexer_config(), unique);
    let initial_context = Self::default_context(source_type, options);
    Self {
        options,
        lexer,
        source_type,
        source_text,
        errors: Vec::new(),
        deferred_script_errors: Vec::new(),
        fatal_error: None,
        token: Token::default(),
        prev_token_end: 0,
        state: ParserState::new(),
        ctx: initial_context,
        ast: AstBuilder::new(allocator),
        module_record_builder: ModuleRecordBuilder::new(allocator, source_type),
        is_ts: source_type.is_typescript(),
    }
}
/// Parses the whole source text as a program and assembles the final [`ParserReturn`].
///
/// Consumes the parser. The steps below run in a fixed order because several of them
/// modify `self.errors` / `self.lexer.errors`, and later steps read those lists.
#[inline]
pub fn parse(mut self) -> ParserReturn<'a> {
let mut program = self.parse_program();
let mut panicked = false;
// A fatal error aborts the parse: the partial AST is thrown away below.
if let Some(fatal_error) = self.fatal_error.take() {
panicked = true;
// Keep only the first `errors_len` parser errors.
// NOTE(review): presumably the error count at the time the fatal error was raised — confirm.
self.errors.truncate(fatal_error.errors_len);
if !self.lexer.errors.is_empty() && self.cur_kind().is_eof() {
// Intentionally empty: when the lexer already reported errors and the current token
// is EOF, the fatal error is not reported on top of them.
} else {
self.error(fatal_error.error);
}
// Replace the program with a dummy that still carries the source type and text.
program = Program::dummy(self.ast.allocator);
program.source_type = self.source_type;
program.source_text = self.source_text;
}
self.check_unfinished_errors();
// An overlong source replaces every other diagnostic with a single error.
if let Some(overlong_error) = self.overlong_error() {
panicked = true;
self.lexer.errors.clear();
self.errors.clear();
self.error(overlong_error);
}
let mut is_flow_language = false;
let mut errors = vec![];
// Flow detection is only attempted when the parse produced at least one error.
// Note that `flow_error` clears `self.errors` when it reports.
if (!self.lexer.errors.is_empty() || !self.errors.is_empty())
&& let Some(error) = self.flow_error()
{
is_flow_language = true;
errors.push(error);
}
let (module_record, mut module_record_errors) = self.module_record_builder.build();
// `errors` holds exactly one entry only when the Flow diagnostic was pushed above; in that
// case it is the sole reported error and the collected ones are left out.
if errors.len() != 1 {
errors
.reserve(self.lexer.errors.len() + self.errors.len() + module_record_errors.len());
// Order of the final list: lexer errors, parser errors, module-record errors.
errors.append(&mut self.lexer.errors);
errors.append(&mut self.errors);
errors.append(&mut module_record_errors);
}
let irregular_whitespaces =
std::mem::take(&mut self.lexer.trivia_builder.irregular_whitespaces).into_boxed_slice();
let source_type = program.source_type;
// Resolve an unambiguous source type now that it is known whether module syntax was seen,
// and report the deferred errors belonging to the chosen interpretation.
if source_type.is_unambiguous() {
if module_record.has_module_syntax {
program.source_type = source_type.with_module(true);
errors.append(&mut self.lexer.deferred_module_errors);
} else {
program.source_type = source_type.with_script(true);
errors.extend(self.deferred_script_errors);
}
}
// No tokens are returned for a panicked parse.
let tokens = if panicked {
ArenaVec::new_in(self.ast.allocator)
} else {
self.lexer.finalize_tokens()
};
ParserReturn {
program,
module_record,
errors,
irregular_whitespaces,
tokens,
panicked,
is_flow_language,
}
}
/// Parses the source text as a single expression.
///
/// # Errors
///
/// Returns only the fatal error if one was recorded. Otherwise returns the lexer errors
/// followed by the parser errors, if there are any.
pub fn parse_expression(mut self) -> Result<Expression<'a>, Vec<OxcDiagnostic>> {
    // Advance once before parsing the expression.
    self.bump_any();
    let expression = self.parse_expr();

    if let Some(fatal) = self.fatal_error.take() {
        return Err(vec![fatal.error]);
    }

    self.check_unfinished_errors();
    let collected: Vec<_> = self.lexer.errors.into_iter().chain(self.errors).collect();
    if collected.is_empty() { Ok(expression) } else { Err(collected) }
}
/// Parses the hashbang, directives and statements, and builds the `Program` node.
///
/// If an unambiguous source turned out to contain module syntax and statements were
/// recorded for a possible `await` re-parse, those statements are parsed again first.
/// The program span always covers the whole source text.
#[expect(clippy::cast_possible_truncation)]
fn parse_program(&mut self) -> Program<'a> {
    // Load the first token before anything else is parsed.
    self.token = self.lexer.first_token();
    let hashbang = self.parse_hashbang();
    self.ctx |= Context::TopLevel;
    let (directives, mut body) = self.parse_directives_and_statements();

    let resolved_as_module =
        self.source_type.is_unambiguous() && self.module_record_builder.has_module_syntax();
    if resolved_as_module && !self.state.potential_await_reparse.is_empty() {
        self.reparse_potential_top_level_awaits(&mut body);
    }

    let source_len = self.source_text.len() as u32;
    let program_span = Span::new(0, source_len);
    let comments = self.ast.vec_from_iter(self.lexer.trivia_builder.comments.iter().copied());
    self.ast.program(
        program_span,
        self.source_type,
        self.source_text,
        comments,
        hashbang,
        directives,
        body,
    )
}
/// Re-parses every statement recorded in `state.potential_await_reparse` with
/// `Context::Await` added, and stores each result back at its recorded index.
///
/// When the lexer config collects tokens, the tokens gathered so far are taken out
/// before the re-parse and put back afterwards.
/// NOTE(review): presumably so the re-parse does not record tokens a second time — confirm.
fn reparse_potential_top_level_awaits(
    &mut self,
    statements: &mut oxc_allocator::Vec<'a, oxc_ast::ast::Statement<'a>>,
) {
    let saved_tokens = self.lexer.config.tokens().then(|| self.lexer.take_tokens());

    // Taking the list leaves `potential_await_reparse` empty.
    for (stmt_index, checkpoint) in std::mem::take(&mut self.state.potential_await_reparse) {
        self.rewind(checkpoint);
        let reparsed = self.context_add(Context::Await, |parser| {
            parser.parse_statement_list_item(StatementContext::StatementList)
        });
        // An index past the end of `statements` is skipped.
        if stmt_index < statements.len() {
            statements[stmt_index] = reparsed;
        }
    }

    if let Some(tokens) = saved_tokens {
        self.lexer.set_tokens(tokens);
    }
}
/// Computes the context a parse starts in.
///
/// - "ambient" is set for TypeScript definition sources.
/// - "await" is enabled for module sources.
/// - "return" is enabled for CommonJS sources, or when
///   `options.allow_return_outside_function` is set.
fn default_context(source_type: SourceType, options: ParseOptions) -> Context {
    let base = Context::default().and_ambient(source_type.is_typescript_definition());
    let with_await = if source_type.is_module() { base.and_await(true) } else { base };
    let return_allowed = options.allow_return_outside_function || source_type.is_commonjs();
    if return_allowed { with_await.and_return(true) } else { with_await }
}
/// Returns the Flow diagnostic when a JavaScript source's first comment contains `@flow`.
///
/// When it reports, `self.errors` is cleared as a side effect. Returns `None` for
/// non-JavaScript sources, sources without comments, and first comments without `@flow`.
fn flow_error(&mut self) -> Option<OxcDiagnostic> {
    if !self.source_type.is_javascript() {
        return None;
    }
    let first_comment = self.lexer.trivia_builder.comments.first()?.span;
    let mentions_flow = first_comment.source_text(self.source_text).contains("@flow");
    if !mentions_flow {
        return None;
    }
    self.errors.clear();
    Some(diagnostics::flow(first_comment))
}
/// Pushes one `cover_initialized_name` diagnostic for every expression still recorded in
/// `state.cover_initialized_name`.
fn check_unfinished_errors(&mut self) {
    use oxc_span::GetSpan;
    let leftovers = self.state.cover_initialized_name.values();
    self.errors.extend(leftovers.map(|expr| diagnostics::cover_initialized_name(expr.span())));
}
/// Returns the "overlong source" diagnostic when the source text is longer than `MAX_LEN`
/// bytes, and `None` otherwise.
#[cold]
fn overlong_error(&self) -> Option<OxcDiagnostic> {
    if self.source_text.len() > MAX_LEN { Some(diagnostics::overlong_source()) } else { None }
}
/// Allocates `value` through the AST builder and returns the resulting arena box.
#[inline]
fn alloc<T>(&self, value: T) -> ArenaBox<'a, T> {
self.ast.alloc(value)
}
}
#[cfg(test)]
mod test {
use std::path::Path;
use oxc_ast::ast::{CommentKind, Expression, Statement};
use oxc_span::GetSpan;
use super::*;
/// An empty source parses to an empty program with no errors and no Flow detection.
#[test]
fn parse_program_smoke_test() {
    let allocator = Allocator::default();
    let ret = Parser::new(&allocator, "", SourceType::default()).parse();
    assert!(ret.program.is_empty());
    assert!(ret.errors.is_empty());
    assert!(!ret.is_flow_language);
}
/// A lone identifier parses to `Expression::Identifier` via `parse_expression`.
#[test]
fn parse_expression_smoke_test() {
    let allocator = Allocator::default();
    let parsed = Parser::new(&allocator, "a", SourceType::default()).parse_expression();
    let expr = parsed.unwrap();
    assert!(matches!(expr, Expression::Identifier(_)));
}
/// Sources that fail to parse and whose first comment contains `@flow` are reported as
/// Flow, with the Flow diagnostic as the only error.
#[test]
fn flow_error() {
let allocator = Allocator::default();
let source_type = SourceType::default();
// Line, single-line block and multi-line block comments, each followed by code that is
// expected to produce at least one error (Flow detection only runs when there are errors).
let sources = [
"// @flow\nasdf adsf",
"/* @flow */\n asdf asdf",
"/**
* @flow
*/
asdf asdf
",
"/* @flow */ super;",
];
for source in sources {
let ret = Parser::new(&allocator, source, source_type).parse();
assert!(ret.is_flow_language);
// Every other diagnostic is replaced by the single Flow error.
assert_eq!(ret.errors.len(), 1);
assert_eq!(ret.errors.first().unwrap().to_string(), "Flow is not supported");
}
}
/// `declare module 'test'` without a body parses without errors in a `.ts` file.
#[test]
fn ts_module_declaration() {
    let allocator = Allocator::default();
    let source_type = SourceType::from_path(Path::new("module.ts")).unwrap();
    let ret = Parser::new(&allocator, "declare module 'test'\n", source_type).parse();
    assert_eq!(ret.errors.len(), 0);
}
/// A `'use strict'` string that follows another statement is an ordinary statement in the
/// program body, not a directive.
#[test]
fn directives() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    // (source, expected number of statements in the program body)
    let cases = [
        ("import x from 'foo'; 'use strict';", 2),
        ("export {x} from 'foo'; 'use strict';", 2),
        (";'use strict';", 2),
    ];
    for (source, body_length) in cases {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(ret.program.directives.is_empty(), "{source}");
        assert_eq!(ret.program.body.len(), body_length, "{source}");
    }
}
/// Covers `ParseOptions::allow_v8_intrinsics`: accepted calls, rejected spread arguments,
/// the option being off, and `%` appearing in a TypeScript `extends` clause.
#[test]
fn v8_intrinsics() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    let intrinsics_on = ParseOptions { allow_v8_intrinsics: true, ..ParseOptions::default() };

    // Enabled: the call parses into a `V8IntrinsicExpression` spanning the whole source.
    {
        let source = "%DebugPrint('Raging against the Dying Light')";
        let ret = Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert!(ret.errors.is_empty());
        let Some(Statement::ExpressionStatement(expr_stmt)) = ret.program.body.first() else {
            panic!("Expected ExpressionStatement");
        };
        let Expression::V8IntrinsicExpression(expr) = &expr_stmt.expression else {
            panic!("Expected V8IntrinsicExpression");
        };
        assert_eq!(expr.span().source_text(source), source);
    }

    // Enabled: spread arguments are rejected with a dedicated message.
    {
        let source = "%DebugPrint(...illegalSpread)";
        let ret = Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(
            ret.errors[0].to_string(),
            "V8 runtime calls cannot have spread elements as arguments"
        );
    }

    // Disabled (the default): `%` is an unexpected token.
    {
        let source = "%DebugPrint('~~')";
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert_eq!(ret.errors.len(), 1);
        assert_eq!(ret.errors[0].to_string(), "Unexpected token");
    }

    // TypeScript `extends %name`: exactly one error with the option on or off.
    {
        let source = "interface Props extends %enuProps {}";
        let source_type = SourceType::default().with_typescript(true);
        let ret = Parser::new(&allocator, source, source_type).with_options(intrinsics_on).parse();
        assert_eq!(ret.errors.len(), 1);
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert_eq!(ret.errors.len(), 1);
    }
}
/// Each source contains exactly one comment, and it is classified with the expected kind.
#[test]
fn comments() {
    let allocator = Allocator::default();
    let source_type = SourceType::default().with_typescript(true);
    // (source, expected kind of its only comment)
    let cases = [
        ("// line comment", CommentKind::Line),
        ("/* line comment */", CommentKind::SingleLineBlock),
        (
            "type Foo = ( /* Require properties which are not generated automatically. */ 'bar')",
            CommentKind::SingleLineBlock,
        ),
    ];
    for (source, expected_kind) in cases {
        let ret = Parser::new(&allocator, source, source_type).parse();
        let found = &ret.program.comments;
        assert_eq!(found.len(), 1, "{source}");
        assert_eq!(found.first().unwrap().kind, expected_kind, "{source}");
    }
}
/// The hashbang value is the text after `#!`, up to the end of the line.
#[test]
fn hashbang() {
    let allocator = Allocator::default();
    let ret = Parser::new(&allocator, "#!/usr/bin/node\n;", SourceType::default()).parse();
    assert_eq!(ret.program.hashbang.unwrap().value.as_str(), "/usr/bin/node");
}
/// An unambiguous source resolves to a module when module syntax is present, and to a
/// script otherwise.
#[test]
fn unambiguous() {
    let allocator = Allocator::default();
    let source_type = SourceType::unambiguous();
    assert!(source_type.is_unambiguous());

    // Resolved as module.
    for source in ["import x from 'foo';", "export {x} from 'foo';", "import.meta"] {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(ret.program.source_type.is_module());
    }

    // Resolved as script: the empty source and dynamic `import()`.
    for source in ["", "import('foo')"] {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(ret.program.source_type.is_script());
    }
}
/// A source consisting of U+FFFD alone is reported as binary; U+FFFD inside a string
/// literal is accepted.
#[test]
fn binary_file() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();

    let bare = Parser::new(&allocator, "\u{FFFD}", source_type).parse();
    assert!(bare.program.is_empty());
    assert_eq!(bare.errors.len(), 1);
    assert_eq!(bare.errors[0].to_string(), "File appears to be binary.");

    let in_string = Parser::new(&allocator, "\"oops \u{FFFD} oops\";", source_type).parse();
    assert!(!in_string.program.is_empty());
    assert!(in_string.errors.is_empty());
}
/// Parses a BigInt literal and a long string literal and checks the body is non-empty.
///
/// NOTE(review): judging by the name, this is meant to be run under a leak checker; the
/// assertions themselves only check that a statement was produced — confirm.
#[test]
fn memory_leak() {
    let allocator = Allocator::default();
    let source_type = SourceType::default();
    for source in ["2n", ";'1234567890123456789012345678901234567890'"] {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(!ret.program.body.is_empty());
    }
}
/// A source one byte longer than `MAX_LEN` is rejected with a single error and marks the
/// result as panicked.
///
/// Only built for 64-bit targets, and skipped under Miri.
/// NOTE(review): presumably skipped under Miri because a `MAX_LEN + 1` byte allocation is
/// too heavy to interpret — confirm.
#[cfg(target_pointer_width = "64")]
#[cfg(not(miri))]
#[test]
fn overlong_source() {
use std::{
alloc::{self, Layout},
ptr::NonNull,
slice, str,
};
// Owns a zero-filled heap buffer of `LEN` bytes and exposes it as a `&str`.
struct ZeroedString {
ptr: NonNull<u8>,
}
impl ZeroedString {
// One byte past the longest source the parser accepts.
const LEN: usize = MAX_LEN + 1;
// Alignment requested for the buffer.
// NOTE(review): presumably page-aligned so the allocator can serve it with untouched zero pages — confirm.
const PAGE_SIZE: usize = 4096;
const LAYOUT: Layout = match Layout::from_size_align(Self::LEN, Self::PAGE_SIZE) {
Ok(layout) => layout,
Err(_) => panic!("Failed to create layout"),
};
fn new() -> Self {
// SAFETY: `LAYOUT` has a non-zero size (`LEN` is `MAX_LEN + 1`), as `alloc_zeroed` requires.
// A null return is handled right below.
let ptr = unsafe { alloc::alloc_zeroed(Self::LAYOUT) };
let Some(ptr) = NonNull::new(ptr) else {
panic!("Failed to allocate {} bytes", Self::LEN);
};
Self { ptr }
}
fn as_str(&self) -> &str {
// SAFETY: `ptr` points to a live allocation of exactly `LEN` bytes made in `new` and
// freed only in `drop`. The bytes were zero-initialised and are never written to, and
// a run of NUL bytes is valid UTF-8.
unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(self.ptr.as_ptr(), Self::LEN))
}
}
}
impl Drop for ZeroedString {
fn drop(&mut self) {
// SAFETY: `ptr` was allocated in `new` with this same `LAYOUT` and is freed only here.
unsafe { alloc::dealloc(self.ptr.as_ptr(), Self::LAYOUT) };
}
}
let zeroed_string = ZeroedString::new();
let source_text = zeroed_string.as_str();
let allocator = Allocator::default();
let ret = Parser::new(&allocator, source_text, SourceType::default()).parse();
// The overlong source is reported as the only error.
assert!(ret.program.is_empty());
assert!(ret.panicked);
assert_eq!(ret.errors.len(), 1);
assert_eq!(ret.errors.first().unwrap().to_string(), "Source length exceeds 4 GiB limit");
}
/// A source of exactly `MAX_LEN` bytes is still accepted.
///
/// The source is two statements separated by one huge block comment. Only built without
/// debug assertions, and skipped under Miri.
/// NOTE(review): presumably because a `MAX_LEN`-byte source is too slow to process in
/// those configurations — confirm.
#[cfg(not(debug_assertions))]
#[cfg(not(miri))]
#[test]
fn legal_length_source() {
    let head = "const x = 1;\n/*";
    let foot = "*/\nconst y = 2;\n";

    // Start from `MAX_LEN` filler bytes, then overwrite both ends in place so the length
    // never changes.
    let mut source = "x".repeat(MAX_LEN);
    let foot_start = MAX_LEN - foot.len();
    source.replace_range(..head.len(), head);
    source.replace_range(foot_start.., foot);
    assert_eq!(source.len(), MAX_LEN);

    let allocator = Allocator::default();
    let ret = Parser::new(&allocator, &source, SourceType::default()).parse();
    assert!(!ret.panicked);
    assert!(ret.errors.is_empty());
    assert_eq!(ret.program.body.len(), 2);
}
}