use memchr::memchr;
use oxc_allocator::Allocator;
use oxc_ast::ast::{BigIntLiteral, NumericLiteral};
use oxc_ast_visit::Visit;
use oxc_parser::{ParseOptions, Parser};
use oxc_span::SourceType;
use crate::{
firstpass::FirstPass,
parse::{Item, ItemBody},
strings::CowStr,
};
/// Upper bound on the `nesting_depth` argument of the recursive expression and
/// JSX tag scanners in this file; a scanner called with a larger depth gives
/// up and returns `None`.
const MAX_MDX_NESTING: u32 = 32;
/// Removes up to two columns of leading indentation from every continuation
/// line of `s` (every line after the first line break).
///
/// * `s` - expression text; the first line is always copied unchanged.
/// * `container_content_col` - 1-based column at which the container content
///   starts (`0` is treated as `1`); used to resolve tab stops of width 4.
///
/// Returns `Cow::Borrowed(s)` when `s` has no `\n` / `\r`. Otherwise returns
/// an owned string in which line terminators are preserved byte-for-byte and
/// the unconsumed columns of a partially stripped tab are replaced by
/// `PHANTOM_SPACE` characters.
pub(crate) fn dedent_expression_continuation(
    s: &str,
    container_content_col: usize,
) -> alloc::borrow::Cow<'_, str> {
    const INDENT: usize = 2;
    const TAB_SIZE: usize = 4;

    let bytes = s.as_bytes();
    let Some(first_break) = bytes.iter().position(|&b| matches!(b, b'\n' | b'\r')) else {
        return alloc::borrow::Cow::Borrowed(s);
    };

    // Zero-based column at which every continuation line starts.
    let start_column = container_content_col.max(1) - 1;
    let mut out = alloc::string::String::with_capacity(s.len());
    out.push_str(&s[..first_break]);

    let mut cursor = first_break;
    while cursor < bytes.len() {
        // `cursor` always rests on a line terminator here: `\n`, `\r` or `\r\n`.
        let terminator_start = cursor;
        cursor += 1;
        if bytes[terminator_start] == b'\r' && bytes.get(cursor).copied() == Some(b'\n') {
            cursor += 1;
        }
        out.push_str(&s[terminator_start..cursor]);

        // Strip at most INDENT columns of spaces / tabs.
        let mut removed = 0usize;
        let mut column = start_column;
        while removed < INDENT {
            match bytes.get(cursor).copied() {
                Some(b' ') => {
                    removed += 1;
                    column += 1;
                }
                Some(b'\t') => {
                    let tab_stop = (column / TAB_SIZE + 1) * TAB_SIZE;
                    let width = tab_stop - column;
                    let taken = width.min(INDENT - removed);
                    removed += taken;
                    // Columns of the tab that were not stripped survive as phantom spaces.
                    out.extend(core::iter::repeat(PHANTOM_SPACE).take(width - taken));
                    column = tab_stop;
                }
                _ => break,
            }
            cursor += 1;
        }

        // Copy the remainder of the line untouched.
        let line_end = bytes[cursor..]
            .iter()
            .position(|&b| matches!(b, b'\n' | b'\r'))
            .map_or(bytes.len(), |offset| cursor + offset);
        out.push_str(&s[cursor..line_end]);
        cursor = line_end;
    }
    alloc::borrow::Cow::Owned(out)
}
/// Private-use code point (U+F002) that the indent-stripping helpers in this
/// file emit for the columns of a tab that were only partially stripped.
/// `parse_mdx_jsx_flow` removes it again before handing an expression to the
/// JavaScript parser for validation.
pub(crate) const PHANTOM_SPACE: char = '\u{F002}';
fn strip_expression_indent(
s: &str,
container_content_col: usize,
extra_strip_cols: usize,
) -> alloc::string::String {
const INDENT_SIZE: usize = 2;
const TAB_WIDTH: usize = 4;
let base_col = if container_content_col == 0 {
1
} else {
container_content_col
};
let strip_cols = INDENT_SIZE + extra_strip_cols;
let mut result = alloc::string::String::with_capacity(s.len());
let mut at_line_start = false;
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
let c = bytes[i];
if c == b'\n' {
result.push('\n');
i += 1;
at_line_start = true;
continue;
}
if c == b'\r' {
result.push('\r');
i += 1;
if i < bytes.len() && bytes[i] == b'\n' {
result.push('\n');
i += 1;
}
at_line_start = true;
continue;
}
if !at_line_start {
let ch_len = char_len_utf8(c);
result.push_str(&s[i..i + ch_len]);
i += ch_len;
continue;
}
let mut cols_consumed = 0usize;
let mut col = base_col;
while i < bytes.len() && cols_consumed < strip_cols {
match bytes[i] {
b' ' => {
cols_consumed += 1;
col += 1;
i += 1;
}
b'\t' => {
let tab_cols = TAB_WIDTH - ((col - 1) % TAB_WIDTH);
let want = strip_cols - cols_consumed;
if want >= tab_cols {
cols_consumed += tab_cols;
col += tab_cols;
i += 1;
} else {
let keep_cols = tab_cols - want;
for _ in 0..keep_cols {
result.push(PHANTOM_SPACE);
}
i += 1;
break;
}
}
_ => break,
}
}
at_line_start = false;
}
result
}
/// Drops every space / tab that directly follows a line break in `s`.
///
/// Text without `\n` or `\r` is returned borrowed. Otherwise an owned copy is
/// produced in which the line breaks themselves are kept and the leading
/// whitespace of each continuation line is removed; the first line is left
/// untouched.
fn strip_attr_continuation_indent(s: &str) -> alloc::borrow::Cow<'_, str> {
    if !s.bytes().any(|b| b == b'\n' || b == b'\r') {
        return alloc::borrow::Cow::Borrowed(s);
    }
    // `true` while we are still inside the indentation that follows a line break.
    let mut in_indent = false;
    let mut out = alloc::string::String::with_capacity(s.len());
    out.extend(s.chars().filter(|&c| match c {
        '\n' | '\r' => {
            in_indent = true;
            true
        }
        ' ' | '\t' if in_indent => false,
        _ => {
            in_indent = false;
            true
        }
    }));
    alloc::borrow::Cow::Owned(out)
}
use crate::utils::decode_html_entities as decode_attr_entities;
/// Reports whether the character starting at byte `ix` of `s` is whitespace.
///
/// ASCII bytes are classified with `u8::is_ascii_whitespace`. For a non-ASCII
/// lead byte the character is decoded and accepted when it is one of
/// U+00A0, U+1680, U+2000..=U+200A, U+202F, U+205F, U+3000 or U+FEFF.
/// Truncated or malformed sequences yield `false`.
///
/// Panics if `ix` is out of bounds.
fn is_mdx_unicode_whitespace(s: &[u8], ix: usize) -> bool {
    let lead = s[ix];
    if lead.is_ascii() {
        return lead.is_ascii_whitespace();
    }
    let width = char_len_utf8(lead);
    s.get(ix..ix + width)
        .and_then(|raw| core::str::from_utf8(raw).ok())
        .and_then(|text| text.chars().next())
        .is_some_and(|c| {
            matches!(
                c,
                '\u{00A0}'
                    | '\u{1680}'
                    | '\u{2000}'..='\u{200A}'
                    | '\u{202F}'
                    | '\u{205F}'
                    | '\u{3000}'
                    | '\u{FEFF}'
            )
        })
}
/// Length in bytes of the UTF-8 sequence announced by lead byte `b`.
///
/// Bytes below `0xC0` (ASCII and continuation bytes) report 1, so a scanner
/// that lands on a stray continuation byte still makes progress.
fn char_len_utf8(b: u8) -> usize {
    if b >= 0xF0 {
        4
    } else if b >= 0xE0 {
        3
    } else if b >= 0xC0 {
        2
    } else {
        1
    }
}
/// Decodes the single UTF-8 character that starts at byte `ix` of `s`.
///
/// Only a window of at most four bytes is validated, so the cost is constant
/// instead of proportional to the rest of the input, and malformed bytes
/// *after* the character no longer cause the lookup to fail.
///
/// Returns `None` when `ix` is at or past the end of `s`, or when the bytes at
/// `ix` do not begin a well-formed UTF-8 sequence.
fn decode_utf8_char(s: &[u8], ix: usize) -> Option<char> {
    let tail = s.get(ix..)?;
    // A UTF-8 scalar value is encoded in at most four bytes.
    let window = &tail[..tail.len().min(4)];
    let text = match core::str::from_utf8(window) {
        Ok(text) => text,
        // The window may cut through (or contain garbage after) the first
        // character; keep only the prefix that is known to be valid.
        Err(err) => core::str::from_utf8(&window[..err.valid_up_to()]).ok()?,
    };
    text.chars().next()
}
/// Reports whether the character at byte `ix` of `s` may start a JSX name:
/// an ASCII letter, `_` or `$`, or a non-ASCII character accepted by
/// `unicode_id_start::is_id_start`.
///
/// Panics if `ix` is out of bounds.
fn is_jsx_name_start(s: &[u8], ix: usize) -> bool {
    match s[ix] {
        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$' => true,
        other if other.is_ascii() => false,
        _ => decode_utf8_char(s, ix).is_some_and(unicode_id_start::is_id_start),
    }
}
/// Reports whether the character at byte `ix` of `s` may continue a JSX name:
/// an ASCII letter or digit, `-`, `_` or `$`, or a non-ASCII character
/// accepted by `unicode_id_start::is_id_continue`.
///
/// Panics if `ix` is out of bounds.
fn is_jsx_name_continue(s: &[u8], ix: usize) -> bool {
    match s[ix] {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'$' => true,
        other if other.is_ascii() => false,
        _ => decode_utf8_char(s, ix).is_some_and(unicode_id_start::is_id_continue),
    }
}
/// Outcome of `try_parse_esm`.
pub(crate) enum EsmParseResult {
// The parser reported no errors.
Complete,
// The first reported error is located at or beyond the end of the input
// (or carries no label at all).
Incomplete,
// The first reported error is located inside the input.
Error,
}
/// Validates the body of an MDX expression (`value`, without the surrounding
/// braces) by parsing `(value)` as ESM + JSX with oxc.
///
/// `allocator` is reset and reused as the parser arena.
///
/// Returns `None` when the body is acceptable:
/// * it is empty or whitespace only,
/// * it parses without errors and contains no legacy-octal-looking numeric
///   literal, or
/// * parsing failed but the body consists solely of whitespace and comments.
///
/// Otherwise returns `Some((offset, message))`, where `offset` is a byte
/// offset into `value` (the parser offset minus one for the added `(`):
/// * `"Invalid number"` at the first literal whose raw text is `0` followed
///   by a digit,
/// * `"Unterminated block comment"` at the start of an unclosed `/* ...`,
/// * the first parser diagnostic; its offset falls back to `value.len()` when
///   the diagnostic has no label.
pub(crate) fn try_parse_expression_body(
    value: &str,
    allocator: &mut Allocator,
) -> Option<(usize, String)> {
    if value.trim().is_empty() {
        return None;
    }
    let source_type = SourceType::mjs().with_jsx(true);
    allocator.reset();
    // Parenthesise so that object literals etc. are parsed as expressions.
    let wrapped = alloc::format!("({value})");
    let source = allocator.alloc_str(&wrapped);
    let ret = Parser::new(allocator, source, source_type)
        .with_options(ParseOptions::default())
        .parse();

    if ret.errors.is_empty() {
        let mut finder = LegacyOctalFinder::default();
        finder.visit_program(&ret.program);
        if let Some(offset) = finder.offset {
            // `- 1` undoes the leading `(` of the wrapper.
            return Some((offset.saturating_sub(1), "Invalid number".to_string()));
        }
        return None;
    }

    // The parse failed. A body made only of comments / whitespace is still
    // fine (`()` is what made the parser complain), so look for real content.
    let bytes = value.as_bytes();
    let mut i = 0;
    let mut has_non_ws = false;
    while i < bytes.len() {
        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'/' {
            i += 2;
            // A line comment ends at any line break. `\r` alone is a line
            // terminator in JavaScript too, so it must stop the skip as well;
            // otherwise code after a bare-CR comment would be ignored.
            while i < bytes.len() && bytes[i] != b'\n' && bytes[i] != b'\r' {
                i += 1;
            }
        } else if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
            let comment_start = i;
            i += 2;
            let mut closed = false;
            while i + 1 < bytes.len() {
                if bytes[i] == b'*' && bytes[i + 1] == b'/' {
                    closed = true;
                    i += 2;
                    break;
                }
                i += 1;
            }
            if !closed {
                return Some((comment_start, "Unterminated block comment".to_string()));
            }
        } else {
            if !matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
                has_non_ws = true;
            }
            i += 1;
        }
    }
    if !has_non_ws {
        return None;
    }

    let first = ret.errors.first()?;
    let err_offset = first
        .labels
        .as_ref()
        .and_then(|labels| labels.first().map(|l| l.offset()))
        .map(|o| o.saturating_sub(1))
        .unwrap_or(value.len());
    Some((err_offset, first.message.to_string()))
}
/// AST visitor state: remembers the span start of the first numeric or BigInt
/// literal whose raw text is `0` followed by another digit.
#[derive(Default)]
struct LegacyOctalFinder {
// Span start of the first offending literal; `None` until one is seen.
offset: Option<usize>,
}
impl LegacyOctalFinder {
    /// Records `span_start` when `raw` starts with `0` followed by an ASCII
    /// digit. Only the first such literal is kept; later calls are no-ops, as
    /// is a call with `raw == None`.
    fn check_raw(&mut self, raw: Option<&str>, span_start: u32) {
        if self.offset.is_some() {
            return;
        }
        let leading_zero_digit = matches!(
            raw.map(str::as_bytes),
            Some([b'0', second, ..]) if second.is_ascii_digit()
        );
        if leading_zero_digit {
            self.offset = Some(span_start as usize);
        }
    }
}
// Visitor hooks: feed the raw source text and span start of every numeric and
// BigInt literal to `check_raw`.
impl<'a> Visit<'a> for LegacyOctalFinder {
fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
self.check_raw(lit.raw.as_deref(), lit.span.start);
}
fn visit_big_int_literal(&mut self, lit: &BigIntLiteral<'a>) {
self.check_raw(lit.raw.as_deref(), lit.span.start);
}
}
/// Parses `value` as an ES module (with JSX enabled) and classifies the
/// result.
///
/// `allocator` is reset and reused as the parser arena.
///
/// * `Complete` - the parser reported no errors.
/// * `Incomplete` - the first error's first label is at or past the end of
///   `value` (an error without labels counts as being at the end).
/// * `Error` - the first error is located inside `value`.
pub(crate) fn try_parse_esm(value: &str, allocator: &mut Allocator) -> EsmParseResult {
    allocator.reset();
    let source = allocator.alloc_str(value);
    let parsed = Parser::new(allocator, source, SourceType::mjs().with_jsx(true))
        .with_options(ParseOptions::default())
        .parse();

    let Some(first_error) = parsed.errors.first() else {
        return EsmParseResult::Complete;
    };
    let offset = first_error
        .labels
        .as_ref()
        .and_then(|labels| labels.first().map(|label| label.offset()))
        .unwrap_or(value.len());
    if offset < value.len() {
        EsmParseResult::Error
    } else {
        EsmParseResult::Incomplete
    }
}
/// Words after which `slash_is_regex` treats a following `/` as the start of a
/// regular expression literal rather than a division operator.
const REGEX_KEYWORDS: &[&[u8]] = &[
b"await",
b"case",
b"delete",
b"in",
b"instanceof",
b"new",
b"of",
b"return",
b"throw",
b"typeof",
b"void",
b"yield",
];
/// Heuristically decides whether the `/` at `bytes[pos]` opens a regular
/// expression literal (`true`) or is a division operator (`false`), based on
/// the nearest non-whitespace byte before it:
///
/// * nothing before it -> regex,
/// * `)`, `]`, a quote or a backtick -> division,
/// * the second character of `++` / `--` -> division,
/// * an ASCII identifier / number word -> regex only if the word is listed in
///   `REGEX_KEYWORDS`,
/// * anything else -> regex.
fn slash_is_regex(bytes: &[u8], pos: usize) -> bool {
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_' || b == b'$';

    let before = &bytes[..pos];
    let Some(last) = before
        .iter()
        .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
    else {
        return true;
    };

    match before[last] {
        b')' | b']' | b'"' | b'\'' | b'`' => false,
        sign @ (b'+' | b'-') if last > 0 && before[last - 1] == sign => false,
        b if is_word_byte(b) => {
            // Walk back to the first byte of the word that ends at `last`.
            let word_start = before[..last]
                .iter()
                .rposition(|&b| !is_word_byte(b))
                .map_or(0, |boundary| boundary + 1);
            REGEX_KEYWORDS.contains(&&before[word_start..=last])
        }
        _ => true,
    }
}
/// Scans a regular expression literal whose opening `/` is at `bytes[start]`
/// and returns the index just past it.
///
/// * On the closing `/`, any directly following ASCII alphanumeric flag
///   characters are consumed as well.
/// * `\x` escapes are skipped as a pair, both in the pattern and inside a
///   `[...]` character class; a `/` inside a class does not end the literal.
/// * A line break ends the scan and its index is returned. This now also
///   holds inside a character class, so an unterminated `[` can no longer
///   swallow the following lines.
/// * The result never exceeds `bytes.len()`, even when the input ends in the
///   middle of an escape.
fn scan_regex(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut ix = start + 1;
    while ix < len {
        match bytes[ix] {
            b'/' => {
                ix += 1;
                // Flags such as `gi`.
                while ix < len && bytes[ix].is_ascii_alphanumeric() {
                    ix += 1;
                }
                return ix;
            }
            b'\\' => ix += 2,
            b'[' => {
                ix += 1;
                while ix < len {
                    match bytes[ix] {
                        b']' => {
                            ix += 1;
                            break;
                        }
                        // Regex literals cannot span lines: give control back
                        // to the caller so it can handle the line break.
                        b'\n' | b'\r' => return ix,
                        b'\\' => ix += 2,
                        _ => ix += 1,
                    }
                }
            }
            b'\n' | b'\r' => return ix,
            _ => ix += 1,
        }
    }
    // A trailing backslash may have pushed `ix` past the end.
    ix.min(len)
}
/// Given the index `ix` of a line-break byte (`\n` or `\r`), reports whether
/// the line that follows is blank: only spaces / tabs up to the next line
/// break or the end of input. A `\r\n` pair at `ix` counts as a single break.
///
/// Panics if `ix` is out of bounds.
fn is_blank_line_next(bytes: &[u8], ix: usize) -> bool {
    let is_crlf = bytes[ix] == b'\r' && bytes.get(ix + 1).copied() == Some(b'\n');
    let next_line = &bytes[ix + if is_crlf { 2 } else { 1 }..];
    match next_line.iter().find(|&&b| b != b' ' && b != b'\t') {
        Some(&first_visible) => first_visible == b'\n' || first_visible == b'\r',
        None => true,
    }
}
/// Callback applied to the bytes that follow a line break inside a multi-line
/// construct. `Some(n)` tells the scanner to skip `n` bytes before it
/// continues; `None` makes strict scanners abort and lazy scanners mark the
/// line as lazy.
pub(crate) type ContainerLineCheck<'a> = &'a dyn Fn(&[u8]) -> Option<usize>;
/// Result of `check_container_after_newline_lazy` for the line that starts
/// after a line break.
#[derive(Copy, Clone, Eq, PartialEq)]
enum LineMode {
// The container check matched (or there was nothing to check).
Strict,
// The container check did not match the line.
Lazy,
}
/// Runs the optional container check on the line that starts at `*ix`.
///
/// Without a check, or when `*ix` is already at the end of `bytes`, this is a
/// no-op that returns `Some(())`. Otherwise the check is applied to
/// `bytes[*ix..]`: on `Some(skip)` the cursor is advanced by `skip`, on `None`
/// the function returns `None` and leaves `*ix` untouched.
fn check_container_after_newline(
    bytes: &[u8],
    ix: &mut usize,
    container_check: &Option<ContainerLineCheck<'_>>,
) -> Option<()> {
    let Some(check) = container_check else {
        return Some(());
    };
    if *ix >= bytes.len() {
        return Some(());
    }
    let skip = check(&bytes[*ix..])?;
    *ix += skip;
    Some(())
}
/// Lazy-continuation variant of `check_container_after_newline`.
///
/// Returns `LineMode::Lazy` only when a check is present, `*ix` is inside
/// `bytes`, and the check rejects the line. When the check accepts the line
/// the cursor is advanced by the reported prefix length. Every other case
/// yields `LineMode::Strict`.
fn check_container_after_newline_lazy(
    bytes: &[u8],
    ix: &mut usize,
    container_check: &Option<ContainerLineCheck<'_>>,
) -> LineMode {
    let line = bytes.get(*ix..).filter(|rest| !rest.is_empty());
    match (container_check, line) {
        (Some(check), Some(rest)) => match check(rest) {
            Some(skip) => {
                *ix += skip;
                LineMode::Strict
            }
            None => LineMode::Lazy,
        },
        _ => LineMode::Strict,
    }
}
/// Convenience wrapper around `scan_mdx_expression_end_inner`: no container
/// check, lazy mode off, nesting depth 0. `inline` is forwarded unchanged.
fn scan_mdx_expression_end(bytes: &[u8], inline: bool) -> Option<usize> {
scan_mdx_expression_end_inner(bytes, inline, None, false, true, 0)
}
/// Scans a brace-delimited MDX expression that starts at `bytes[0] == b'{'`
/// and returns the index just past the matching `}`.
///
/// The scanner tracks brace depth and skips over constructs in which braces
/// do not count: quoted strings, template literals (including `${ ... }`),
/// line and block comments, regular expression literals and embedded JSX
/// tags.
///
/// * `inline` - when set, a line break followed by a blank line aborts the
///   scan (`None`).
/// * `container_check` - optional callback run on the text after each line
///   break handled by the main loop; see `lazy_mode` for what a mismatch
///   means.
/// * `lazy_mode` - when `false`, a failed container check aborts the scan.
///   When `true`, the line is only marked as lazy.
/// * `allow_lazy_body` - in lazy mode, when `false`, any significant token on
///   a lazy line aborts the scan.
/// * `nesting_depth` - recursion depth; values above `MAX_MDX_NESTING` abort.
///
/// Returns `None` when `bytes` does not start with `{`, when the closing
/// brace is not found, or when one of the conditions above aborts the scan.
fn scan_mdx_expression_end_inner(
    bytes: &[u8],
    inline: bool,
    container_check: Option<ContainerLineCheck<'_>>,
    lazy_mode: bool,
    allow_lazy_body: bool,
    nesting_depth: u32,
) -> Option<usize> {
    if nesting_depth > MAX_MDX_NESTING {
        return None;
    }
    if bytes.is_empty() || bytes[0] != b'{' {
        return None;
    }
    let mut ix = 1;
    let mut depth: usize = 1;
    // Whether the line currently being scanned failed the container check.
    let mut current_line_lazy = false;
    // Whether the previous significant token ended a value; used to tell a
    // division `/` from the start of a regex literal.
    let mut prev_was_value = false;
    macro_rules! mark_value {
        () => {
            prev_was_value = true;
        };
    }
    macro_rules! mark_op {
        () => {
            prev_was_value = false;
        };
    }
    macro_rules! reject_if_lazy {
        () => {
            if lazy_mode && !allow_lazy_body && current_line_lazy {
                return None;
            }
        };
    }
    while ix < bytes.len() && depth > 0 {
        match bytes[ix] {
            b'\n' => {
                if inline && is_blank_line_next(bytes, ix) {
                    return None;
                }
                ix += 1;
                if lazy_mode {
                    current_line_lazy =
                        check_container_after_newline_lazy(bytes, &mut ix, &container_check)
                            == LineMode::Lazy;
                } else {
                    check_container_after_newline(bytes, &mut ix, &container_check)?;
                }
            }
            b'\r' => {
                if inline && is_blank_line_next(bytes, ix) {
                    return None;
                }
                ix += 1;
                if ix < bytes.len() && bytes[ix] == b'\n' {
                    ix += 1;
                }
                if lazy_mode {
                    current_line_lazy =
                        check_container_after_newline_lazy(bytes, &mut ix, &container_check)
                            == LineMode::Lazy;
                } else {
                    check_container_after_newline(bytes, &mut ix, &container_check)?;
                }
            }
            b' ' | b'\t' => {
                ix += 1;
            }
            b'{' => {
                reject_if_lazy!();
                depth += 1;
                ix += 1;
                mark_op!();
            }
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    reject_if_lazy!();
                    return Some(ix + 1);
                }
                reject_if_lazy!();
                ix += 1;
                mark_value!();
            }
            b'"' | b'\'' => {
                reject_if_lazy!();
                // An apostrophe glued to a word (e.g. `don't` in JSX text) is
                // not treated as the start of a string literal.
                if bytes[ix] == b'\''
                    && ix > 0
                    && (bytes[ix - 1].is_ascii_alphanumeric() || bytes[ix - 1] == b'_')
                {
                    ix += 1;
                    continue;
                }
                let quote = bytes[ix];
                ix += 1;
                // Strings end at the matching quote or at a line break.
                while ix < bytes.len()
                    && bytes[ix] != quote
                    && bytes[ix] != b'\n'
                    && bytes[ix] != b'\r'
                {
                    if bytes[ix] == b'\\' {
                        ix += 1;
                    }
                    ix += 1;
                }
                if ix < bytes.len() && bytes[ix] == quote {
                    ix += 1;
                }
                mark_value!();
            }
            b'`' => {
                reject_if_lazy!();
                ix += 1;
                // Brace depth inside `${ ... }` substitutions.
                let mut template_depth: usize = 0;
                while ix < bytes.len() {
                    match bytes[ix] {
                        b'`' if template_depth == 0 => {
                            ix += 1;
                            break;
                        }
                        b'\\' => {
                            ix += 2;
                            continue;
                        }
                        b'$' if ix + 1 < bytes.len() && bytes[ix + 1] == b'{' => {
                            template_depth += 1;
                            ix += 2;
                            continue;
                        }
                        b'{' if template_depth > 0 => template_depth += 1,
                        b'}' if template_depth > 0 => template_depth -= 1,
                        b'\n' | b'\r' if inline && is_blank_line_next(bytes, ix) => {
                            return None;
                        }
                        _ => {}
                    }
                    ix += 1;
                }
                mark_value!();
            }
            b'/' if ix + 1 < bytes.len() && bytes[ix + 1] == b'/' => {
                reject_if_lazy!();
                ix += 2;
                // A line comment ends at any line break. Stopping at `\r` as
                // well keeps bare-CR input consistent with the rest of this
                // scanner; the break itself is handled by the main loop.
                while ix < bytes.len() && bytes[ix] != b'\n' && bytes[ix] != b'\r' {
                    ix += 1;
                }
            }
            b'/' if ix + 1 < bytes.len() && bytes[ix + 1] == b'*' => {
                reject_if_lazy!();
                ix += 2;
                while ix + 1 < bytes.len() {
                    if bytes[ix] == b'*' && bytes[ix + 1] == b'/' {
                        ix += 2;
                        break;
                    }
                    if inline
                        && (bytes[ix] == b'\n' || bytes[ix] == b'\r')
                        && is_blank_line_next(bytes, ix)
                    {
                        return None;
                    }
                    ix += 1;
                }
            }
            b'/' if {
                // Regex-vs-division decision. When the previous token was a
                // value that does not end in an identifier character (for
                // example `)` or a string), the slash is a division.
                // Otherwise `slash_is_regex` inspects the preceding text.
                let prev_is_ident_char = {
                    let mut j = ix;
                    while j > 0 && matches!(bytes[j - 1], b' ' | b'\t') {
                        j -= 1;
                    }
                    j > 0
                        && (bytes[j - 1].is_ascii_alphanumeric()
                            || bytes[j - 1] == b'_'
                            || bytes[j - 1] == b'$')
                };
                let force_division = prev_was_value && !prev_is_ident_char;
                !force_division && slash_is_regex(bytes, ix)
            } =>
            {
                reject_if_lazy!();
                ix = scan_regex(bytes, ix);
                mark_value!();
            }
            b'<' if ix + 1 < bytes.len()
                && (bytes[ix + 1].is_ascii_alphabetic()
                    || bytes[ix + 1] == b'_'
                    || bytes[ix + 1] == b'$'
                    || bytes[ix + 1] == b'/'
                    || bytes[ix + 1] == b'>') =>
            {
                reject_if_lazy!();
                // Possibly an embedded JSX tag; if it does not scan as one,
                // treat `<` as an ordinary operator.
                if let Some(end) = scan_mdx_jsx_tag_end_inner(&bytes[ix..], None, nesting_depth + 1)
                {
                    ix += end;
                    mark_value!();
                } else {
                    ix += 1;
                    mark_op!();
                }
            }
            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$' | b'0'..=b'9' => {
                reject_if_lazy!();
                ix += 1;
                mark_value!();
            }
            b')' | b']' => {
                reject_if_lazy!();
                ix += 1;
                mark_value!();
            }
            _ => {
                reject_if_lazy!();
                ix += 1;
                mark_op!();
            }
        }
    }
    None
}
/// Returns the index just past the first `\n` at or after `start`, or
/// `bytes.len()` when no `\n` follows. The result is always `Some`.
///
/// Panics if `start > bytes.len()`.
fn scan_to_line_end(bytes: &[u8], start: usize) -> Option<usize> {
    let end = match memchr(b'\n', &bytes[start..]) {
        Some(offset) => start + offset + 1,
        None => bytes.len(),
    };
    Some(end)
}
/// Convenience wrapper around `scan_mdx_jsx_tag_end_inner` with no container
/// check and nesting depth 0.
fn scan_mdx_jsx_tag_end(bytes: &[u8]) -> Option<usize> {
scan_mdx_jsx_tag_end_inner(bytes, None, 0)
}
/// Scans a single JSX tag (opening, closing, self-closing or fragment) and
/// returns the index just past its terminating `>`.
///
/// Scanning starts at index 1; `bytes[0]` is never inspected here.
/// NOTE(review): callers appear to always pass a slice that begins with `<` -
/// confirm before relying on it.
///
/// * `container_check` - optional callback run on the text after every line
///   break that occurs between tokens of the tag; a mismatch aborts the scan.
///   It is not forwarded to expressions scanned inside the tag.
/// * `nesting_depth` - recursion depth; values above `MAX_MDX_NESTING` abort.
///   Expressions inside the tag are scanned with `nesting_depth + 1`.
///
/// Returns `None` when the bytes do not form a complete tag.
fn scan_mdx_jsx_tag_end_inner(
bytes: &[u8],
container_check: Option<ContainerLineCheck<'_>>,
nesting_depth: u32,
) -> Option<usize> {
if nesting_depth > MAX_MDX_NESTING {
return None;
}
let mut ix = 1;
// `</` introduces a closing tag; whitespace may follow the slash.
let is_closing = ix < bytes.len() && bytes[ix] == b'/';
if is_closing {
ix += 1;
while ix < bytes.len() && is_mdx_unicode_whitespace(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
}
// Fragment: `<>` or `</>`.
if ix < bytes.len() && bytes[ix] == b'>' {
return Some(ix + 1);
}
if ix >= bytes.len() || !is_jsx_name_start(bytes, ix) {
return None;
}
ix += char_len_utf8(bytes[ix]);
// Tag name: `name`, `ns:name` (at most one `:`) or `a.b.c` (member chain).
// Namespace and member syntax cannot be combined.
let mut saw_namespace = false;
let mut saw_member = false;
loop {
while ix < bytes.len() && is_jsx_name_continue(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
// Remember where the name ended so that whitespace which turns out not to
// precede a `:` / `.` separator is not consumed here.
let save = ix;
while ix < bytes.len() && is_mdx_unicode_whitespace(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
if ix >= bytes.len() {
ix = save;
break;
}
match bytes[ix] {
b':' => {
if saw_namespace || saw_member {
ix = save;
break;
}
saw_namespace = true;
ix += 1;
}
b'.' => {
if saw_namespace {
ix = save;
break;
}
saw_member = true;
ix += 1;
}
_ => {
ix = save;
break;
}
}
// After a separator, optional whitespace and then another name must follow.
while ix < bytes.len() && is_mdx_unicode_whitespace(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
if ix >= bytes.len() || !is_jsx_name_start(bytes, ix) {
return None;
}
ix += char_len_utf8(bytes[ix]);
}
// The name must be followed by `>`, `/`, `{`, whitespace or the end of input.
if ix < bytes.len() {
match bytes[ix] {
b'>' | b'/' | b'{' => {}
_ if is_mdx_unicode_whitespace(bytes, ix) => {}
_ => return None,
}
}
// Attribute loop: each iteration handles the tag end, a spread expression or
// one attribute.
loop {
// Skip whitespace; every line break re-validates the container prefix.
while ix < bytes.len() {
match bytes[ix] {
b' ' | b'\t' => ix += 1,
b'\n' | b'\r' => {
let was_cr = bytes[ix] == b'\r';
ix += 1;
if was_cr && ix < bytes.len() && bytes[ix] == b'\n' {
ix += 1;
}
check_container_after_newline(bytes, &mut ix, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, ix) => {
ix += char_len_utf8(bytes[ix]);
}
_ => break,
}
}
// Input ended before the tag was closed.
if ix >= bytes.len() {
return None;
}
match bytes[ix] {
b'>' => return Some(ix + 1),
// A closing tag may contain nothing but its name.
_ if is_closing => return None,
b'/' => {
// Self-closing: `/`, optional whitespace, then `>`.
let mut j = ix + 1;
while j < bytes.len() {
match bytes[j] {
b' ' | b'\t' => j += 1,
b'\n' | b'\r' => {
let was_cr = bytes[j] == b'\r';
j += 1;
if was_cr && j < bytes.len() && bytes[j] == b'\n' {
j += 1;
}
check_container_after_newline(bytes, &mut j, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, j) => {
j += char_len_utf8(bytes[j]);
}
_ => break,
}
}
if j < bytes.len() && bytes[j] == b'>' {
return Some(j + 1);
}
return None;
}
b'{' => {
// A brace in attribute position must be a spread: `{...expr}`.
if !looks_like_spread(&bytes[ix..]) {
return None;
}
let expr_len = scan_mdx_expression_end_inner(
&bytes[ix..],
false,
None,
false,
true,
nesting_depth + 1,
)?;
ix += expr_len;
}
_ if is_jsx_name_start(bytes, ix) => {
// Attribute name, optionally namespaced with a single `:`.
ix += char_len_utf8(bytes[ix]);
let mut attr_saw_namespace = false;
while ix < bytes.len() {
if bytes[ix] == b':' {
if attr_saw_namespace {
return None;
}
attr_saw_namespace = true;
ix += 1;
if ix >= bytes.len() || !is_jsx_name_start(bytes, ix) {
return None;
}
ix += char_len_utf8(bytes[ix]);
} else if is_jsx_name_continue(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
} else {
break;
}
}
// Look ahead past whitespace for `=`. `ix` is only moved when a value
// follows; otherwise the whitespace is consumed by the next iteration of
// the attribute loop.
let mut peek = ix;
while peek < bytes.len() {
match bytes[peek] {
b' ' | b'\t' => peek += 1,
b'\n' | b'\r' => {
let was_cr = bytes[peek] == b'\r';
peek += 1;
if was_cr && peek < bytes.len() && bytes[peek] == b'\n' {
peek += 1;
}
check_container_after_newline(bytes, &mut peek, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, peek) => {
peek += char_len_utf8(bytes[peek]);
}
_ => break,
}
}
if peek < bytes.len() && bytes[peek] == b'=' {
ix = peek + 1;
// Whitespace between `=` and the value.
while ix < bytes.len() {
match bytes[ix] {
b' ' | b'\t' => ix += 1,
b'\n' | b'\r' => {
let was_cr = bytes[ix] == b'\r';
ix += 1;
if was_cr && ix < bytes.len() && bytes[ix] == b'\n' {
ix += 1;
}
check_container_after_newline(bytes, &mut ix, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, ix) => {
ix += char_len_utf8(bytes[ix]);
}
_ => break,
}
}
if ix >= bytes.len() {
return None;
}
// Attribute value: a quoted string (scanned up to the next matching quote,
// with no escape handling) or a `{...}` expression.
match bytes[ix] {
b'"' => {
ix += 1;
while ix < bytes.len() && bytes[ix] != b'"' {
ix += 1;
}
if ix >= bytes.len() {
return None;
}
ix += 1;
}
b'\'' => {
ix += 1;
while ix < bytes.len() && bytes[ix] != b'\'' {
ix += 1;
}
if ix >= bytes.len() {
return None;
}
ix += 1;
}
b'{' => {
let expr_len = scan_mdx_expression_end_inner(
&bytes[ix..],
false,
None,
false,
true,
nesting_depth + 1,
)?;
ix += expr_len;
}
_ => return None,
}
}
}
_ => return None,
}
}
}
/// Reports whether `bytes` (which must start with `{`) is followed, after
/// optional ASCII whitespace, by the spread operator `...`.
fn looks_like_spread(bytes: &[u8]) -> bool {
    debug_assert_eq!(bytes.first(), Some(&b'{'));
    bytes
        .iter()
        .skip(1)
        .skip_while(|b| matches!(**b, b' ' | b'\t' | b'\n' | b'\r'))
        .take(3)
        .eq(b"...".iter())
}
/// Scans an MDX ESM block (`import ...` / `export ...`) at the start of
/// `bytes`.
///
/// The block must begin with `import` followed by a space, tab or `{`, or
/// with `export` followed by a space, tab, `{`, `*`, `\n` or `\r`; otherwise
/// `None` is returned. The block extends line by line (lines are delimited by
/// `\n`) until the end of input or until the next line starts with `\n` or
/// `\r`. The returned index points just past the last line of the block.
pub(crate) fn scan_mdx_esm(bytes: &[u8]) -> Option<usize> {
    const OPENERS: [&[u8]; 9] = [
        b"import ",
        b"import\t",
        b"import{",
        b"export ",
        b"export\t",
        b"export{",
        b"export*",
        b"export\n",
        b"export\r",
    ];
    if !OPENERS.iter().any(|opener| bytes.starts_with(opener)) {
        return None;
    }

    let mut ix = 0;
    loop {
        // Jump to the start of the next line (or to the end of input).
        ix = match memchr(b'\n', &bytes[ix..]) {
            Some(offset) => ix + offset + 1,
            None => bytes.len(),
        };
        let block_ends_here = match bytes.get(ix).copied() {
            None => true,
            Some(next) => next == b'\n' || next == b'\r',
        };
        if block_ends_here {
            return Some(ix);
        }
    }
}
/// Scans a flow-level JSX block: a JSX tag at the start of `bytes`, followed
/// on the same line by any number of further tags and expressions.
///
/// Items may be separated by spaces and tabs. Two expressions may not follow
/// each other directly; an expression is only accepted right after a tag.
/// Anything else before the end of the line makes the scan fail.
///
/// `container_check` is forwarded to the tag scanner for tags that span
/// several lines.
///
/// On success returns the index just past the line on which the last item
/// ended (see `scan_to_line_end`).
pub(crate) fn scan_mdx_jsx_block(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    if bytes.len() < 2 || bytes[0] != b'<' {
        return None;
    }
    let closing = bytes[1] == b'/';
    let mut name_ix = if closing { 2 } else { 1 };
    if closing {
        while name_ix < bytes.len() && is_mdx_unicode_whitespace(bytes, name_ix) {
            name_ix += char_len_utf8(bytes[name_ix]);
        }
    }

    let first = bytes.get(name_ix).copied()?;
    let mut cursor = if first == b'>' {
        // Fragment tag.
        name_ix + 1
    } else if is_jsx_name_start(bytes, name_ix) {
        scan_mdx_jsx_tag_end_inner(bytes, container_check, 0)?
    } else {
        return None;
    };

    let mut prev_was_tag = true;
    loop {
        while matches!(bytes.get(cursor).copied(), Some(b' ' | b'\t')) {
            cursor += 1;
        }
        match bytes.get(cursor).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(b'<') => {
                cursor += scan_mdx_jsx_tag_end_inner(&bytes[cursor..], container_check, 0)?;
                prev_was_tag = true;
            }
            Some(b'{') => {
                let len = scan_mdx_expression_end(&bytes[cursor..], false)?;
                if !prev_was_tag {
                    return None;
                }
                cursor += len;
                prev_was_tag = false;
            }
            Some(_) => return None,
        }
    }
    scan_to_line_end(bytes, cursor)
}
/// Scans a flow-level expression block: a `{...}` expression at the start of
/// `bytes`, followed on the same line by any number of JSX tags and further
/// expressions.
///
/// Items may be separated by spaces and tabs. A follow-up expression is only
/// accepted directly after a tag, and is scanned in inline mode. Anything
/// else before the end of the line makes the scan fail.
///
/// `container_check` is applied to the leading expression and to tags.
///
/// On success returns the index just past the line on which the last item
/// ended (see `scan_to_line_end`).
pub(crate) fn scan_mdx_expression_block(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    let mut cursor =
        scan_mdx_expression_end_inner(bytes, false, container_check, false, true, 0)?;
    let mut prev_was_tag = false;
    loop {
        while matches!(bytes.get(cursor).copied(), Some(b' ' | b'\t')) {
            cursor += 1;
        }
        match bytes.get(cursor).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(b'<') => {
                if let Some(end) =
                    scan_mdx_jsx_tag_end_inner(&bytes[cursor..], container_check, 0)
                {
                    cursor += end;
                } else if bytes.get(cursor + 1).copied() == Some(b'>') {
                    // Bare `<>`.
                    cursor += 2;
                } else if bytes.get(cursor + 1).copied() == Some(b'/')
                    && bytes.get(cursor + 2).copied() == Some(b'>')
                {
                    // Bare `</>`.
                    cursor += 3;
                } else {
                    return None;
                }
                prev_was_tag = true;
            }
            Some(b'{') => {
                let len = scan_mdx_expression_end(&bytes[cursor..], true)?;
                if !prev_was_tag {
                    return None;
                }
                cursor += len;
                prev_was_tag = false;
            }
            Some(_) => return None,
        }
    }
    scan_to_line_end(bytes, cursor)
}
/// Scans an inline `{...}` expression at the start of `bytes`.
///
/// Returns `(content_start, content_end, total_len)`: the content lies in
/// `bytes[1..total_len - 1]` and `total_len` includes both braces.
pub(crate) fn scan_mdx_inline_expression(bytes: &[u8]) -> Option<(usize, usize, usize)> {
    scan_mdx_expression_end(bytes, true).map(|total| (1, total - 1, total))
}
/// Scans an inline `{...}` expression that may continue across lines inside a
/// container.
///
/// The scan runs in inline and lazy mode: lines for which `container_check`
/// fails are treated as lazy continuation lines, and `allow_lazy_body`
/// decides whether such lines may carry expression content.
///
/// Returns `(content_start, content_end, total_len)`: the content lies in
/// `bytes[1..total_len - 1]` and `total_len` includes both braces.
pub(crate) fn scan_mdx_inline_expression_in_container(
    bytes: &[u8],
    container_check: ContainerLineCheck<'_>,
    allow_lazy_body: bool,
) -> Option<(usize, usize, usize)> {
    let inline = true;
    let lazy_mode = true;
    scan_mdx_expression_end_inner(
        bytes,
        inline,
        Some(container_check),
        lazy_mode,
        allow_lazy_body,
        0,
    )
    .map(|total| (1, total - 1, total))
}
/// Scans an inline JSX tag at the start of `bytes` without any container
/// check; see `scan_mdx_inline_jsx_inner`.
pub(crate) fn scan_mdx_inline_jsx(bytes: &[u8]) -> Option<usize> {
scan_mdx_inline_jsx_inner(bytes, None)
}
fn scan_mdx_inline_jsx_inner(
bytes: &[u8],
container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
if bytes.len() < 2 || bytes[0] != b'<' {
return None;
}
let is_closing = bytes[1] == b'/';
let mut name_start = if is_closing { 2 } else { 1 };
if is_closing {
while name_start < bytes.len() && is_mdx_unicode_whitespace(bytes, name_start) {
name_start += char_len_utf8(bytes[name_start]);
}
}
if name_start >= bytes.len() {
return None;
}
if bytes[name_start] == b'>' {
return Some(name_start + 1);
}
if !is_jsx_name_start(bytes, name_start) {
return None;
}
let mut ix = name_start + char_len_utf8(bytes[name_start]);
let mut saw_namespace = false;
let mut saw_member = false;
loop {
while ix < bytes.len() && is_jsx_name_continue(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
let save = ix;
while ix < bytes.len() && is_mdx_unicode_whitespace(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
if ix >= bytes.len() {
ix = save;
break;
}
match bytes[ix] {
b':' => {
if saw_namespace || saw_member {
ix = save;
break;
}
saw_namespace = true;
ix += 1;
}
b'.' => {
if saw_namespace {
ix = save;
break;
}
saw_member = true;
ix += 1;
}
_ => {
ix = save;
break;
}
}
while ix < bytes.len() && is_mdx_unicode_whitespace(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
}
if ix >= bytes.len() || !is_jsx_name_start(bytes, ix) {
return None;
}
ix += char_len_utf8(bytes[ix]);
}
if ix < bytes.len() {
match bytes[ix] {
b'>' | b'/' | b'{' => {}
_ if is_mdx_unicode_whitespace(bytes, ix) => {}
_ => return None,
}
}
loop {
while ix < bytes.len() {
match bytes[ix] {
b' ' | b'\t' => ix += 1,
b'\n' | b'\r' => {
let was_cr = bytes[ix] == b'\r';
ix += 1;
if was_cr && ix < bytes.len() && bytes[ix] == b'\n' {
ix += 1;
}
check_container_after_newline(bytes, &mut ix, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, ix) => {
ix += char_len_utf8(bytes[ix]);
}
_ => break,
}
}
if ix >= bytes.len() {
return None;
}
match bytes[ix] {
b'>' => return Some(ix + 1),
_ if is_closing => return None,
b'/' => {
let mut j = ix + 1;
while j < bytes.len() {
match bytes[j] {
b' ' | b'\t' => j += 1,
b'\n' | b'\r' => {
let was_cr = bytes[j] == b'\r';
j += 1;
if was_cr && j < bytes.len() && bytes[j] == b'\n' {
j += 1;
}
check_container_after_newline(bytes, &mut j, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, j) => {
j += char_len_utf8(bytes[j]);
}
_ => break,
}
}
if j < bytes.len() && bytes[j] == b'>' {
return Some(j + 1);
}
return None;
}
b'{' => {
if !looks_like_spread(&bytes[ix..]) {
return None;
}
let expr_len = scan_mdx_expression_end(&bytes[ix..], false)?;
ix += expr_len;
}
_ if is_jsx_name_start(bytes, ix) => {
ix += char_len_utf8(bytes[ix]);
let mut attr_saw_namespace = false;
while ix < bytes.len() {
if bytes[ix] == b':' {
if attr_saw_namespace {
return None;
}
attr_saw_namespace = true;
ix += 1;
if ix >= bytes.len() || !is_jsx_name_start(bytes, ix) {
return None;
}
ix += char_len_utf8(bytes[ix]);
} else if is_jsx_name_continue(bytes, ix) {
ix += char_len_utf8(bytes[ix]);
} else {
break;
}
}
let mut peek = ix;
while peek < bytes.len() {
match bytes[peek] {
b' ' | b'\t' => peek += 1,
b'\n' | b'\r' => {
let was_cr = bytes[peek] == b'\r';
peek += 1;
if was_cr && peek < bytes.len() && bytes[peek] == b'\n' {
peek += 1;
}
check_container_after_newline(bytes, &mut peek, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, peek) => {
peek += char_len_utf8(bytes[peek]);
}
_ => break,
}
}
if peek < bytes.len() && bytes[peek] == b'=' {
ix = peek + 1;
while ix < bytes.len() {
match bytes[ix] {
b' ' | b'\t' => ix += 1,
b'\n' | b'\r' => {
let was_cr = bytes[ix] == b'\r';
ix += 1;
if was_cr && ix < bytes.len() && bytes[ix] == b'\n' {
ix += 1;
}
check_container_after_newline(bytes, &mut ix, &container_check)?;
}
_ if is_mdx_unicode_whitespace(bytes, ix) => {
ix += char_len_utf8(bytes[ix]);
}
_ => break,
}
}
if ix >= bytes.len() {
return None;
}
match bytes[ix] {
b'"' => {
ix += 1;
while ix < bytes.len() && bytes[ix] != b'"' {
ix += 1;
}
if ix >= bytes.len() {
return None;
}
ix += 1;
}
b'\'' => {
ix += 1;
while ix < bytes.len() && bytes[ix] != b'\'' {
ix += 1;
}
if ix >= bytes.len() {
return None;
}
ix += 1;
}
b'{' => {
let expr_len = scan_mdx_expression_end(&bytes[ix..], false)?;
ix += expr_len;
}
_ => return None,
}
}
}
_ => return None,
}
}
}
impl<'a, 'b> FirstPass<'a, 'b> {
/// Appends an `MdxEsm` item covering `start_ix..end_ix` to the tree.
///
/// The stored text is `self.text[start_ix..end_ix]` with trailing `\n` / `\r`
/// characters removed. Returns `end_ix`.
pub(crate) fn parse_mdx_esm(&mut self, start_ix: usize, end_ix: usize) -> usize {
    let source = &self.text[start_ix..end_ix];
    let trimmed = source.trim_end_matches(|c: char| c == '\n' || c == '\r');
    let body = ItemBody::MdxEsm(self.allocs.allocate_cow(trimmed.into()));
    self.tree.append(Item {
        start: start_ix,
        end: end_ix,
        body,
    });
    end_ix
}
/// Parses a flow-level run of JSX tags and expressions located at
/// `start_ix..end_ix` and appends one tree item per tag / expression.
///
/// Steps:
/// 1. Container prefixes are stripped from continuation lines and trailing
///    whitespace is trimmed, giving the `raw` text that is scanned.
/// 2. A map from offsets in `raw` back to offsets in the original text is
///    built, so that item spans and error offsets refer to the source.
/// 3. `raw` is walked item by item. Tags become `MdxJsxFlowElement` items
///    (their attribute expressions are validated); expressions become
///    `MdxFlowExpression` items (their body is dedented and validated).
///    Validation problems are pushed to `self.mdx_errors`.
///
/// Items may be separated by spaces *or tabs*, matching what
/// `scan_mdx_jsx_block` / `scan_mdx_expression_block` accept; previously a
/// tab separator made the walk stop and silently dropped the remaining items.
///
/// Returns `end_ix`.
pub(crate) fn parse_mdx_jsx_flow(&mut self, start_ix: usize, end_ix: usize) -> usize {
    let raw = {
        let stripped = self.strip_container_prefixes(start_ix, end_ix);
        stripped.trim_end().to_string()
    };
    let orig_bytes = self.text.as_bytes();
    let stripped_bytes = raw.as_bytes();

    // Entries are `(offset in raw, offset in original, phantom spaces)`, one
    // per line start. The phantom count is the number of spaces that exist in
    // `raw` at that point but have no byte of their own in the original.
    let mut map: Vec<(usize, usize, usize)> = Vec::new();
    map.push((0, start_ix, 0));
    {
        let mut s_pos = 0usize;
        let mut o_pos = start_ix;
        while s_pos < stripped_bytes.len() && o_pos < end_ix {
            let b = stripped_bytes[s_pos];
            if b == b'\n' || b == b'\r' {
                s_pos += 1;
                o_pos += 1;
                if b == b'\r'
                    && s_pos < stripped_bytes.len()
                    && stripped_bytes[s_pos] == b'\n'
                    && o_pos < end_ix
                    && orig_bytes[o_pos] == b'\n'
                {
                    s_pos += 1;
                    o_pos += 1;
                }
                // Skip the container prefix in the original text.
                let mut ls = crate::scanners::LineStart::new(&orig_bytes[o_pos..end_ix]);
                let _ = crate::parse::scan_containers(&self.tree, &mut ls, self.options);
                o_pos += ls.bytes_scanned();
                let phantom = ls.remaining_space();
                map.push((s_pos, o_pos, phantom));
                s_pos += phantom;
            } else {
                s_pos += 1;
                o_pos += 1;
            }
        }
    }
    let stripped_to_orig = |s_pos: usize| -> usize {
        let idx = match map.binary_search_by(|probe| probe.0.cmp(&s_pos)) {
            Ok(i) => i,
            Err(i) => i.saturating_sub(1),
        };
        let (base_s, base_o, phantom) = map[idx];
        let offset = s_pos - base_s;
        // Positions inside the phantom spaces all map to the line's base.
        if offset <= phantom {
            base_o
        } else {
            base_o + (offset - phantom)
        }
    };

    let mut pos = 0;
    while pos < raw.len() {
        // Skip the separators the block scanners allow between items.
        while pos < raw.len() && matches!(raw.as_bytes()[pos], b' ' | b'\t') {
            pos += 1;
        }
        if pos >= raw.len() {
            break;
        }
        let remaining = &raw.as_bytes()[pos..];
        if remaining[0] == b'<' {
            // If the tag cannot be re-scanned, treat the rest of `raw` as the tag.
            let tag_end = scan_mdx_jsx_tag_end(remaining).unwrap_or(raw.len() - pos);
            let tag_raw = &raw[pos..pos + tag_end];
            let container_content_col = self.container_content_col();
            let extra_strip_cols = self.directive_initial_size_sum();
            let jsx_data =
                parse_jsx_tag_with_column(tag_raw, container_content_col, extra_strip_cols)
                    .into_static();
            validate_jsx_expressions(
                &jsx_data.attrs,
                stripped_to_orig(pos),
                &mut self.mdx_expr_allocator,
                &mut self.mdx_errors,
            );
            let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
            self.tree.append(Item {
                start: stripped_to_orig(pos),
                end: stripped_to_orig(pos + tag_end),
                body: ItemBody::MdxJsxFlowElement(jsx_ix),
            });
            pos += tag_end;
        } else if remaining[0] == b'{' {
            // If the expression cannot be re-scanned, take the rest of `raw`.
            let expr_end = scan_mdx_expression_end(remaining, false).unwrap_or(raw.len() - pos);
            let inner_raw = &raw[pos + 1..pos + expr_end - 1];
            let inner: CowStr<'static> = CowStr::from(
                dedent_expression_continuation(inner_raw, self.container_content_col())
                    .into_owned(),
            );
            // Phantom spaces are an internal marker; the JavaScript parser
            // must not see them.
            let inner_for_validate: alloc::borrow::Cow<'_, str> =
                if inner.contains(PHANTOM_SPACE) {
                    alloc::borrow::Cow::Owned(inner.replace(PHANTOM_SPACE, ""))
                } else {
                    alloc::borrow::Cow::Borrowed(inner.as_ref())
                };
            if let Some((err_offset, detail)) =
                try_parse_expression_body(&inner_for_validate, &mut self.mdx_expr_allocator)
            {
                self.mdx_errors.push((
                    stripped_to_orig(pos + 1) + err_offset,
                    alloc::format!("Could not parse expression with oxc: {detail}"),
                ));
            }
            let cow_ix = self.allocs.allocate_cow(inner);
            self.tree.append(Item {
                start: stripped_to_orig(pos),
                end: stripped_to_orig(pos + expr_end),
                body: ItemBody::MdxFlowExpression(cow_ix),
            });
            pos += expr_end;
        } else {
            break;
        }
    }
    end_ix
}
/// Sums the `initial_size` of every container directive on the current tree
/// spine. Returns 0 when `Options::ENABLE_DIRECTIVE` is not set.
pub(crate) fn directive_initial_size_sum(&self) -> usize {
    use crate::parse::ItemBody;
    if !self.options.contains(crate::Options::ENABLE_DIRECTIVE) {
        return 0;
    }
    self.tree
        .walk_spine()
        .into_iter()
        .map(|&node_ix| match self.tree[node_ix].item.body {
            ItemBody::ContainerDirective(_, dir_ix) => {
                self.allocs.directive_ref(dir_ix).initial_size as usize
            }
            _ => 0,
        })
        .sum()
}
/// Computes the 1-based column at which content starts inside the containers
/// currently on the tree spine.
///
/// Starting from column 1, each block quote adds 2, each list item or
/// definition-list definition adds its stored indent, and each footnote
/// definition adds 4 when `Options::ENABLE_FOOTNOTES` is set. Every other
/// node, container directives included, adds nothing.
pub(crate) fn container_content_col(&self) -> usize {
    use crate::parse::ItemBody;
    let footnotes_enabled = self.options.contains(crate::Options::ENABLE_FOOTNOTES);
    let mut col = 1usize;
    for &node_ix in self.tree.walk_spine() {
        let width = match self.tree[node_ix].item.body {
            ItemBody::BlockQuote(..) => 2,
            ItemBody::ListItem(indent, _) | ItemBody::DefinitionListDefinition(indent) => indent,
            ItemBody::FootnoteDefinition(..) if footnotes_enabled => 4,
            _ => 0,
        };
        col += width;
    }
    col
}
/// Builds the value of an inline expression from `self.text[start_ix..end_ix]`.
///
/// The first line is copied verbatim. For every following line the line break
/// is copied, the container prefix is skipped (when the tree spine is not
/// empty), and up to two columns of leading spaces / tabs are stripped. Tabs
/// are measured against 4-column tab stops; columns of a tab that are not
/// stripped are emitted as ordinary spaces.
pub(crate) fn inline_expression_value(
&self,
start_ix: usize,
end_ix: usize,
) -> alloc::string::String {
const INDENT: usize = 2;
const TAB_SIZE: usize = 4;
let base_col = self.container_content_col().max(1);
let bytes = self.text.as_bytes();
let mut out = alloc::string::String::with_capacity(end_ix - start_ix);
let mut pos = start_ix;
// First line: copied unchanged up to (not including) the first line break.
let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
.map(|i| pos + i)
.unwrap_or(end_ix);
out.push_str(&self.text[pos..line_end]);
pos = line_end;
while pos < end_ix {
// Copy the line break: `\r`, `\n` or `\r\n`.
if bytes[pos] == b'\r' {
out.push('\r');
pos += 1;
}
if pos < end_ix && bytes[pos] == b'\n' {
out.push('\n');
pos += 1;
}
if pos >= end_ix {
break;
}
// Skip the container prefix of this line. `post_prefix_col` is the
// zero-based column used for tab-stop arithmetic: the container content
// column when every container on the spine matched, otherwise 0.
let (post_prefix_col, partial_spaces) = if self.tree.spine_len() == 0 {
(0usize, 0usize)
} else {
let mut ls = LineStart::new(&bytes[pos..end_ix]);
let matched = scan_containers(&self.tree, &mut ls, self.options);
pos += ls.bytes_scanned();
let partial = ls.remaining_space();
let col = if matched == self.tree.spine_len() {
base_col - 1
} else {
0
};
(col, partial)
};
// Re-emit the spaces that `LineStart` reports as remaining after the prefix.
for _ in 0..partial_spaces {
out.push(' ');
}
// Strip up to INDENT columns of indentation.
let mut stripped = 0usize;
let mut column = post_prefix_col;
while pos < end_ix && stripped < INDENT {
let b = bytes[pos];
if b == b' ' {
stripped += 1;
column += 1;
pos += 1;
} else if b == b'\t' {
let next_col = (column / TAB_SIZE + 1) * TAB_SIZE;
let tab_width = next_col - column;
let to_strip = (INDENT - stripped).min(tab_width);
stripped += to_strip;
// The part of the tab that was not stripped is kept as spaces.
for _ in 0..(tab_width - to_strip) {
out.push(' ');
}
column = next_col;
pos += 1;
} else {
break;
}
}
// Copy the rest of the line.
let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
.map(|i| pos + i)
.unwrap_or(end_ix);
out.push_str(&self.text[pos..line_end]);
pos = line_end;
}
out
}
/// Returns the text in `start_ix..end_ix` with container prefixes removed
/// from every line after the first.
///
/// When no containers are open the slice is borrowed unchanged. Otherwise
/// the first line is copied verbatim; for each following line the line
/// ending is copied, the container prefix is scanned off, any leftover
/// partial space reported by the line scanner is re-emitted as spaces, and
/// the rest of the line is copied.
///
/// The prefix scan looks at the whole remaining text, so it may consume
/// bytes beyond `end_ix`. In that case the rest of the range was prefix only
/// and copying stops, instead of slicing with a start past the end.
pub(crate) fn strip_container_prefixes(
    &self,
    start_ix: usize,
    end_ix: usize,
) -> alloc::borrow::Cow<'_, str> {
    if self.tree.spine_len() == 0 {
        return alloc::borrow::Cow::Borrowed(&self.text[start_ix..end_ix]);
    }
    let bytes = self.text.as_bytes();
    // The output is normally no longer than the input; pre-size to avoid regrowth.
    let mut result = alloc::string::String::with_capacity(end_ix.saturating_sub(start_ix));
    let mut pos = start_ix;
    let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
        .map(|i| pos + i)
        .unwrap_or(end_ix);
    result.push_str(&self.text[pos..line_end]);
    pos = line_end;
    while pos < end_ix {
        // Copy the line ending (`\r`, `\n` or `\r\n`) as written.
        if bytes[pos] == b'\r' {
            result.push('\r');
            pos += 1;
        }
        if pos < end_ix && bytes[pos] == b'\n' {
            result.push('\n');
            pos += 1;
        }
        if pos >= end_ix {
            break;
        }
        let mut ls = LineStart::new(&bytes[pos..]);
        let _ = scan_containers(&self.tree, &mut ls, self.options);
        let after_prefix = pos + ls.bytes_scanned();
        if after_prefix > end_ix {
            // The prefix extends past the requested range: nothing left to copy,
            // and `&bytes[after_prefix..end_ix]` would panic.
            break;
        }
        pos = after_prefix;
        for _ in 0..ls.remaining_space() {
            result.push(' ');
        }
        let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
            .map(|i| pos + i)
            .unwrap_or(end_ix);
        result.push_str(&self.text[pos..line_end]);
        pos = line_end;
    }
    alloc::borrow::Cow::Owned(result)
}
}
use crate::{
parse::{scan_containers, JsxAttr, JsxElementData},
scanners::LineStart,
};
impl<'a, 'b> FirstPass<'a, 'b> {
    /// Builds a closure that checks whether a line continues every container
    /// currently open on the tree spine.
    ///
    /// The closure returns `Some(n)`, with `n` the number of bytes consumed
    /// by the container prefixes, when all spine containers matched, and
    /// `None` otherwise.
    pub(crate) fn make_container_line_check(&self) -> impl Fn(&[u8]) -> Option<usize> + '_ {
        move |line_bytes: &[u8]| {
            let mut line_start = LineStart::new(line_bytes);
            let matched = scan_containers(&self.tree, &mut line_start, self.options);
            (matched == self.tree.spine_len()).then(|| line_start.bytes_scanned())
        }
    }

    /// Runs `scanner` over the source text starting at byte offset `ix`.
    ///
    /// See [`Self::scan_mdx_flow_in_container_bytes`] for how the container
    /// check is supplied.
    pub(crate) fn scan_mdx_flow_in_container(
        &self,
        ix: usize,
        scanner: impl Fn(&[u8], Option<ContainerLineCheck<'_>>) -> Option<usize>,
    ) -> Option<usize> {
        let tail = &self.text.as_bytes()[ix..];
        self.scan_mdx_flow_in_container_bytes(tail, scanner)
    }

    /// Runs `scanner` over `bytes`, handing it a container line check only
    /// when at least one container is open on the spine; otherwise the
    /// scanner receives `None`.
    pub(crate) fn scan_mdx_flow_in_container_bytes(
        &self,
        bytes: &[u8],
        scanner: impl Fn(&[u8], Option<ContainerLineCheck<'_>>) -> Option<usize>,
    ) -> Option<usize> {
        if self.tree.spine_len() == 0 {
            scanner(bytes, None)
        } else {
            let check = self.make_container_line_check();
            scanner(bytes, Some(&check))
        }
    }
}
/// Parses a raw JSX tag using the default position arguments: content
/// column 1 and no extra columns to strip.
///
/// Thin convenience wrapper around [`parse_jsx_tag_with_column`].
pub(crate) fn parse_jsx_tag<'a>(raw: &'a str) -> JsxElementData<'a> {
    const DEFAULT_CONTENT_COL: usize = 1;
    const DEFAULT_EXTRA_STRIP_COLS: usize = 0;
    parse_jsx_tag_with_column(raw, DEFAULT_CONTENT_COL, DEFAULT_EXTRA_STRIP_COLS)
}
/// Returns the 1-based column of byte offset `pos` within its line.
///
/// The line starts after the closest preceding `\n` or `\r` (or at offset 0).
/// Every byte advances the column by one, except a tab, which advances to
/// the next tab stop (stops every `TAB_WIDTH` columns, i.e. columns 1, 5, 9, ...).
///
/// # Panics
///
/// Panics if `pos` is greater than `bytes.len()`.
pub(crate) fn column_at(bytes: &[u8], pos: usize) -> usize {
    const TAB_WIDTH: usize = 4;
    // First byte of the line containing `pos`.
    let line_start = bytes[..pos]
        .iter()
        .rposition(|&b| b == b'\n' || b == b'\r')
        .map_or(0, |newline_ix| newline_ix + 1);
    bytes[line_start..pos].iter().fold(1usize, |col, &b| {
        if b == b'\t' {
            col + (TAB_WIDTH - ((col - 1) % TAB_WIDTH))
        } else {
            col + 1
        }
    })
}
/// Parses a raw JSX tag into its name, attributes and closing flags.
///
/// * `raw` - the tag text; surrounding whitespace is ignored for parsing but
///   `raw` itself is stored unchanged in the result.
/// * `container_content_col`, `extra_strip_cols` - forwarded to the attribute
///   parser, which uses them when normalising multi-line attribute
///   expressions.
///
/// A tag starting with `</` is reported as a closing tag with no attributes.
/// Otherwise `is_self_closing` is set when the tag ends in `/>` (whitespace
/// allowed between `/` and `>`), or when the text also contains the matching
/// closing tag (`</name>`, or `</>` for an empty name).
///
/// Empty input, or input whose first character is not a single byte, yields
/// an empty name instead of panicking on the slice that skips the leading `<`.
pub(crate) fn parse_jsx_tag_with_column<'a>(
    raw: &'a str,
    container_content_col: usize,
    extra_strip_cols: usize,
) -> JsxElementData<'a> {
    let s = raw.trim();
    if let Some(rest) = s.strip_prefix("</") {
        let name = extract_tag_name(rest.trim_start()).into_owned();
        return JsxElementData {
            name: name.into(),
            attrs: Vec::new(),
            raw: raw.into(),
            is_closing: true,
            is_self_closing: false,
        };
    }
    // `/>` with optional whitespace between the slash and the bracket.
    let ends_self_close = s.strip_suffix('>').map_or(false, |before| {
        before
            .trim_end_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'))
            .ends_with('/')
    });
    // Skip the leading `<`. `get` avoids a panic when `s` is empty or does
    // not start with a one-byte character.
    let name = extract_tag_name(s.get(1..).unwrap_or(""));
    let is_self_contained = if !name.is_empty() {
        let close_tag = alloc::format!("</{name}>");
        s.contains(&*close_tag)
    } else {
        s.contains("</>")
    };
    let is_self_closing = ends_self_close || is_self_contained;
    let attrs = parse_jsx_attrs(s, container_content_col, extra_strip_cols);
    JsxElementData {
        name: name.into_owned().into(),
        attrs,
        raw: raw.into(),
        is_closing: false,
        is_self_closing,
    }
}
/// Extracts a JSX element name from the start of `s`.
///
/// The name is a primary identifier optionally followed by member (`.name`)
/// segments and at most one namespace (`:name`) segment. Whitespace around a
/// separator is skipped and not included in the result, so `a . b` yields
/// `a.b`. Scanning stops at the first thing that does not continue the name:
/// end of input, a non-separator byte, a second `:`, or a separator that is
/// not followed by a name-start character.
///
/// Returns a borrowed slice when the name is just the primary identifier and
/// an owned string when segments were joined. Every exit returns the segments
/// collected so far, including when the input ends right after the last one.
fn extract_tag_name(s: &str) -> alloc::borrow::Cow<'_, str> {
    use alloc::borrow::Cow;
    let bytes = s.as_bytes();
    let mut i = 0;
    while i < bytes.len() && is_jsx_name_continue(bytes, i) {
        i += char_len_utf8(bytes[i]);
    }
    let primary_end = i;
    let mut j = primary_end;
    let mut saw_namespace = false;
    // Allocated lazily, only once a second segment is actually accepted.
    let mut owned: Option<alloc::string::String> = None;
    loop {
        while j < bytes.len() && is_mdx_unicode_whitespace(bytes, j) {
            j += char_len_utf8(bytes[j]);
        }
        if j >= bytes.len() {
            break;
        }
        let sep = bytes[j];
        if sep != b'.' && sep != b':' {
            break;
        }
        if sep == b':' {
            // Only one namespace separator is allowed in a name.
            if saw_namespace {
                break;
            }
            saw_namespace = true;
        }
        j += 1;
        while j < bytes.len() && is_mdx_unicode_whitespace(bytes, j) {
            j += char_len_utf8(bytes[j]);
        }
        if j >= bytes.len() || !is_jsx_name_start(bytes, j) {
            // Dangling separator: it is not part of the name.
            break;
        }
        let name_chunk_start = j;
        j += char_len_utf8(bytes[j]);
        while j < bytes.len() && is_jsx_name_continue(bytes, j) {
            j += char_len_utf8(bytes[j]);
        }
        let acc = owned.get_or_insert_with(|| s[..primary_end].into());
        acc.push(sep as char);
        acc.push_str(&s[name_chunk_start..j]);
    }
    owned.map_or_else(|| Cow::Borrowed(&s[..primary_end]), Cow::Owned)
}
/// Returns the prefix of `text` up to and including the `>` that ends the
/// opening tag, or all of `text` if no such `>` is found.
///
/// A `>` ends the tag only when it is outside every quoted string (`'`, `"`
/// or a backtick) and outside every `{ ... }` group. Inside a quoted string a
/// backslash escapes the next character, so `\"` does not close the string
/// while `\\` followed by `"` does.
fn extract_opening_tag(text: &str) -> &str {
    let mut depth = 0i32;
    // The quote character of the string we are inside, if any.
    let mut quote: Option<char> = None;
    // True when the previous character was an unescaped backslash in a string.
    let mut escaped = false;
    for (i, ch) in text.char_indices() {
        if let Some(open) = quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == open {
                quote = None;
            }
            continue;
        }
        match ch {
            '\'' | '"' | '`' => quote = Some(ch),
            '{' => depth += 1,
            '}' => depth -= 1,
            '>' if depth == 0 => return &text[..=i],
            _ => {}
        }
    }
    text
}
fn parse_jsx_attrs<'a>(
text: &'a str,
container_content_col: usize,
extra_strip_cols: usize,
) -> Vec<JsxAttr<'a>> {
let tag = extract_opening_tag(text);
let bytes = tag.as_bytes();
let len = bytes.len();
let mut attrs = Vec::new();
let mut i = 1;
if i < len && bytes[i] == b'/' {
i += 1;
}
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
let mut saw_namespace = false;
loop {
while i < len && is_jsx_name_continue(bytes, i) {
i += char_len_utf8(bytes[i]);
}
let save = i;
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len {
i = save;
break;
}
match bytes[i] {
b':' if !saw_namespace => {
saw_namespace = true;
i += 1;
}
b'.' if !saw_namespace => {
i += 1;
}
_ => {
i = save;
break;
}
}
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len || !is_jsx_name_start(bytes, i) {
break;
}
i += char_len_utf8(bytes[i]);
}
loop {
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len {
break;
}
if bytes[i] == b'>' || (bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'>') {
break;
}
if bytes[i] == b'{' {
i += 1;
let start = i;
let mut depth = 1i32;
while i < len && depth > 0 {
match bytes[i] {
b'{' => depth += 1,
b'}' => depth -= 1,
b'\'' | b'"' | b'`' => {
let q = bytes[i];
i += 1;
while i < len && bytes[i] != q {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
_ => {}
}
i += 1;
}
let value = tag[start..i.saturating_sub(1)].trim();
attrs.push(JsxAttr::Spread(value.into()));
continue;
}
let name_start = i;
while i < len {
if bytes[i] == b':' {
i += 1;
} else if is_jsx_name_continue(bytes, i) {
i += char_len_utf8(bytes[i]);
} else {
break;
}
}
if i == name_start {
i += 1;
continue;
}
let name = &tag[name_start..i];
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i < len && bytes[i] == b'=' {
i += 1;
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len {
attrs.push(JsxAttr::Boolean(name.into()));
continue;
}
if bytes[i] == b'"' || bytes[i] == b'\'' {
let q = bytes[i];
i += 1;
let val_start = i;
while i < len && bytes[i] != q {
i += 1;
}
let raw_value = &tag[val_start..i];
if i < len {
i += 1;
}
let value = strip_attr_continuation_indent(raw_value);
let decoded = decode_attr_entities(value.as_ref());
attrs.push(JsxAttr::Literal(name.into(), decoded.into_owned().into()));
} else if bytes[i] == b'{' {
i += 1;
let val_start = i;
let mut depth = 1i32;
while i < len && depth > 0 {
match bytes[i] {
b'{' => depth += 1,
b'}' => depth -= 1,
b'\'' | b'"' | b'`' => {
let q = bytes[i];
i += 1;
while i < len && bytes[i] != q {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
_ => {}
}
i += 1;
}
let value = &tag[val_start..i.saturating_sub(1)];
let normalized = if value.contains('\n') || value.contains('\r') {
alloc::borrow::Cow::Owned(strip_expression_indent(
value,
container_content_col,
extra_strip_cols,
))
} else {
alloc::borrow::Cow::Borrowed(value)
};
attrs.push(JsxAttr::Expression(
name.into(),
normalized.into_owned().into(),
));
} else {
attrs.push(JsxAttr::Boolean(name.into()));
}
} else {
attrs.push(JsxAttr::Boolean(name.into()));
}
}
attrs
}
/// Checks every expression-valued attribute in `attrs` with
/// `try_parse_expression_body` and records a diagnostic for each failure.
///
/// * `JsxAttr::Expression` values are checked as they are.
/// * `JsxAttr::Spread` values are checked after dropping leading whitespace
///   and the `...` prefix; a spread without that prefix is skipped.
/// * All other attribute kinds are skipped.
///
/// `PHANTOM_SPACE` characters are removed from the text before parsing.
/// Each failure pushes `(tag_offset, message)` onto `mdx_errors`; the offset
/// reported by the parser is not used.
pub(crate) fn validate_jsx_expressions(
    attrs: &[JsxAttr<'_>],
    tag_offset: usize,
    allocator: &mut Allocator,
    mdx_errors: &mut Vec<(usize, alloc::string::String)>,
) {
    use alloc::borrow::Cow;
    for attr in attrs {
        let source: &str = match attr {
            JsxAttr::Expression(_, expr) => expr.as_ref(),
            JsxAttr::Spread(spread) => {
                match spread.as_ref().trim_start().strip_prefix("...") {
                    Some(operand) => operand,
                    None => continue,
                }
            }
            _ => continue,
        };
        // Only allocate when there is something to remove.
        let body: Cow<'_, str> = if source.contains(PHANTOM_SPACE) {
            Cow::Owned(source.replace(PHANTOM_SPACE, ""))
        } else {
            Cow::Borrowed(source)
        };
        if let Some((_, detail)) = try_parse_expression_body(&body, allocator) {
            let message = alloc::format!("Could not parse expression with oxc: {detail}");
            mdx_errors.push((tag_offset, message));
        }
    }
}