use memchr::memchr;
use oxc_allocator::Allocator;
use oxc_parser::{ParseOptions, Parser};
use oxc_span::SourceType;
use crate::{
firstpass::FirstPass,
parse::{Item, ItemBody},
strings::CowStr,
};
/// Removes up to two columns of leading indentation from every continuation
/// line of an expression body.
///
/// The first line is copied untouched. Line terminators (`\n`, `\r`, `\r\n`)
/// are preserved verbatim. Tabs are expanded against 4-column tab stops,
/// measured from `container_content_col` (1-based; `0` is treated as `1`);
/// when a tab is only partially consumed, the unconsumed columns are emitted
/// as spaces.
///
/// Returns the input borrowed when it contains no line break.
fn dedent_expression_continuation(
    s: &str,
    container_content_col: usize,
) -> alloc::borrow::Cow<'_, str> {
    const INDENT: usize = 2;
    const TAB_SIZE: usize = 4;

    let is_eol = |b: u8| b == b'\n' || b == b'\r';
    let bytes = s.as_bytes();

    // Single-line input needs no rewriting at all.
    let Some(first_break) = bytes.iter().position(|&b| is_eol(b)) else {
        return alloc::borrow::Cow::Borrowed(s);
    };

    // Zero-based column at which every continuation line starts.
    let start_column = container_content_col.max(1) - 1;

    let mut out = alloc::string::String::with_capacity(s.len());
    out.push_str(&s[..first_break]);

    // Invariant: `cursor` always rests on a line terminator or at the end.
    let mut cursor = first_break;
    while cursor < bytes.len() {
        let eol_len = if bytes[cursor] == b'\r' && bytes.get(cursor + 1) == Some(&b'\n') {
            2
        } else {
            1
        };
        out.push_str(&s[cursor..cursor + eol_len]);
        cursor += eol_len;

        // Consume at most INDENT columns of spaces/tabs.
        let mut removed = 0usize;
        let mut column = start_column;
        while removed < INDENT {
            match bytes.get(cursor).copied() {
                Some(b' ') => {
                    removed += 1;
                    column += 1;
                }
                Some(b'\t') => {
                    let width = TAB_SIZE - column % TAB_SIZE;
                    let taken = width.min(INDENT - removed);
                    removed += taken;
                    // Keep whatever part of the tab was not stripped.
                    out.extend((0..width - taken).map(|_| ' '));
                    column += width;
                }
                _ => break,
            }
            cursor += 1;
        }

        let line_end = bytes[cursor..]
            .iter()
            .position(|&b| is_eol(b))
            .map_or(bytes.len(), |offset| cursor + offset);
        out.push_str(&s[cursor..line_end]);
        cursor = line_end;
    }
    alloc::borrow::Cow::Owned(out)
}
/// Returns a copy of `s` in which every line after the first has lost up to
/// two columns of leading indentation.
///
/// Tabs are measured against 4-column tab stops starting from
/// `container_content_col` (1-based; `0` is treated as `1`). A tab that is
/// wider than the remaining budget is replaced by the spaces that were not
/// stripped. Line terminators are copied through unchanged.
fn strip_expression_indent(s: &str, container_content_col: usize) -> alloc::string::String {
    const INDENT_SIZE: usize = 2;
    const TAB_WIDTH: usize = 4;

    let first_col = container_content_col.max(1);
    let bytes = s.as_bytes();
    let mut out = alloc::string::String::with_capacity(s.len());
    // Set right after a line terminator; cleared once indentation was handled.
    let mut pending_dedent = false;
    let mut pos = 0;

    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'\n' => {
                out.push('\n');
                pos += 1;
                pending_dedent = true;
            }
            b'\r' => {
                out.push('\r');
                pos += 1;
                if bytes.get(pos) == Some(&b'\n') {
                    out.push('\n');
                    pos += 1;
                }
                pending_dedent = true;
            }
            _ if pending_dedent => {
                // `budget` counts the columns that may still be removed.
                let mut budget = INDENT_SIZE;
                let mut col = first_col;
                while budget > 0 && pos < bytes.len() {
                    match bytes[pos] {
                        b' ' => {
                            budget -= 1;
                            col += 1;
                            pos += 1;
                        }
                        b'\t' => {
                            let tab_cols = TAB_WIDTH - ((col - 1) % TAB_WIDTH);
                            pos += 1;
                            if budget >= tab_cols {
                                budget -= tab_cols;
                                col += tab_cols;
                            } else {
                                // Partially stripped tab: keep the remainder as spaces.
                                out.extend((0..tab_cols - budget).map(|_| ' '));
                                break;
                            }
                        }
                        _ => break,
                    }
                }
                pending_dedent = false;
            }
            _ => {
                // Ordinary content: copy one whole UTF-8 character.
                let width = char_len_utf8(byte);
                out.push_str(&s[pos..pos + width]);
                pos += width;
            }
        }
    }
    out
}
/// Drops all spaces and tabs that directly follow a line break inside an
/// attribute value.
///
/// Line terminators themselves are kept. Whitespace on the first line and
/// whitespace that appears after other content on a line is preserved.
/// Returns the input borrowed when it contains no line break.
fn strip_attr_continuation_indent(s: &str) -> alloc::borrow::Cow<'_, str> {
    let has_line_break = s.bytes().any(|b| b == b'\n' || b == b'\r');
    if !has_line_break {
        return alloc::borrow::Cow::Borrowed(s);
    }

    let mut stripped = alloc::string::String::with_capacity(s.len());
    // True while we are still inside the leading whitespace of a line.
    let mut in_leading_ws = false;
    for ch in s.chars() {
        match ch {
            '\n' | '\r' => {
                in_leading_ws = true;
                stripped.push(ch);
            }
            ' ' | '\t' if in_leading_ws => {}
            _ => {
                in_leading_ws = false;
                stripped.push(ch);
            }
        }
    }
    alloc::borrow::Cow::Owned(stripped)
}
/// Replaces character references in an attribute value with whatever
/// `crate::scanners::scan_entity` yields for them.
///
/// Each `&` is handed to `scan_entity`; when it reports a non-zero consumed
/// length those bytes are skipped and the replacement (if any) is appended.
/// Otherwise the `&` is copied through like any other character. Returns the
/// input borrowed when it contains no `&`.
fn decode_attr_entities(s: &str) -> alloc::borrow::Cow<'_, str> {
    let bytes = s.as_bytes();
    if !bytes.contains(&b'&') {
        return alloc::borrow::Cow::Borrowed(s);
    }

    let mut decoded = alloc::string::String::with_capacity(s.len());
    let mut pos = 0;
    while pos < bytes.len() {
        if bytes[pos] == b'&' {
            let (consumed, replacement) = crate::scanners::scan_entity(&bytes[pos..]);
            if consumed > 0 {
                if let Some(rep) = replacement {
                    decoded.push_str(&rep);
                }
                pos += consumed;
                continue;
            }
        }
        // Not an entity: copy one full UTF-8 character.
        let width = char_len_utf8(bytes[pos]);
        decoded.push_str(&s[pos..pos + width]);
        pos += width;
    }
    alloc::borrow::Cow::Owned(decoded)
}
/// Reports whether the character starting at byte `ix` is whitespace for the
/// purposes of MDX tag scanning.
///
/// ASCII whitespace is accepted directly. Otherwise the character at `ix` is
/// decoded and compared against a fixed set of Unicode space characters
/// (U+00A0, U+1680, U+2000..=U+200A, U+202F, U+205F, U+3000, U+FEFF).
///
/// Only the first character is decoded (a UTF-8 sequence is at most four
/// bytes), so the cost is constant and invalid bytes later in the slice do
/// not affect the answer. Returns `false` when no valid character starts at
/// `ix`.
///
/// # Panics
/// Panics if `ix >= s.len()`.
fn is_mdx_unicode_whitespace(s: &[u8], ix: usize) -> bool {
    if s[ix].is_ascii_whitespace() {
        return true;
    }
    // Look at no more than one maximal-length UTF-8 sequence.
    let window = &s[ix..s.len().min(ix.saturating_add(4))];
    let text = match core::str::from_utf8(window) {
        Ok(text) => text,
        // The window may end mid-character or contain garbage after the
        // first character; keep only the valid prefix.
        Err(err) => match core::str::from_utf8(&window[..err.valid_up_to()]) {
            Ok(prefix) => prefix,
            Err(_) => return false,
        },
    };
    let Some(c) = text.chars().next() else {
        return false;
    };
    matches!(
        c,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
            | '\u{FEFF}'
    )
}
/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// ASCII bytes and continuation bytes (`0x80..=0xBF`) both report `1`, so a
/// caller stepping through malformed input always makes progress.
fn char_len_utf8(b: u8) -> usize {
    if b >= 0xF0 {
        4
    } else if b >= 0xE0 {
        3
    } else if b >= 0xC0 {
        2
    } else {
        1
    }
}
/// Decodes the UTF-8 character that starts at byte `ix` of `s`.
///
/// Only the first character is validated (at most four bytes), so the call is
/// constant-time and is not affected by invalid bytes further along in the
/// slice. Returns `None` when `ix == s.len()` or when no valid character
/// starts at `ix` (for example when `ix` points at a continuation byte).
///
/// # Panics
/// Panics if `ix > s.len()`.
fn decode_utf8_char(s: &[u8], ix: usize) -> Option<char> {
    let window = &s[ix..s.len().min(ix.saturating_add(4))];
    let valid = match core::str::from_utf8(window) {
        Ok(text) => text,
        // The window may cut a following character in half; the prefix that
        // did validate still contains the first character, if there is one.
        Err(err) => core::str::from_utf8(&window[..err.valid_up_to()]).ok()?,
    };
    valid.chars().next()
}
/// Whether the character at byte `ix` may begin a JSX name.
///
/// ASCII letters, `_` and `$` qualify; non-ASCII characters qualify when
/// `unicode_id_start::is_id_start` accepts them.
fn is_jsx_name_start(s: &[u8], ix: usize) -> bool {
    match s[ix] {
        b'_' | b'$' => true,
        b if b.is_ascii() => b.is_ascii_alphabetic(),
        _ => decode_utf8_char(s, ix).is_some_and(unicode_id_start::is_id_start),
    }
}
/// Whether the character at byte `ix` may continue a JSX name.
///
/// ASCII letters and digits plus `-`, `.`, `_` and `$` qualify; non-ASCII
/// characters qualify when `unicode_id_start::is_id_continue` accepts them.
fn is_jsx_name_continue(s: &[u8], ix: usize) -> bool {
    match s[ix] {
        b'-' | b'.' | b'_' | b'$' => true,
        b if b.is_ascii() => b.is_ascii_alphanumeric(),
        _ => decode_utf8_char(s, ix).is_some_and(unicode_id_start::is_id_continue),
    }
}
/// Outcome of parsing a candidate ESM block, as produced by `try_parse_esm`.
pub(crate) enum EsmParseResult {
/// The parser reported no errors.
Complete,
/// The first reported error has no label, or its first label starts at or
/// beyond the end of the input.
Incomplete,
/// The first reported error's first label starts inside the input.
Error,
}
pub(crate) fn try_parse_esm(value: &str, allocator: &mut Allocator) -> EsmParseResult {
allocator.reset();
let source_type = SourceType::mjs().with_jsx(true);
let source = allocator.alloc_str(value);
let ret = Parser::new(allocator, source, source_type)
.with_options(ParseOptions::default())
.parse();
if ret.errors.is_empty() {
return EsmParseResult::Complete;
}
let error = &ret.errors[0];
let error_offset = error
.labels
.as_ref()
.and_then(|labels| labels.first().map(|l| l.offset()))
.unwrap_or(value.len());
if error_offset >= value.len() {
EsmParseResult::Incomplete
} else {
EsmParseResult::Error
}
}
/// Keywords after which a `/` starts a regular-expression literal rather than
/// a division.
const REGEX_KEYWORDS: &[&[u8]] = &[
    b"await",
    b"case",
    b"delete",
    b"in",
    b"instanceof",
    b"new",
    b"of",
    b"return",
    b"throw",
    b"typeof",
    b"void",
    b"yield",
];

/// Heuristically decides whether the `/` at `bytes[pos]` opens a regex
/// literal, by looking at the nearest preceding non-whitespace token.
///
/// * `)`, `]`, a quote character, `++` or `--` → division (`false`).
/// * an identifier or number → division, unless the word is one of
///   [`REGEX_KEYWORDS`].
/// * anything else, or nothing at all → regex (`true`).
///
/// Non-ASCII bytes are treated as identifier bytes, so a name such as `café`
/// followed by `/` is a division. Unicode whitespace is not distinguished
/// from identifier characters here; this is a heuristic, not a tokenizer.
fn slash_is_regex(bytes: &[u8], pos: usize) -> bool {
    fn is_ident_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_' || b == b'$' || b >= 0x80
    }

    let mut i = pos;
    while i > 0 {
        i -= 1;
        match bytes[i] {
            b' ' | b'\t' | b'\n' | b'\r' => continue,
            b')' | b']' => return false,
            b'"' | b'\'' | b'`' => return false,
            // Postfix increment/decrement ends an operand.
            b'+' if i > 0 && bytes[i - 1] == b'+' => return false,
            b'-' if i > 0 && bytes[i - 1] == b'-' => return false,
            b if is_ident_byte(b) => {
                // Walk back to the start of the word; the loop only stops at
                // a non-identifier byte or the start of input, so the word is
                // always a complete token.
                let end = i + 1;
                while i > 0 && is_ident_byte(bytes[i - 1]) {
                    i -= 1;
                }
                return REGEX_KEYWORDS.contains(&&bytes[i..end]);
            }
            _ => return true,
        }
    }
    true
}
/// Skips over a regex literal whose opening `/` is at `bytes[start]` and
/// returns the index just past it (including any trailing flag letters).
///
/// Regex literals cannot contain line terminators, so scanning stops *at* the
/// first `\n` or `\r` — also inside a `[...]` character class and directly
/// after a backslash — and returns that index so the caller handles the line
/// break itself. The returned index never exceeds `bytes.len()`.
fn scan_regex(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let is_eol = |b: u8| b == b'\n' || b == b'\r';

    let mut ix = start + 1;
    while ix < len {
        match bytes[ix] {
            b'/' => {
                // Closing delimiter: swallow the flags that follow.
                ix += 1;
                while ix < len && bytes[ix].is_ascii_alphanumeric() {
                    ix += 1;
                }
                return ix;
            }
            b'\\' => {
                // Skip the escaped byte, but never a line terminator.
                ix += 1;
                if ix < len && !is_eol(bytes[ix]) {
                    ix += 1;
                }
            }
            b'[' => {
                // Inside a class `/` is literal; look for the closing `]`.
                ix += 1;
                while ix < len && bytes[ix] != b']' && !is_eol(bytes[ix]) {
                    if bytes[ix] == b'\\' && ix + 1 < len && !is_eol(bytes[ix + 1]) {
                        ix += 1;
                    }
                    ix += 1;
                }
                if ix < len && bytes[ix] == b']' {
                    ix += 1;
                }
                // On a line break the outer loop sees it next and returns.
            }
            b'\n' | b'\r' => return ix,
            _ => ix += 1,
        }
    }
    ix.min(len)
}
/// Given a line terminator at `bytes[ix]`, reports whether the line that
/// follows it is blank.
///
/// A `\r\n` pair is treated as one terminator. The following line counts as
/// blank when it contains only spaces and tabs before the next line break or
/// the end of input.
fn is_blank_line_next(bytes: &[u8], ix: usize) -> bool {
    let next_line = if bytes[ix] == b'\r' && bytes.get(ix + 1) == Some(&b'\n') {
        ix + 2
    } else {
        ix + 1
    };
    bytes[next_line..]
        .iter()
        .find(|&&b| b != b' ' && b != b'\t')
        .map_or(true, |&b| b == b'\n' || b == b'\r')
}
/// Callback applied to the bytes at the start of a continuation line while
/// scanning inside a container. The scanners in this file treat `Some(n)` as
/// "skip `n` bytes and keep going" and `None` as "abort the scan".
pub(crate) type ContainerLineCheck<'a> = &'a dyn Fn(&[u8]) -> Option<usize>;
/// Runs the optional container check at `*ix`, which must sit just after a
/// line terminator.
///
/// With no check configured, or when `*ix` is at the end of input, nothing
/// happens and `Some(())` is returned. Otherwise the check is applied to the
/// rest of the input: on `Some(skip)` the cursor advances by `skip`; on
/// `None` this function returns `None` so callers can bail out with `?`.
fn check_container_after_newline(
    bytes: &[u8],
    ix: &mut usize,
    container_check: &Option<ContainerLineCheck<'_>>,
) -> Option<()> {
    let Some(check) = container_check else {
        return Some(());
    };
    if *ix >= bytes.len() {
        return Some(());
    }
    let skip = check(&bytes[*ix..])?;
    *ix += skip;
    Some(())
}
fn scan_mdx_expression_end(bytes: &[u8], inline: bool) -> Option<usize> {
scan_mdx_expression_end_inner(bytes, inline, None)
}
/// Finds the end of the `{ ... }` expression that starts at `bytes[0]`.
///
/// Returns the length of the expression including both braces, or `None`
/// when `bytes` does not start with `{`, the braces never balance, or the
/// scan is aborted.
///
/// Braces are counted while skipping over string literals, template
/// literals, `//` and `/* */` comments, regex literals (as judged by
/// `slash_is_regex`) and embedded JSX tags.
///
/// * `inline` — when true, a blank line inside the expression aborts the
///   scan.
/// * `container_check` — when present, it is applied after each line break
///   handled at the top level of the scan; a `None` from it aborts the scan
///   and a `Some(n)` skips `n` bytes.
fn scan_mdx_expression_end_inner(
    bytes: &[u8],
    inline: bool,
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    if bytes.is_empty() || bytes[0] != b'{' {
        return None;
    }
    let mut ix = 1;
    let mut depth: usize = 1;
    while ix < bytes.len() && depth > 0 {
        match bytes[ix] {
            b'\n' => {
                if inline && is_blank_line_next(bytes, ix) {
                    return None;
                }
                ix += 1;
                check_container_after_newline(bytes, &mut ix, &container_check)?;
            }
            b'\r' => {
                if inline && is_blank_line_next(bytes, ix) {
                    return None;
                }
                ix += 1;
                // Treat CRLF as a single line break.
                if ix < bytes.len() && bytes[ix] == b'\n' {
                    ix += 1;
                }
                check_container_after_newline(bytes, &mut ix, &container_check)?;
            }
            b'{' => {
                depth += 1;
                ix += 1;
            }
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(ix + 1);
                }
                ix += 1;
            }
            b'"' | b'\'' => {
                // A `'` right after a word character is taken to be an
                // apostrophe, not the start of a string.
                if bytes[ix] == b'\''
                    && ix > 0
                    && (bytes[ix - 1].is_ascii_alphanumeric() || bytes[ix - 1] == b'_')
                {
                    ix += 1;
                    continue;
                }
                let quote = bytes[ix];
                ix += 1;
                // An unterminated string ends at the line break.
                while ix < bytes.len()
                    && bytes[ix] != quote
                    && bytes[ix] != b'\n'
                    && bytes[ix] != b'\r'
                {
                    if bytes[ix] == b'\\' {
                        ix += 1;
                    }
                    ix += 1;
                }
                if ix < bytes.len() && bytes[ix] == quote {
                    ix += 1;
                }
            }
            b'`' => {
                ix += 1;
                // Brace depth inside `${ ... }` substitutions.
                let mut template_depth: usize = 0;
                while ix < bytes.len() {
                    match bytes[ix] {
                        b'`' if template_depth == 0 => {
                            ix += 1;
                            break;
                        }
                        b'\\' => {
                            ix += 2;
                            continue;
                        }
                        b'$' if ix + 1 < bytes.len() && bytes[ix + 1] == b'{' => {
                            template_depth += 1;
                            ix += 2;
                            continue;
                        }
                        b'{' if template_depth > 0 => template_depth += 1,
                        b'}' if template_depth > 0 => template_depth -= 1,
                        b'\n' | b'\r' if inline && is_blank_line_next(bytes, ix) => {
                            return None;
                        }
                        _ => {}
                    }
                    ix += 1;
                }
            }
            b'/' if ix + 1 < bytes.len() && bytes[ix + 1] == b'/' => {
                // Line comment: runs to the next line terminator. Stop at
                // `\r` as well as `\n` so bare-CR input is handled the same
                // way as everywhere else in this scanner; the terminator is
                // then processed by the arms above.
                ix += 2;
                while ix < bytes.len() && bytes[ix] != b'\n' && bytes[ix] != b'\r' {
                    ix += 1;
                }
            }
            b'/' if ix + 1 < bytes.len() && bytes[ix + 1] == b'*' => {
                ix += 2;
                while ix + 1 < bytes.len() {
                    if bytes[ix] == b'*' && bytes[ix + 1] == b'/' {
                        ix += 2;
                        break;
                    }
                    if inline
                        && (bytes[ix] == b'\n' || bytes[ix] == b'\r')
                        && is_blank_line_next(bytes, ix)
                    {
                        return None;
                    }
                    ix += 1;
                }
            }
            b'/' if slash_is_regex(bytes, ix) => {
                ix = scan_regex(bytes, ix);
            }
            b'<' if ix + 1 < bytes.len()
                && (bytes[ix + 1].is_ascii_alphabetic()
                    || bytes[ix + 1] == b'_'
                    || bytes[ix + 1] == b'$'
                    || bytes[ix + 1] == b'/'
                    || bytes[ix + 1] == b'>') =>
            {
                // Looks like a JSX tag: skip it whole so braces in its
                // attributes are not counted here. Otherwise treat `<` as an
                // ordinary byte.
                if let Some(end) = scan_mdx_jsx_tag_end(&bytes[ix..]) {
                    ix += end;
                } else {
                    ix += 1;
                }
            }
            _ => ix += 1,
        }
    }
    None
}
/// Returns the index just past the next `\n` at or after `start`, or
/// `bytes.len()` when there is none. The result is always `Some`.
fn scan_to_line_end(bytes: &[u8], start: usize) -> Option<usize> {
    Some(match memchr(b'\n', &bytes[start..]) {
        Some(offset) => start + offset + 1,
        None => bytes.len(),
    })
}
fn scan_mdx_jsx_tag_end(bytes: &[u8]) -> Option<usize> {
scan_mdx_jsx_tag_end_inner(bytes, None)
}
/// Scans one JSX tag and returns its length in bytes, including the closing
/// `>`.
///
/// `bytes[0]` is assumed to be the opening `<`; it is not inspected. Handles
/// opening, closing and self-closing tags as well as the fragments `<>` and
/// `</>`. Returns `None` when the name is malformed, the name is followed by
/// an unexpected character, an attribute expression does not terminate, the
/// container check rejects a continuation line, or no `>` is found.
fn scan_mdx_jsx_tag_end_inner(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    let len = bytes.len();
    let mut ix = 1;

    // Closing tag: `</` optionally followed by blanks.
    if bytes.get(ix) == Some(&b'/') {
        ix += 1;
        while ix < len && matches!(bytes[ix], b' ' | b'\t') {
            ix += 1;
        }
    }

    // Fragment (`<>` or `</>`).
    if bytes.get(ix) == Some(&b'>') {
        return Some(ix + 1);
    }

    // Tag name, possibly with `:` namespace separators.
    if ix >= len || !is_jsx_name_start(bytes, ix) {
        return None;
    }
    ix += char_len_utf8(bytes[ix]);
    while ix < len {
        if bytes[ix] == b':' {
            ix += 1;
            if ix >= len || !is_jsx_name_start(bytes, ix) {
                return None;
            }
        } else if !is_jsx_name_continue(bytes, ix) {
            break;
        }
        ix += char_len_utf8(bytes[ix]);
    }

    // The name must be followed by `>`, `/`, `{` or whitespace.
    if ix < len
        && !matches!(bytes[ix], b'>' | b'/' | b'{')
        && !is_mdx_unicode_whitespace(bytes, ix)
    {
        return None;
    }

    // Attributes, up to the closing `>` or `/>`.
    while ix < len {
        match bytes[ix] {
            b'>' => return Some(ix + 1),
            b'/' if bytes.get(ix + 1) == Some(&b'>') => return Some(ix + 2),
            b'{' => {
                ix += scan_mdx_expression_end(&bytes[ix..], false)?;
            }
            quote @ (b'"' | b'\'') => {
                ix += 1;
                while ix < len && bytes[ix] != quote {
                    if bytes[ix] == b'\\' {
                        ix += 1;
                    }
                    ix += 1;
                }
                if ix < len {
                    ix += 1;
                }
            }
            eol @ (b'\n' | b'\r') => {
                ix += 1;
                if eol == b'\r' && bytes.get(ix) == Some(&b'\n') {
                    ix += 1;
                }
                check_container_after_newline(bytes, &mut ix, &container_check)?;
            }
            _ => ix += 1,
        }
    }
    None
}
/// Scans an ESM block (`import ...` / `export ...`) starting at `bytes[0]`.
///
/// Returns `None` unless the input begins with one of the recognised
/// `import`/`export` prefixes. Otherwise returns the offset just past the
/// last line of the block: lines are consumed until the end of input or until
/// the next line starts with `\n` or `\r`.
pub(crate) fn scan_mdx_esm(bytes: &[u8]) -> Option<usize> {
    const IMPORT_PREFIXES: [&[u8]; 3] = [b"import ", b"import\t", b"import{"];
    const EXPORT_PREFIXES: [&[u8]; 6] = [
        b"export ",
        b"export\t",
        b"export{",
        b"export*",
        b"export\n",
        b"export\r",
    ];

    let starts_with_any =
        |prefixes: &[&[u8]]| prefixes.iter().any(|prefix| bytes.starts_with(prefix));
    if !starts_with_any(&IMPORT_PREFIXES) && !starts_with_any(&EXPORT_PREFIXES) {
        return None;
    }

    let mut ix = 0;
    loop {
        // Jump to the start of the next line (or the end of input).
        ix = match memchr(b'\n', &bytes[ix..]) {
            Some(offset) => ix + offset + 1,
            None => bytes.len(),
        };
        let block_ended = match bytes.get(ix) {
            None => true,
            Some(&b) => b == b'\n' || b == b'\r',
        };
        if block_ended {
            return Some(ix);
        }
    }
}
/// Scans a flow-level JSX block that starts with a tag at `bytes[0]`.
///
/// After the first tag, the rest of the line may hold further tags and
/// `{...}` expressions separated by spaces or tabs; two expressions may not
/// follow each other directly. Anything else on the line makes the scan fail.
/// On success returns the offset just past the end of the line on which the
/// block ends.
pub(crate) fn scan_mdx_jsx_block(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    if bytes.len() < 2 || bytes[0] != b'<' {
        return None;
    }

    // Locate where the tag name would begin (after `<` or `</` plus blanks).
    let mut name_start = 1;
    if bytes[1] == b'/' {
        name_start = 2;
        while name_start < bytes.len() && matches!(bytes[name_start], b' ' | b'\t') {
            name_start += 1;
        }
    }

    let first = *bytes.get(name_start)?;
    let mut pos = if first == b'>' {
        // Fragment open/close.
        name_start + 1
    } else if is_jsx_name_start(bytes, name_start) {
        scan_mdx_jsx_tag_end_inner(bytes, container_check)?
    } else {
        return None;
    };

    let mut last_was_jsx = true;
    loop {
        while pos < bytes.len() && matches!(bytes[pos], b' ' | b'\t') {
            pos += 1;
        }
        match bytes.get(pos).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(b'<') => {
                pos += scan_mdx_jsx_tag_end_inner(&bytes[pos..], container_check)?;
                last_was_jsx = true;
            }
            Some(b'{') => {
                let expr_len = scan_mdx_expression_end(&bytes[pos..], true)?;
                // An expression is only allowed directly after a tag.
                if !last_was_jsx {
                    return None;
                }
                pos += expr_len;
                last_was_jsx = false;
            }
            Some(_) => return None,
        }
    }
    scan_to_line_end(bytes, pos)
}
/// Scans a flow-level expression block that starts with `{` at `bytes[0]`.
///
/// The leading expression may span blank lines. After it, the rest of the
/// line may hold JSX tags and further `{...}` expressions separated by spaces
/// or tabs; an expression must directly follow a tag, never another
/// expression. Anything else on the line makes the scan fail. On success
/// returns the offset just past the end of the line on which the block ends.
///
/// Fragments (`<>` / `</>`) need no special handling here:
/// `scan_mdx_jsx_tag_end_inner` already accepts them.
pub(crate) fn scan_mdx_expression_block(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    let mut ix = scan_mdx_expression_end_inner(bytes, false, container_check)?;
    let mut last_was_jsx = false;
    loop {
        while ix < bytes.len() && matches!(bytes[ix], b' ' | b'\t') {
            ix += 1;
        }
        match bytes.get(ix).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(b'<') => {
                ix += scan_mdx_jsx_tag_end_inner(&bytes[ix..], container_check)?;
                last_was_jsx = true;
            }
            Some(b'{') => {
                let expr_len = scan_mdx_expression_end(&bytes[ix..], true)?;
                // Two expressions may not sit next to each other.
                if !last_was_jsx {
                    return None;
                }
                ix += expr_len;
                last_was_jsx = false;
            }
            Some(_) => return None,
        }
    }
    scan_to_line_end(bytes, ix)
}
/// Scans an inline `{...}` expression at `bytes[0]`.
///
/// Returns `(inner_start, inner_end, total_len)`: the byte range between the
/// braces and the full length including them.
pub(crate) fn scan_mdx_inline_expression(bytes: &[u8]) -> Option<(usize, usize, usize)> {
    scan_mdx_expression_end(bytes, true).map(|total| (1, total - 1, total))
}
/// Like `scan_mdx_inline_expression`, but applies `container_check` to each
/// continuation line of the expression.
///
/// Returns `(inner_start, inner_end, total_len)`: the byte range between the
/// braces and the full length including them.
pub(crate) fn scan_mdx_inline_expression_in_container(
    bytes: &[u8],
    container_check: ContainerLineCheck<'_>,
) -> Option<(usize, usize, usize)> {
    scan_mdx_expression_end_inner(bytes, true, Some(container_check))
        .map(|total| (1, total - 1, total))
}
pub(crate) fn scan_mdx_inline_jsx(bytes: &[u8]) -> Option<usize> {
scan_mdx_inline_jsx_inner(bytes, None)
}
/// Scans one inline JSX tag starting with `<` at `bytes[0]` and returns its
/// length in bytes, including the closing `>`.
///
/// Braces in attributes are tracked with a simple depth counter (no
/// expression parsing), and `>` / `/>` only end the tag at depth zero.
/// Returns `None` for malformed names, an unexpected character after the
/// name, a continuation line rejected by `container_check`, or a missing `>`.
fn scan_mdx_inline_jsx_inner(
    bytes: &[u8],
    container_check: Option<ContainerLineCheck<'_>>,
) -> Option<usize> {
    let len = bytes.len();
    if len < 2 || bytes[0] != b'<' {
        return None;
    }

    // Locate where the tag name would begin (after `<` or `</` plus blanks).
    let mut name_start = 1;
    if bytes[1] == b'/' {
        name_start = 2;
        while name_start < len && matches!(bytes[name_start], b' ' | b'\t') {
            name_start += 1;
        }
    }
    if name_start >= len {
        return None;
    }
    // Fragment (`<>` or `</>`).
    if bytes[name_start] == b'>' {
        return Some(name_start + 1);
    }
    if !is_jsx_name_start(bytes, name_start) {
        return None;
    }

    // Tag name, possibly with `:` namespace separators.
    let mut ix = name_start + char_len_utf8(bytes[name_start]);
    while ix < len {
        if bytes[ix] == b':' {
            ix += 1;
            if ix >= len || !is_jsx_name_start(bytes, ix) {
                return None;
            }
        } else if !is_jsx_name_continue(bytes, ix) {
            break;
        }
        ix += char_len_utf8(bytes[ix]);
    }

    // The name must be followed by `>`, `/`, `{` or whitespace.
    if ix < len
        && !matches!(bytes[ix], b'>' | b'/' | b'{')
        && !is_mdx_unicode_whitespace(bytes, ix)
    {
        return None;
    }

    let mut open_braces: usize = 0;
    while ix < len {
        match bytes[ix] {
            b'>' if open_braces == 0 => return Some(ix + 1),
            b'/' if open_braces == 0 && bytes.get(ix + 1) == Some(&b'>') => {
                return Some(ix + 2);
            }
            b'{' => {
                open_braces += 1;
                ix += 1;
            }
            b'}' => {
                open_braces = open_braces.saturating_sub(1);
                ix += 1;
            }
            quote @ (b'"' | b'\'') => {
                ix += 1;
                while ix < len && bytes[ix] != quote {
                    if bytes[ix] == b'\\' {
                        ix += 1;
                    }
                    ix += 1;
                }
                if ix < len {
                    ix += 1;
                }
            }
            eol @ (b'\n' | b'\r') => {
                ix += 1;
                if eol == b'\r' && bytes.get(ix) == Some(&b'\n') {
                    ix += 1;
                }
                check_container_after_newline(bytes, &mut ix, &container_check)?;
            }
            _ => ix += 1,
        }
    }
    None
}
impl<'a, 'b> FirstPass<'a, 'b> {
    /// Appends an `MdxEsm` item covering `start_ix..end_ix`.
    ///
    /// The stored content is the source text of that range with trailing
    /// `\n` / `\r` characters removed. Returns `end_ix`.
    pub(crate) fn parse_mdx_esm(&mut self, start_ix: usize, end_ix: usize) -> usize {
        let content = self.text[start_ix..end_ix].trim_end_matches(['\n', '\r']);
        let cow_ix = self.allocs.allocate_cow(content.into());
        self.tree.append(Item {
            start: start_ix,
            end: end_ix,
            body: ItemBody::MdxEsm(cow_ix),
        });
        end_ix
    }

    /// Splits the flow-level MDX block at `start_ix..end_ix` into JSX element
    /// and expression items and appends them to the tree.
    ///
    /// The text is first run through `strip_container_prefixes`, and a map
    /// from offsets in that stripped text back to the original text is built
    /// so that every item carries original-source offsets. Items on the line
    /// may be separated by spaces or tabs, matching what `scan_mdx_jsx_block`
    /// and `scan_mdx_expression_block` accept. Returns `end_ix`.
    pub(crate) fn parse_mdx_jsx_flow(&mut self, start_ix: usize, end_ix: usize) -> usize {
        let raw = {
            let stripped = self.strip_container_prefixes(start_ix, end_ix);
            stripped.trim_end().to_string()
        };
        let orig_bytes = self.text.as_bytes();
        let stripped_bytes = raw.as_bytes();

        // One entry per line: (offset in stripped text, offset in original
        // text, number of padding spaces inserted at the line start that have
        // no counterpart in the original).
        let mut map: Vec<(usize, usize, usize)> = Vec::new();
        map.push((0, start_ix, 0));
        {
            let mut s_pos = 0usize;
            let mut o_pos = start_ix;
            while s_pos < stripped_bytes.len() && o_pos < end_ix {
                let b = stripped_bytes[s_pos];
                if b == b'\n' || b == b'\r' {
                    s_pos += 1;
                    o_pos += 1;
                    if b == b'\r'
                        && s_pos < stripped_bytes.len()
                        && stripped_bytes[s_pos] == b'\n'
                        && o_pos < end_ix
                        && orig_bytes[o_pos] == b'\n'
                    {
                        s_pos += 1;
                        o_pos += 1;
                    }
                    // Re-run the container scan on the original line to learn
                    // how many prefix bytes were removed from it.
                    let mut ls = crate::scanners::LineStart::new(&orig_bytes[o_pos..end_ix]);
                    let _ = crate::parse::scan_containers(&self.tree, &mut ls, self.options);
                    o_pos += ls.bytes_scanned();
                    let phantom = ls.remaining_space();
                    map.push((s_pos, o_pos, phantom));
                    s_pos += phantom;
                } else {
                    s_pos += 1;
                    o_pos += 1;
                }
            }
        }

        // Translates an offset in `raw` to an offset in the original text.
        // Offsets that fall inside a line's padding map to the line start.
        let stripped_to_orig = |s_pos: usize| -> usize {
            let idx = match map.binary_search_by(|probe| probe.0.cmp(&s_pos)) {
                Ok(i) => i,
                Err(i) => i.saturating_sub(1),
            };
            let (base_s, base_o, phantom) = map[idx];
            let offset = s_pos - base_s;
            if offset <= phantom {
                base_o
            } else {
                base_o + (offset - phantom)
            }
        };

        let mut pos = 0;
        while pos < raw.len() {
            // Skip blanks between items. Tabs are skipped as well as spaces,
            // because the block scanners accept both as separators;
            // otherwise an item after a tab would be silently dropped.
            while pos < raw.len() && matches!(raw.as_bytes()[pos], b' ' | b'\t') {
                pos += 1;
            }
            if pos >= raw.len() {
                break;
            }
            let remaining = &raw.as_bytes()[pos..];
            if remaining[0] == b'<' {
                // If the tag cannot be re-scanned, take the rest of the text.
                let tag_end = scan_mdx_jsx_tag_end(remaining).unwrap_or(raw.len() - pos);
                let tag_raw = &raw[pos..pos + tag_end];
                let container_content_col = self.container_content_col();
                let jsx_data =
                    parse_jsx_tag_with_column(tag_raw, container_content_col).into_static();
                let jsx_ix = self.allocs.allocate_jsx_element(jsx_data);
                self.tree.append(Item {
                    start: stripped_to_orig(pos),
                    end: stripped_to_orig(pos + tag_end),
                    body: ItemBody::MdxJsxFlowElement(jsx_ix),
                });
                pos += tag_end;
            } else if remaining[0] == b'{' {
                // If the expression cannot be re-scanned, take the rest.
                let expr_end =
                    scan_mdx_expression_end(remaining, true).unwrap_or(raw.len() - pos);
                // Content between the braces. `get` yields an empty body
                // instead of panicking when the fallback length leaves an
                // inverted range or one that is not on a character boundary.
                let inner_raw = raw.get(pos + 1..pos + expr_end - 1).unwrap_or("");
                let inner: CowStr<'static> = CowStr::from(
                    dedent_expression_continuation(inner_raw, self.container_content_col())
                        .into_owned(),
                );
                let cow_ix = self.allocs.allocate_cow(inner);
                self.tree.append(Item {
                    start: stripped_to_orig(pos),
                    end: stripped_to_orig(pos + expr_end),
                    body: ItemBody::MdxFlowExpression(cow_ix),
                });
                pos += expr_end;
            } else {
                break;
            }
        }
        end_ix
    }

    /// Computes the 1-based column at which content starts inside the
    /// currently open containers.
    ///
    /// Starting from column 1, each block quote on the spine adds 2, each
    /// list item or definition-list definition adds its stored indent, and
    /// each footnote definition adds 4 when footnotes are enabled.
    fn container_content_col(&self) -> usize {
        use crate::parse::ItemBody;
        let mut col = 1usize;
        for &node_ix in self.tree.walk_spine() {
            match self.tree[node_ix].item.body {
                ItemBody::BlockQuote(..) => col += 2,
                ItemBody::ListItem(indent, _) | ItemBody::DefinitionListDefinition(indent) => {
                    col += indent
                }
                ItemBody::FootnoteDefinition(..)
                    if self.options.contains(crate::Options::ENABLE_FOOTNOTES) =>
                {
                    col += 4
                }
                _ => {}
            }
        }
        col
    }

    /// Returns the text of `start_ix..end_ix` with the bytes consumed by
    /// `scan_containers` removed from the start of every line after the
    /// first.
    ///
    /// The first line is copied as is. For each later line, the bytes that
    /// `scan_containers` reports as scanned are dropped and
    /// `remaining_space()` spaces are inserted in their place. Line
    /// terminators are preserved. When no containers are open the text is
    /// returned borrowed and unchanged.
    fn strip_container_prefixes(
        &self,
        start_ix: usize,
        end_ix: usize,
    ) -> alloc::borrow::Cow<'_, str> {
        if self.tree.spine_len() == 0 {
            return alloc::borrow::Cow::Borrowed(&self.text[start_ix..end_ix]);
        }
        let bytes = self.text.as_bytes();
        let mut result = alloc::string::String::new();
        let mut pos = start_ix;

        // First line: nothing to strip.
        let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
            .map(|i| pos + i)
            .unwrap_or(end_ix);
        result.push_str(&self.text[pos..line_end]);
        pos = line_end;

        // `pos` now always rests on a line terminator or at `end_ix`.
        while pos < end_ix {
            if bytes[pos] == b'\r' {
                result.push('\r');
                pos += 1;
            }
            if pos < end_ix && bytes[pos] == b'\n' {
                result.push('\n');
                pos += 1;
            }
            if pos >= end_ix {
                break;
            }
            let mut ls = LineStart::new(&bytes[pos..]);
            let _ = scan_containers(&self.tree, &mut ls, self.options);
            pos += ls.bytes_scanned();
            for _ in 0..ls.remaining_space() {
                result.push(' ');
            }
            let line_end = memchr::memchr2(b'\n', b'\r', &bytes[pos..end_ix])
                .map(|i| pos + i)
                .unwrap_or(end_ix);
            result.push_str(&self.text[pos..line_end]);
            pos = line_end;
        }
        alloc::borrow::Cow::Owned(result)
    }
}
use crate::{
parse::{scan_containers, JsxAttr, JsxElementData},
scanners::LineStart,
};
impl<'a, 'b> FirstPass<'a, 'b> {
    /// Builds a closure that checks whether a line continues all currently
    /// open containers.
    ///
    /// The closure runs `scan_containers` on the line; when the number of
    /// matched containers equals the spine length it returns the number of
    /// bytes scanned, otherwise `None`.
    pub(crate) fn make_container_line_check(&self) -> impl Fn(&[u8]) -> Option<usize> + '_ {
        move |line_bytes: &[u8]| {
            let mut line_start = LineStart::new(line_bytes);
            let matched_containers = scan_containers(&self.tree, &mut line_start, self.options);
            (matched_containers == self.tree.spine_len()).then(|| line_start.bytes_scanned())
        }
    }

    /// Runs `scanner` on the source text from byte `ix` onward; see
    /// `scan_mdx_flow_in_container_bytes`.
    pub(crate) fn scan_mdx_flow_in_container(
        &self,
        ix: usize,
        scanner: impl Fn(&[u8], Option<ContainerLineCheck<'_>>) -> Option<usize>,
    ) -> Option<usize> {
        let tail = &self.text.as_bytes()[ix..];
        self.scan_mdx_flow_in_container_bytes(tail, scanner)
    }

    /// Runs `scanner` on `bytes`, passing a container line check only when
    /// at least one container is open.
    pub(crate) fn scan_mdx_flow_in_container_bytes(
        &self,
        bytes: &[u8],
        scanner: impl Fn(&[u8], Option<ContainerLineCheck<'_>>) -> Option<usize>,
    ) -> Option<usize> {
        match self.tree.spine_len() {
            0 => scanner(bytes, None),
            _ => {
                let check = self.make_container_line_check();
                scanner(bytes, Some(&check))
            }
        }
    }
}
/// Parses a raw JSX tag, using content column 1 for indentation handling.
pub(crate) fn parse_jsx_tag<'a>(raw: &'a str) -> JsxElementData<'a> {
    const FIRST_CONTENT_COL: usize = 1;
    parse_jsx_tag_with_column(raw, FIRST_CONTENT_COL)
}
/// Returns the 1-based column of byte offset `pos` within its line.
///
/// The line starts after the nearest preceding `\n` or `\r` (or at the start
/// of input). Tabs advance to the next 4-column tab stop; every other byte
/// advances the column by one.
pub(crate) fn column_at(bytes: &[u8], pos: usize) -> usize {
    const TAB_WIDTH: usize = 4;

    let before = &bytes[..pos];
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n' || b == b'\r')
        .map_or(0, |line_break| line_break + 1);

    before[line_start..].iter().fold(1, |col, &b| {
        if b == b'\t' {
            col + TAB_WIDTH - ((col - 1) % TAB_WIDTH)
        } else {
            col + 1
        }
    })
}
/// Parses a raw JSX tag into a `JsxElementData`.
///
/// * A tag starting with `</` yields a closing element with no attributes.
/// * Otherwise the name is whatever follows the first byte (expected to be
///   `<`), attributes are parsed with `parse_jsx_attrs`, and the element is
///   marked self-closing when the trimmed text ends with `/>` or contains its
///   own closing tag (`</name>`, or `</>` for a fragment).
///
/// `container_content_col` is forwarded to attribute parsing for indentation
/// handling of multi-line expression values. `raw` is stored unmodified.
///
/// Empty or whitespace-only input produces an element with an empty name and
/// no attributes rather than panicking.
pub(crate) fn parse_jsx_tag_with_column<'a>(
    raw: &'a str,
    container_content_col: usize,
) -> JsxElementData<'a> {
    let s = raw.trim();
    if let Some(rest) = s.strip_prefix("</") {
        let name = extract_tag_name(rest.trim_start());
        return JsxElementData {
            name: name.into(),
            attrs: Vec::new(),
            raw: raw.into(),
            is_closing: true,
            is_self_closing: false,
        };
    }
    let ends_self_close = s.ends_with("/>");
    // Skip the leading `<`. `get` avoids a panic when `s` is empty or its
    // first character is wider than one byte.
    let name = extract_tag_name(s.get(1..).unwrap_or(""));
    let is_self_contained = if !name.is_empty() {
        let close_tag = alloc::format!("</{name}>");
        s.contains(&*close_tag)
    } else {
        s.contains("</>")
    };
    let is_self_closing = ends_self_close || is_self_contained;
    let attrs = parse_jsx_attrs(s, container_content_col);
    JsxElementData {
        name: name.into(),
        attrs,
        raw: raw.into(),
        is_closing: false,
        is_self_closing,
    }
}
/// Returns the leading part of `s` up to (not including) the first
/// whitespace character, `/`, `>` or `{`; the whole string if none occurs.
fn extract_tag_name(s: &str) -> &str {
    for (offset, ch) in s.char_indices() {
        if ch.is_whitespace() || matches!(ch, '/' | '>' | '{') {
            return &s[..offset];
        }
    }
    s
}
/// Returns the prefix of `text` that ends at the first `>` found outside any
/// quoted string and outside any `{...}` group; the whole text if there is
/// no such `>`.
///
/// Single quotes, double quotes and backticks open a string that runs to the
/// matching unescaped quote. A backslash inside a string escapes exactly the
/// next character, so an escaped backslash followed by the closing quote
/// (`"x\\"`) ends the string correctly.
fn extract_opening_tag(text: &str) -> &str {
    let mut depth = 0i32;
    // The quote character of the string we are inside, if any.
    let mut open_quote: Option<char> = None;
    // True when the previous character was an unescaped backslash.
    let mut escaped = false;

    for (i, ch) in text.char_indices() {
        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }
        match ch {
            '\'' | '"' | '`' => open_quote = Some(ch),
            '{' => depth += 1,
            '}' => depth -= 1,
            '>' if depth == 0 => return &text[..=i],
            _ => {}
        }
    }
    text
}
fn parse_jsx_attrs<'a>(text: &'a str, container_content_col: usize) -> Vec<JsxAttr<'a>> {
let tag = extract_opening_tag(text);
let bytes = tag.as_bytes();
let len = bytes.len();
let mut attrs = Vec::new();
let mut i = 1;
if i < len && bytes[i] == b'/' {
i += 1;
}
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
while i < len
&& (bytes[i].is_ascii_alphanumeric() || matches!(bytes[i], b'.' | b'-' | b':' | b'_'))
{
i += 1;
}
loop {
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len {
break;
}
if bytes[i] == b'>' || (bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'>') {
break;
}
if bytes[i] == b'{' {
i += 1;
let start = i;
let mut depth = 1i32;
while i < len && depth > 0 {
match bytes[i] {
b'{' => depth += 1,
b'}' => depth -= 1,
b'\'' | b'"' | b'`' => {
let q = bytes[i];
i += 1;
while i < len && bytes[i] != q {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
_ => {}
}
i += 1;
}
let value = tag[start..i.saturating_sub(1)].trim();
attrs.push(JsxAttr::Spread(value.into()));
continue;
}
let name_start = i;
while i < len
&& (bytes[i].is_ascii_alphanumeric() || matches!(bytes[i], b'-' | b':' | b'_'))
{
i += 1;
}
if i == name_start {
i += 1;
continue;
}
let name = &tag[name_start..i];
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i < len && bytes[i] == b'=' {
i += 1;
while i < len && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= len {
attrs.push(JsxAttr::Boolean(name.into()));
continue;
}
if bytes[i] == b'"' || bytes[i] == b'\'' {
let q = bytes[i];
i += 1;
let val_start = i;
while i < len && bytes[i] != q {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
let raw_value = &tag[val_start..i];
if i < len {
i += 1;
}
let value = strip_attr_continuation_indent(raw_value);
let decoded = decode_attr_entities(value.as_ref());
attrs.push(JsxAttr::Literal(name.into(), decoded.into_owned().into()));
} else if bytes[i] == b'{' {
i += 1;
let val_start = i;
let mut depth = 1i32;
while i < len && depth > 0 {
match bytes[i] {
b'{' => depth += 1,
b'}' => depth -= 1,
b'\'' | b'"' | b'`' => {
let q = bytes[i];
i += 1;
while i < len && bytes[i] != q {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
_ => {}
}
i += 1;
}
let value = &tag[val_start..i.saturating_sub(1)];
let normalized = if value.contains('\n') || value.contains('\r') {
alloc::borrow::Cow::Owned(strip_expression_indent(value, container_content_col))
} else {
alloc::borrow::Cow::Borrowed(value)
};
attrs.push(JsxAttr::Expression(
name.into(),
normalized.into_owned().into(),
));
} else {
attrs.push(JsxAttr::Boolean(name.into()));
}
} else {
attrs.push(JsxAttr::Boolean(name.into()));
}
}
attrs
}