use memchr::memchr;
use crate::{
firstpass::FirstPass,
parse::{Item, ItemBody},
};
/// Reports whether `bytes` contains only blanks before its first line ending.
///
/// Spaces and tabs are skipped. The answer is `true` when the first byte that
/// is neither is `\n` or `\r`, or when the slice runs out (an empty slice
/// included); any other byte makes it `false`.
fn is_only_whitespace_to_eol(bytes: &[u8]) -> bool {
    match bytes.iter().find(|&&b| b != b' ' && b != b'\t') {
        Some(&first_other) => first_other == b'\n' || first_other == b'\r',
        None => true,
    }
}
/// Returns the offset just past the line that contains `start`.
///
/// The result is one past the first `\n` found at or after `start`, or
/// `bytes.len()` when no `\n` follows. The function always yields `Some`.
///
/// # Panics
///
/// Panics if `start` is greater than `bytes.len()` (the slice is indexed
/// from `start`).
fn scan_to_line_end(bytes: &[u8], start: usize) -> Option<usize> {
    let next_line = match memchr(b'\n', &bytes[start..]) {
        Some(offset) => start + offset + 1,
        None => bytes.len(),
    };
    Some(next_line)
}
/// Scans one JSX tag and returns the offset just past its closing `>`.
///
/// Index 0 is never inspected; scanning starts at index 1, so the caller is
/// expected to pass a slice whose first byte is the opening `<`. An optional
/// `/` may follow, then either `>` (a fragment tag) or a tag name. After the
/// name, everything up to the first `>` or `/>` at brace depth zero is
/// consumed; line breaks are allowed inside the tag.
///
/// Returns `None` when the name is malformed, when the byte after the name
/// is not one of `>`, `/`, blank, line break or `{`, or when the input ends
/// before the tag is closed.
fn scan_mdx_jsx_tag_end(bytes: &[u8]) -> Option<usize> {
    /// True when `b` may open a tag name or the part after a `:`.
    fn starts_name(b: u8) -> bool {
        b.is_ascii_alphabetic() || b == b'_' || b == b'$' || b >= 0x80
    }

    /// Steps over a quoted run whose opening quote is at `open` and returns
    /// where scanning resumes. With `escapes` set, a backslash also swallows
    /// the byte that follows it. The result may exceed `bytes.len()` when the
    /// input ends on a backslash; callers only compare it against the length.
    fn skip_quoted(bytes: &[u8], open: usize, quote: u8, escapes: bool) -> usize {
        let mut at = open + 1;
        while at < bytes.len() && bytes[at] != quote {
            if escapes && bytes[at] == b'\\' {
                at += 1;
            }
            at += 1;
        }
        if at < bytes.len() {
            at + 1
        } else {
            at
        }
    }

    let len = bytes.len();
    let mut pos = 1;
    if bytes.get(pos) == Some(&b'/') {
        pos += 1;
    }
    match bytes.get(pos).copied() {
        None => return None,
        // `<>` or `</>`: a fragment tag has no name at all.
        Some(b'>') => return Some(pos + 1),
        Some(lead) if !starts_name(lead) => return None,
        Some(_) => pos += 1,
    }

    // Remainder of the tag name, including `ns:name` and `a.b` forms.
    while let Some(&b) = bytes.get(pos) {
        if b == b':' {
            match bytes.get(pos + 1) {
                Some(&next) if starts_name(next) => pos += 2,
                _ => return None,
            }
        } else if b.is_ascii_alphanumeric()
            || matches!(b, b'-' | b'.' | b'_' | b'$')
            || b >= 0x80
        {
            pos += 1;
        } else {
            break;
        }
    }

    // The name must be followed by something that can continue a tag.
    if let Some(&after_name) = bytes.get(pos) {
        if !matches!(
            after_name,
            b'>' | b'/' | b' ' | b'\t' | b'\n' | b'\r' | b'{'
        ) {
            return None;
        }
    }

    let mut open_braces: usize = 0;
    while pos < len {
        let b = bytes[pos];
        if open_braces == 0 {
            if b == b'>' {
                return Some(pos + 1);
            }
            if b == b'/' && bytes.get(pos + 1) == Some(&b'>') {
                return Some(pos + 2);
            }
        }
        pos = match b {
            b'{' => {
                open_braces += 1;
                pos + 1
            }
            b'}' => {
                // A stray `}` never drives the depth below zero.
                open_braces = open_braces.saturating_sub(1);
                pos + 1
            }
            b'"' | b'\'' => skip_quoted(bytes, pos, b, true),
            // Backtick runs are skipped without backslash handling.
            b'`' => skip_quoted(bytes, pos, b, false),
            // Everything else, line breaks included, is stepped over.
            _ => pos + 1,
        };
    }
    None
}
/// Scans an ESM `import` / `export` statement at the start of `bytes`.
///
/// The input must begin with `import` or `export` followed by one of the
/// separators listed in `KEYWORD_PREFIXES`; otherwise `None` is returned.
/// The statement is extended line by line for as long as either
///
/// * the following line begins with a space or tab, or
/// * the line just consumed ends (ignoring trailing blanks and the line
///   ending) in `,`, `{` or `(` and more input follows.
///
/// Returns the offset just past the last consumed line (or `bytes.len()`
/// when the final line has no `\n`).
pub(crate) fn scan_mdx_esm(bytes: &[u8]) -> Option<usize> {
    const KEYWORD_PREFIXES: [&[u8]; 9] = [
        b"import ",
        b"import\t",
        b"import{",
        b"export ",
        b"export\t",
        b"export{",
        b"export*",
        b"export\n",
        b"export\r",
    ];
    if !KEYWORD_PREFIXES
        .iter()
        .any(|prefix| bytes.starts_with(prefix))
    {
        return None;
    }

    let total = bytes.len();
    let mut cursor = 0;
    loop {
        // Advance to the start of the next line, or to the end of input.
        cursor = match memchr(b'\n', &bytes[cursor..]) {
            Some(offset) => cursor + offset + 1,
            None => total,
        };
        if cursor >= total {
            // Nothing left to continue onto.
            break;
        }
        // An indented line is treated as part of the same statement.
        if matches!(bytes[cursor], b' ' | b'\t') {
            continue;
        }
        // Locate the end of the line just consumed, excluding `\n` / `\r\n`.
        let line_end = if cursor >= 2 && bytes[cursor - 2] == b'\r' {
            cursor - 2
        } else {
            cursor - 1
        };
        let last_significant = bytes[..line_end]
            .iter()
            .rev()
            .copied()
            .find(|&b| b != b' ' && b != b'\t');
        // A trailing `,`, `{` or `(` means the statement is still open.
        if matches!(last_significant, Some(b',' | b'{' | b'(')) {
            continue;
        }
        break;
    }
    Some(cursor)
}
/// Scans a block-level (flow) JSX construct starting at `bytes[0]`.
///
/// The input must start with `<`. A fragment tag (`<>` or `</>`) is accepted
/// only when nothing but blanks follows it on the line. Otherwise the first
/// tag is scanned with `scan_mdx_jsx_tag_end`, after which the rest of the
/// line may hold only blanks, further tags and `{...}` expressions.
///
/// Returns the offset just past the line on which the construct ends, or
/// `None` when the input is not a flow JSX construct by these rules.
pub(crate) fn scan_mdx_jsx_block(bytes: &[u8]) -> Option<usize> {
    if bytes.len() < 2 || bytes[0] != b'<' {
        return None;
    }
    let name_start = if bytes[1] == b'/' { 2 } else { 1 };
    let lead = *bytes.get(name_start)?;

    if lead == b'>' {
        // Fragment tag: must stand alone on its line.
        let after = name_start + 1;
        if !is_only_whitespace_to_eol(&bytes[after..]) {
            return None;
        }
        return scan_to_line_end(bytes, after);
    }

    let plausible_name =
        lead.is_ascii_alphabetic() || matches!(lead, b'_' | b'$') || lead >= 0x80;
    if !plausible_name {
        return None;
    }

    let mut pos = scan_mdx_jsx_tag_end(bytes)?;
    loop {
        while matches!(bytes.get(pos).copied(), Some(b' ' | b'\t')) {
            pos += 1;
        }
        // Anything other than a tag or an expression on this line disqualifies it.
        let consumed = match bytes.get(pos).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(b'<') => scan_mdx_jsx_tag_end(&bytes[pos..]),
            Some(b'{') => scan_mdx_inline_expression(&bytes[pos..]).map(|(_, _, len)| len),
            Some(_) => None,
        };
        pos += consumed?;
    }
    scan_to_line_end(bytes, pos)
}
/// Scans a block-level `{ ... }` expression starting at `bytes[0]`.
///
/// Braces are balanced while skipping over `"..."` and `'...'` runs (with
/// backslash escapes) and `` `...` `` runs (without). After the closing
/// brace only blanks may remain on the line.
///
/// Returns the offset past the closing brace, any trailing blanks, and one
/// optional `\r` and/or `\n`. Returns `None` when the input does not start
/// with `{`, the braces never balance, or other content follows on the line.
pub(crate) fn scan_mdx_expression_block(bytes: &[u8]) -> Option<usize> {
    /// Steps over a quoted run opened at `open`; with `escapes` set, a
    /// backslash also swallows the next byte. The result may exceed
    /// `bytes.len()` when the input ends on a backslash.
    fn skip_quoted(bytes: &[u8], open: usize, quote: u8, escapes: bool) -> usize {
        let mut at = open + 1;
        while at < bytes.len() && bytes[at] != quote {
            if escapes && bytes[at] == b'\\' {
                at += 1;
            }
            at += 1;
        }
        if at < bytes.len() {
            at + 1
        } else {
            at
        }
    }

    if bytes.first() != Some(&b'{') {
        return None;
    }

    let len = bytes.len();
    let mut pos = 1;
    let mut open_braces: usize = 1;
    while open_braces > 0 && pos < len {
        let b = bytes[pos];
        pos = match b {
            b'{' => {
                open_braces += 1;
                pos + 1
            }
            b'}' => {
                // Cannot underflow: the loop only runs while the depth is positive.
                open_braces -= 1;
                pos + 1
            }
            b'"' | b'\'' => skip_quoted(bytes, pos, b, true),
            b'`' => skip_quoted(bytes, pos, b, false),
            _ => pos + 1,
        };
    }

    // `pos` is only guaranteed in range when the braces balanced, so the
    // depth test must short-circuit before the slice is taken.
    if open_braces != 0 || !is_only_whitespace_to_eol(&bytes[pos..]) {
        return None;
    }

    let blanks = bytes[pos..]
        .iter()
        .take_while(|&&b| b == b' ' || b == b'\t')
        .count();
    let mut end = pos + blanks;
    if bytes.get(end) == Some(&b'\r') {
        end += 1;
    }
    if bytes.get(end) == Some(&b'\n') {
        end += 1;
    }
    Some(end)
}
/// Scans an inline `{ ... }` expression starting at `bytes[0]`.
///
/// Braces are balanced while skipping over `"..."` and `'...'` runs (with
/// backslash escapes) and `` `...` `` runs (without).
///
/// On success returns `(1, close, end)`: `1` is the offset just past the
/// opening brace, `close` is the offset of the matching closing brace, and
/// `end` is the offset just past it. Returns `None` when the input does not
/// start with `{` or the braces never balance.
pub(crate) fn scan_mdx_inline_expression(bytes: &[u8]) -> Option<(usize, usize, usize)> {
    /// Steps over a quoted run opened at `open`; with `escapes` set, a
    /// backslash also swallows the next byte. The result may exceed
    /// `bytes.len()` when the input ends on a backslash.
    fn skip_literal(bytes: &[u8], open: usize, quote: u8, escapes: bool) -> usize {
        let mut at = open + 1;
        while at < bytes.len() && bytes[at] != quote {
            if escapes && bytes[at] == b'\\' {
                at += 1;
            }
            at += 1;
        }
        if at < bytes.len() {
            at + 1
        } else {
            at
        }
    }

    let (&opener, _) = bytes.split_first()?;
    if opener != b'{' {
        return None;
    }

    let mut cursor = 1;
    let mut nesting: usize = 1;
    while nesting > 0 && cursor < bytes.len() {
        let b = bytes[cursor];
        cursor = match b {
            b'{' => {
                nesting += 1;
                cursor + 1
            }
            b'}' => {
                // Cannot underflow: the loop only runs while nesting is positive.
                nesting -= 1;
                cursor + 1
            }
            b'"' | b'\'' => skip_literal(bytes, cursor, b, true),
            b'`' => skip_literal(bytes, cursor, b, false),
            _ => cursor + 1,
        };
    }

    match nesting {
        0 => Some((1, cursor - 1, cursor)),
        _ => None,
    }
}
/// Scans one inline JSX tag starting at `bytes[0]` and returns the offset
/// just past its closing `>`.
///
/// The input must start with `<`, optionally followed by `/`, then either
/// `>` (a fragment tag) or a tag name. After the name, everything up to the
/// first `>` or `/>` at brace depth zero is consumed, skipping `"..."` and
/// `'...'` runs with backslash escapes. Backtick runs get no special
/// treatment here.
///
/// Returns `None` when the input is shorter than two bytes, the name is
/// malformed, the byte after the name cannot continue a tag, or the input
/// ends before the tag is closed.
pub(crate) fn scan_mdx_inline_jsx(bytes: &[u8]) -> Option<usize> {
    /// True when `b` may open a tag name or the part after a `:`.
    fn starts_name(b: u8) -> bool {
        b.is_ascii_alphabetic() || b == b'_' || b == b'$' || b >= 0x80
    }

    if bytes.len() < 2 || bytes[0] != b'<' {
        return None;
    }
    let name_start = if bytes[1] == b'/' { 2 } else { 1 };
    let lead = *bytes.get(name_start)?;
    if lead == b'>' {
        // `<>` or `</>`.
        return Some(name_start + 1);
    }
    if !starts_name(lead) {
        return None;
    }

    // Remainder of the tag name, including `ns:name` and `a.b` forms.
    let mut pos = name_start + 1;
    while let Some(&b) = bytes.get(pos) {
        if b == b':' {
            match bytes.get(pos + 1) {
                Some(&next) if starts_name(next) => pos += 2,
                _ => return None,
            }
        } else if b.is_ascii_alphanumeric()
            || matches!(b, b'-' | b'.' | b'_' | b'$')
            || b >= 0x80
        {
            pos += 1;
        } else {
            break;
        }
    }

    // The name must be followed by something that can continue a tag.
    if let Some(&after_name) = bytes.get(pos) {
        if !matches!(
            after_name,
            b'>' | b'/' | b' ' | b'\t' | b'\n' | b'\r' | b'{'
        ) {
            return None;
        }
    }

    let len = bytes.len();
    let mut open_braces: usize = 0;
    while pos < len {
        let b = bytes[pos];
        if open_braces == 0 {
            if b == b'>' {
                return Some(pos + 1);
            }
            if b == b'/' && bytes.get(pos + 1) == Some(&b'>') {
                return Some(pos + 2);
            }
        }
        match b {
            b'{' => {
                open_braces += 1;
                pos += 1;
            }
            b'}' => {
                // A stray `}` never drives the depth below zero.
                open_braces = open_braces.saturating_sub(1);
                pos += 1;
            }
            b'"' | b'\'' => {
                // Skip to the matching quote; a backslash swallows the next byte.
                pos += 1;
                while pos < len && bytes[pos] != b {
                    if bytes[pos] == b'\\' {
                        pos += 1;
                    }
                    pos += 1;
                }
                if pos < len {
                    pos += 1;
                }
            }
            // Everything else, line breaks included, is stepped over.
            _ => pos += 1,
        }
    }
    None
}
impl<'a, 'b> FirstPass<'a, 'b> {
    /// Appends an `MdxEsm` item covering `start_ix..end_ix`.
    ///
    /// The stored text is that range of the source with trailing whitespace
    /// trimmed; the item itself spans the untrimmed range. Returns `end_ix`.
    pub(crate) fn parse_mdx_esm(&mut self, start_ix: usize, end_ix: usize) -> usize {
        let source = self.text[start_ix..end_ix].trim_end();
        let cow_ix = self.allocs.allocate_cow(source.into());
        let item = Item {
            start: start_ix,
            end: end_ix,
            body: ItemBody::MdxEsm(cow_ix),
        };
        self.tree.append(item);
        end_ix
    }

    /// Appends an `MdxJsxFlowElement` item covering `start_ix..end_ix`.
    ///
    /// The range, with trailing whitespace trimmed, is handed to
    /// `parse_jsx_tag` and the result is stored through the allocator.
    /// Returns `end_ix`.
    pub(crate) fn parse_mdx_jsx_flow(&mut self, start_ix: usize, end_ix: usize) -> usize {
        let source = self.text[start_ix..end_ix].trim_end();
        let jsx_ix = self.allocs.allocate_jsx_element(parse_jsx_tag(source));
        let item = Item {
            start: start_ix,
            end: end_ix,
            body: ItemBody::MdxJsxFlowElement(jsx_ix),
        };
        self.tree.append(item);
        end_ix
    }

    /// Appends an `MdxFlowExpression` item covering `start_ix..end_ix`.
    ///
    /// The stored text is the trimmed range without its first and last byte
    /// (presumably the enclosing `{` and `}` — confirm against the caller).
    /// Returns `end_ix`.
    ///
    /// # Panics
    ///
    /// Panics if the trimmed range is shorter than two bytes, or if byte 1 or
    /// the last byte is not on a character boundary.
    pub(crate) fn parse_mdx_flow_expression(&mut self, start_ix: usize, end_ix: usize) -> usize {
        let source = self.text[start_ix..end_ix].trim_end();
        let inner = &source[1..source.len() - 1];
        let cow_ix = self.allocs.allocate_cow(inner.into());
        let item = Item {
            start: start_ix,
            end: end_ix,
            body: ItemBody::MdxFlowExpression(cow_ix),
        };
        self.tree.append(item);
        end_ix
    }
}
use crate::parse::{JsxAttr, JsxElementData};
/// Builds a `JsxElementData` from the raw text of a JSX construct.
///
/// `raw` is stored unchanged; all inspection happens on a trimmed copy.
///
/// * Text starting with `</` is reported as a closing tag with no attributes.
/// * Otherwise the name is read from just after the first byte, attributes
///   come from `parse_jsx_attrs`, and the element counts as self-closing
///   when the text ends in `/>` or contains its own closing tag anywhere
///   (`</name>`, or `</>` for a nameless tag).
///
/// # Panics
///
/// For non-closing input the first byte is sliced off without inspection, so
/// this panics if the trimmed text is empty or its first character is wider
/// than one byte.
pub(crate) fn parse_jsx_tag<'a>(raw: &'a str) -> JsxElementData<'a> {
    let trimmed = raw.trim();

    let (name, is_closing, is_self_closing) = match trimmed.strip_prefix("</") {
        Some(after_slash) => (extract_tag_name(after_slash), true, false),
        None => {
            let name = extract_tag_name(&trimmed[1..]);
            // The element is "self-contained" when its closer is in the same text.
            let has_own_closer = if name.is_empty() {
                trimmed.contains("</>")
            } else {
                trimmed.contains(alloc::format!("</{name}>").as_str())
            };
            (name, false, trimmed.ends_with("/>") || has_own_closer)
        }
    };

    let attrs = if is_closing {
        Vec::new()
    } else {
        parse_jsx_attrs(trimmed)
    };

    JsxElementData {
        name: name.into(),
        attrs,
        raw: raw.into(),
        is_closing,
        is_self_closing,
    }
}
/// Returns the leading part of `s` up to, but not including, the first
/// whitespace character, `/`, `>` or `{`. The whole of `s` is returned when
/// none of these occurs.
fn extract_tag_name(s: &str) -> &str {
    let ends_name = |c: char| matches!(c, '/' | '>' | '{') || c.is_whitespace();
    match s.find(ends_name) {
        Some(cut) => &s[..cut],
        None => s,
    }
}
/// Returns the prefix of `text` that ends at the first `>` found outside any
/// quoted run and at brace depth zero, including that `>`. When no such `>`
/// exists the whole of `text` is returned.
///
/// Quoted runs may be delimited by `'`, `"` or `` ` ``. Inside a run a
/// backslash escapes the character that follows it, so `\\` is an escaped
/// backslash and does not protect a quote that comes after it. This matches
/// how the byte scanners in this file step over the byte following a
/// backslash. Brace depth is clamped at zero, so a stray `}` cannot prevent
/// a later `>` from being recognised.
fn extract_opening_tag(text: &str) -> &str {
    let mut depth = 0usize;
    // The quote character of the run currently being skipped, if any.
    let mut open_quote: Option<char> = None;
    // Set when the previous character inside a quoted run was an unescaped backslash.
    let mut escaped = false;

    for (i, ch) in text.char_indices() {
        match open_quote {
            Some(quote) => {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == quote {
                    open_quote = None;
                }
            }
            None => match ch {
                '\'' | '"' | '`' => open_quote = Some(ch),
                '{' => depth += 1,
                '}' => depth = depth.saturating_sub(1),
                // `>` is a single byte, so the inclusive slice ends on a boundary.
                '>' if depth == 0 => return &text[..=i],
                _ => {}
            },
        }
    }
    text
}
/// Parses the attributes of the opening tag at the start of `text`.
///
/// Only the opening tag (as delimited by `extract_opening_tag`) is examined.
/// The tag name is skipped, then attributes are read until `>` or `/>`:
///
/// * `{...}` yields `JsxAttr::Spread` with the trimmed inner text;
/// * `name="..."` or `name='...'` yields `JsxAttr::Literal` with the text
///   between the quotes, taken verbatim (backslash escapes are skipped over
///   but not decoded);
/// * `name={...}` yields `JsxAttr::Expression` with the untrimmed inner text;
/// * a bare `name`, or `name=` followed by anything else, yields
///   `JsxAttr::Boolean`.
///
/// Bytes that cannot start an attribute are skipped one at a time.
///
/// The tag-name skip accepts `$` and non-ASCII bytes in addition to ASCII
/// alphanumerics and `.`, `-`, `:`, `_`, matching what the tag scanners in
/// this file accept, so the tail of a name such as `$foo` is not mistaken
/// for an attribute. Quoted or braced values that are never terminated run
/// to the end of the tag instead of slicing out of range.
fn parse_jsx_attrs<'a>(text: &'a str) -> Vec<JsxAttr<'a>> {
    /// Skips a quoted run whose opening quote is at `open`. Returns
    /// `(content_end, resume)`: the exclusive end of the quoted content and
    /// the index at which scanning continues. Both are clamped to the slice
    /// length when the run is unterminated.
    fn skip_quoted(bytes: &[u8], open: usize, quote: u8) -> (usize, usize) {
        let len = bytes.len();
        let mut i = open + 1;
        while i < len && bytes[i] != quote {
            if bytes[i] == b'\\' {
                // A backslash swallows the byte that follows it.
                i += 1;
            }
            i += 1;
        }
        if i < len {
            (i, i + 1)
        } else {
            (len, len)
        }
    }

    /// Skips a `{...}` group whose opening brace is at `open`, honouring
    /// nested braces and quoted runs. Returns `(content_end, resume)` with
    /// the same meaning and clamping as `skip_quoted`.
    fn skip_braced(bytes: &[u8], open: usize) -> (usize, usize) {
        let len = bytes.len();
        let mut depth = 1usize;
        let mut i = open + 1;
        while i < len {
            match bytes[i] {
                b'{' => depth += 1,
                b'}' => {
                    depth -= 1;
                    if depth == 0 {
                        return (i, i + 1);
                    }
                }
                quote @ (b'\'' | b'"' | b'`') => {
                    i = skip_quoted(bytes, i, quote).1;
                    continue;
                }
                _ => {}
            }
            i += 1;
        }
        (len, len)
    }

    let tag = extract_opening_tag(text);
    let bytes = tag.as_bytes();
    let len = bytes.len();
    let mut attrs = Vec::new();

    // Step over `<`, an optional `/`, blanks, and the tag name.
    let mut i = 1;
    if i < len && bytes[i] == b'/' {
        i += 1;
    }
    while i < len && bytes[i].is_ascii_whitespace() {
        i += 1;
    }
    while i < len
        && (bytes[i].is_ascii_alphanumeric()
            || matches!(bytes[i], b'.' | b'-' | b':' | b'_' | b'$')
            || bytes[i] >= 0x80)
    {
        i += 1;
    }

    loop {
        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
        if i >= len {
            break;
        }
        if bytes[i] == b'>' || (bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'>') {
            break;
        }

        // `{...spread}`
        if bytes[i] == b'{' {
            let (content_end, resume) = skip_braced(bytes, i);
            let value = tag[i + 1..content_end].trim();
            attrs.push(JsxAttr::Spread(value.into()));
            i = resume;
            continue;
        }

        let name_start = i;
        while i < len
            && (bytes[i].is_ascii_alphanumeric() || matches!(bytes[i], b'-' | b':' | b'_'))
        {
            i += 1;
        }
        if i == name_start {
            // Not an attribute name: skip the byte and try again.
            i += 1;
            continue;
        }
        let name = &tag[name_start..i];

        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
        if i >= len || bytes[i] != b'=' {
            attrs.push(JsxAttr::Boolean(name.into()));
            continue;
        }

        // Past `=` and any blanks that follow it.
        i += 1;
        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }

        match bytes.get(i).copied() {
            Some(quote @ (b'"' | b'\'')) => {
                let (content_end, resume) = skip_quoted(bytes, i, quote);
                let value = &tag[i + 1..content_end];
                attrs.push(JsxAttr::Literal(name.into(), value.into()));
                i = resume;
            }
            Some(b'{') => {
                let (content_end, resume) = skip_braced(bytes, i);
                let value = &tag[i + 1..content_end];
                attrs.push(JsxAttr::Expression(name.into(), value.into()));
                i = resume;
            }
            // `name=` with no recognisable value is recorded as a boolean;
            // whatever follows is examined on the next iteration.
            _ => attrs.push(JsxAttr::Boolean(name.into())),
        }
    }
    attrs
}