use memchr::{memchr, memchr_iter, memmem};
use std::borrow::Cow;
use vize_carton::{cstr, FxHashMap, String};
/// Lower-case name of the `<template>` block; compared ASCII-case-insensitively.
const TAG_TEMPLATE: &[u8] = b"template";
/// Lower-case name of the `<script>` block; compared ASCII-case-insensitively.
const TAG_SCRIPT: &[u8] = b"script";
/// Lower-case name of the `<style>` block; compared ASCII-case-insensitively.
const TAG_STYLE: &[u8] = b"style";
/// Attributes of a block's opening tag, keyed by attribute name.
/// Attributes written without `=` are stored with an empty value.
type BlockAttrs<'a> = FxHashMap<Cow<'a, str>, Cow<'a, str>>;
/// Successful parse of one block, in the order the parser returns it:
/// `(tag_name, attrs, content, content_start, content_end, end_pos, line, col)`.
///
/// * `content_start` / `content_end` - byte offsets delimiting `content`.
/// * `end_pos` - byte offset just past the closing tag's `>`.
/// * `line` - line counter value when the closing tag was found.
/// * `col` - distance from the last recorded newline to `end_pos`.
type BlockParseOutput<'a> = (
&'a [u8], BlockAttrs<'a>, Cow<'a, str>, usize, usize, usize, usize, usize, );
/// Parse failure as `(error_code, message)`, e.g. `("MALFORMED_BLOCK", "...")`.
type BlockParseError = (&'static str, String);
/// `Ok(None)` means "no block starts here"; `Err` means a block started but is malformed.
type BlockParseResult<'a> = Result<Option<BlockParseOutput<'a>>, BlockParseError>;
/// Arguments for `find_block_end`, bundled so the call sites stay readable.
struct BlockEndSearch<'a> {
/// Raw bytes that are scanned for the closing tag.
bytes: &'a [u8],
/// Text the block content is sliced from, using the same offsets as `bytes`
/// (presumably `bytes == source.as_bytes()` - confirm against the caller).
source: &'a str,
/// Name of the block whose closing tag is being searched for.
tag_name: &'a [u8],
/// Byte offset at which the scan starts.
pos: usize,
/// Byte offset of the first content byte (just past the opening tag's `>`).
content_start: usize,
/// Initial value of the line counter.
start_line: usize,
/// Initial value of the "offset of the last newline seen" tracker used for the column.
initial_last_newline: usize,
/// Attributes parsed from the opening tag; moved into the result unchanged.
attrs: BlockAttrs<'a>,
}
/// Builds the `MALFORMED_BLOCK` error for the block named `tag_name`.
///
/// `reason` is spliced into the message verbatim. A tag name that is not
/// valid UTF-8 is reported as `unknown`.
fn build_malformed_error(tag_name: &[u8], reason: &str) -> BlockParseError {
    let tag_str = match std::str::from_utf8(tag_name) {
        Ok(name) => name,
        Err(_) => "unknown",
    };
    let message = cstr!("Malformed <{tag_str}> block: {reason}.");
    ("MALFORMED_BLOCK", message)
}
/// Returns `true` when `name` equals `expected`, ignoring ASCII case.
#[inline(always)]
pub(super) fn tag_name_eq(name: &[u8], expected: &[u8]) -> bool {
    if name.len() != expected.len() {
        return false;
    }
    name.iter()
        .zip(expected)
        .all(|(lhs, rhs)| lhs.eq_ignore_ascii_case(rhs))
}
/// Returns `true` when `haystack` begins with `needle`, ignoring ASCII case.
#[inline(always)]
fn starts_with_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    match haystack.get(..needle.len()) {
        Some(prefix) => prefix.eq_ignore_ascii_case(needle),
        // `haystack` is shorter than `needle`.
        None => false,
    }
}
/// Returns `true` for bytes accepted in a tag name: ASCII letters, ASCII digits, `-` and `_`.
#[inline(always)]
fn is_tag_name_char_fast(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'-' || b == b'_'
}
/// Returns `true` for space, tab, line feed and carriage return.
#[inline(always)]
fn is_whitespace_fast(b: u8) -> bool {
    b == b' ' || b == b'\t' || b == b'\n' || b == b'\r'
}
/// Accounts for every `\n` in `bytes`.
///
/// `line` grows by the number of newlines found. If there was at least one,
/// `last_newline` becomes `base` plus the offset of the last one within
/// `bytes`; otherwise it is left untouched.
#[inline]
fn advance_line(bytes: &[u8], base: usize, line: &mut usize, last_newline: &mut usize) {
    let mut newline_count = 0usize;
    let mut final_offset = None;
    for offset in memchr_iter(b'\n', bytes) {
        newline_count += 1;
        final_offset = Some(offset);
    }
    *line += newline_count;
    if let Some(offset) = final_offset {
        *last_newline = base + offset;
    }
}
/// Decides whether a `/` may open a regex literal, judging only by the
/// previous significant byte.
///
/// Accepted predecessors are the punctuation bytes listed below plus `\n`,
/// which the script scanner also uses as its start-of-content marker.
fn can_start_regex_literal(prev_significant_char: u8) -> bool {
    const REGEX_PREDECESSORS: &[u8] = b"=([,:{;\n?&|+-*!><%^";
    REGEX_PREDECESSORS.contains(&prev_significant_char)
}
/// Tries to skip a JavaScript regex literal whose opening `/` is at `pos`.
///
/// Returns the offset just past the literal, including any trailing flag
/// characters (ASCII alphanumerics or `_`). Returns `None` when the text does
/// not look like a regex: an unescaped newline is reached, or input ends
/// before the closing `/`.
///
/// A `/` inside a `[...]` character class does not terminate the literal, and
/// a backslash always swallows the following byte.
///
/// `line` and `last_newline` are updated for backslash-escaped newlines, but
/// only when the scan succeeds. On `None` the caller re-scans the same bytes,
/// so touching the counters there would count those newlines twice.
fn skip_regex_literal(
    bytes: &[u8],
    mut pos: usize,
    len: usize,
    line: &mut usize,
    last_newline: &mut usize,
) -> Option<usize> {
    debug_assert_eq!(bytes[pos], b'/');
    pos += 1;
    let mut in_character_class = false;
    // Line bookkeeping is buffered here and committed only on success.
    let mut escaped_newlines = 0usize;
    let mut newest_newline: Option<usize> = None;
    while pos < len {
        let c = bytes[pos];
        if c == b'\n' {
            return None;
        }
        if c == b'\\' {
            if pos + 1 < len && bytes[pos + 1] == b'\n' {
                escaped_newlines += 1;
                newest_newline = Some(pos + 1);
            }
            // Skip the backslash and the byte it escapes, clamped to the end of input.
            pos = (pos + 2).min(len);
            continue;
        }
        if in_character_class {
            if c == b']' {
                in_character_class = false;
            }
            pos += 1;
            continue;
        }
        match c {
            b'[' => {
                in_character_class = true;
                pos += 1;
            }
            b'/' => {
                pos += 1;
                // Consume trailing flags such as `g`, `i`, `m`.
                while pos < len && (bytes[pos].is_ascii_alphanumeric() || bytes[pos] == b'_') {
                    pos += 1;
                }
                *line += escaped_newlines;
                if let Some(offset) = newest_newline {
                    *last_newline = offset;
                }
                return Some(pos);
            }
            _ => pos += 1,
        }
    }
    None
}
/// Checks whether a closing tag `</tag_name ... >` starts at `pos`.
///
/// The name is matched ignoring ASCII case, and only whitespace may sit
/// between the name and `>`. Returns the offset just past the `>`, or `None`
/// when the bytes at `pos` are not such a closing tag.
#[inline]
fn find_closing_tag_end(bytes: &[u8], pos: usize, len: usize, tag_name: &[u8]) -> Option<usize> {
    let name_start = pos + 2;
    let name_end = name_start + tag_name.len();
    // At least one byte (the `>`) must follow the name.
    if name_end >= len {
        return None;
    }
    if (bytes[pos], bytes[pos + 1]) != (b'<', b'/') {
        return None;
    }
    if !bytes[name_start..name_end].eq_ignore_ascii_case(tag_name) {
        return None;
    }
    let mut cursor = name_end;
    loop {
        if cursor >= len {
            return None;
        }
        match bytes[cursor] {
            b'>' => return Some(cursor + 1),
            b' ' | b'\t' | b'\n' | b'\r' => cursor += 1,
            _ => return None,
        }
    }
}
/// Parses one block whose opening `<` is at byte offset `start`.
///
/// `bytes` and `source` are indexed with the same offsets (presumably
/// `bytes == source.as_bytes()` - confirm against the caller). `start_line`
/// seeds the line counter.
///
/// # Returns
///
/// * `Ok(None)` - nothing follows `start`, or no tag-name character follows the `<`.
/// * `Ok(Some((tag_name, attrs, content, content_start, content_end, end_pos, line, col)))`
///   - the block was parsed. For a self-closing tag (`<tag ... />`) the content is
///   empty and the three offsets all point just past the `>`.
/// * `Err(("MALFORMED_BLOCK", _))` - the opening tag never reaches `>`, or the
///   closing tag is missing.
///
/// # Strategy per block kind
///
/// * `template` - tracks nesting depth of `<template>` / `</template>`.
/// * `script` - scans byte by byte, skipping comments, regex literals and string
///   literals so that `</script>` inside them is ignored.
/// * anything else (including `style`) - plain search for the matching closing tag.
///
/// NOTE(review): newlines inside the opening tag are not added to the line counter.
pub(super) fn parse_block_fast<'a>(
    bytes: &'a [u8],
    source: &'a str,
    start: usize,
    start_line: usize,
) -> BlockParseResult<'a> {
    let len = bytes.len();
    let mut pos = start + 1;
    if pos >= len {
        return Ok(None);
    }

    // ---- Tag name -------------------------------------------------------
    let tag_start = pos;
    while pos < len && is_tag_name_char_fast(bytes[pos]) {
        pos += 1;
    }
    if pos == tag_start {
        return Ok(None);
    }
    let tag_name = &source.as_bytes()[tag_start..pos];

    // ---- Attributes -----------------------------------------------------
    let mut attrs: BlockAttrs<'a> = FxHashMap::default();
    while pos < len && bytes[pos] != b'>' {
        while pos < len && is_whitespace_fast(bytes[pos]) {
            pos += 1;
        }
        // Stop ON `>` or `/` without consuming it; both are handled after the loop.
        if pos >= len || bytes[pos] == b'>' || bytes[pos] == b'/' {
            break;
        }
        let attr_start = pos;
        while pos < len {
            let c = bytes[pos];
            if c == b'='
                || c == b' '
                || c == b'>'
                || c == b'/'
                || c == b'\t'
                || c == b'\n'
                || c == b'\r'
            {
                break;
            }
            pos += 1;
        }
        if pos == attr_start {
            // Only a stray `=` can get here; skip it so the loop makes progress.
            pos += 1;
            continue;
        }
        let attr_name: Cow<'a, str> = Cow::Borrowed(&source[attr_start..pos]);
        while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
            pos += 1;
        }
        let attr_value: Cow<'a, str> = if pos < len && bytes[pos] == b'=' {
            pos += 1;
            while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
                pos += 1;
            }
            if pos < len && (bytes[pos] == b'"' || bytes[pos] == b'\'') {
                let quote_char = bytes[pos];
                pos += 1;
                let value_start = pos;
                match memchr(quote_char, &bytes[pos..]) {
                    Some(quote_offset) => {
                        pos += quote_offset;
                        let value = Cow::Borrowed(&source[value_start..pos]);
                        pos += 1; // step over the closing quote
                        value
                    }
                    None => {
                        // Unterminated quote: the value runs to the end of input.
                        pos = len;
                        Cow::Borrowed(&source[value_start..pos])
                    }
                }
            } else {
                let value_start = pos;
                while pos < len {
                    let c = bytes[pos];
                    // `\r` is included so CRLF input does not leak a `\r` into the value,
                    // matching the terminator set used for attribute names above.
                    if c == b' '
                        || c == b'>'
                        || c == b'/'
                        || c == b'\t'
                        || c == b'\n'
                        || c == b'\r'
                    {
                        break;
                    }
                    pos += 1;
                }
                Cow::Borrowed(&source[value_start..pos])
            }
        } else {
            Cow::Borrowed("")
        };
        if !attr_name.is_empty() {
            attrs.insert(attr_name, attr_value);
        }
    }

    // ---- Self-closing tag -----------------------------------------------
    // The attribute loop never consumes a `/`, so a self-closing tag shows up
    // here as `/` at `pos` immediately followed by `>`.
    let is_self_closing = pos + 1 < len && bytes[pos] == b'/' && bytes[pos + 1] == b'>';
    if is_self_closing {
        pos += 2;
        return Ok(Some((
            tag_name,
            attrs,
            Cow::Borrowed(""),
            pos,
            pos,
            pos,
            start_line,
            pos - start,
        )));
    }

    if pos < len && bytes[pos] == b'>' {
        pos += 1;
    } else {
        return Err(build_malformed_error(
            tag_name,
            "the opening tag is incomplete",
        ));
    }

    let content_start = pos;
    let mut line = start_line;
    let mut last_newline = start;

    // ---- <template>: depth-tracked search -------------------------------
    if tag_name.eq_ignore_ascii_case(TAG_TEMPLATE) {
        let mut depth = 1;

        /// Returns the offset past `>` if `</template` + optional whitespace + `>` starts at `pos`.
        fn is_closing_template_tag(bytes: &[u8], pos: usize, len: usize) -> Option<usize> {
            const CLOSING_TAG_PREFIX: &[u8] = b"</template";
            if pos + CLOSING_TAG_PREFIX.len() > len {
                return None;
            }
            if !bytes[pos..pos + CLOSING_TAG_PREFIX.len()].eq_ignore_ascii_case(CLOSING_TAG_PREFIX)
            {
                return None;
            }
            let mut check_pos = pos + CLOSING_TAG_PREFIX.len();
            while check_pos < len {
                match bytes[check_pos] {
                    b'>' => return Some(check_pos + 1),
                    b' ' | b'\t' | b'\n' | b'\r' => check_pos += 1,
                    _ => return None,
                }
            }
            None
        }

        while pos < len {
            let Some(lt_offset) = memchr(b'<', &bytes[pos..]) else {
                advance_line(&bytes[pos..], pos, &mut line, &mut last_newline);
                break;
            };
            advance_line(
                &bytes[pos..pos + lt_offset],
                pos,
                &mut line,
                &mut last_newline,
            );
            pos += lt_offset;

            if let Some(end_tag_pos) = is_closing_template_tag(bytes, pos, len) {
                depth -= 1;
                if depth == 0 {
                    let content_end = pos;
                    let end_pos = end_tag_pos;
                    let col = pos - last_newline + (end_pos - pos);
                    let content = Cow::Borrowed(&source[content_start..content_end]);
                    return Ok(Some((
                        tag_name,
                        attrs,
                        content,
                        content_start,
                        content_end,
                        end_pos,
                        line,
                        col,
                    )));
                }
                pos = end_tag_pos;
                continue;
            }

            // Nested `<template ...>` opens a new level unless it is self-closing.
            if starts_with_bytes(&bytes[pos + 1..], TAG_TEMPLATE) {
                let tag_check_pos = pos + 1 + TAG_TEMPLATE.len();
                if tag_check_pos < len {
                    let next_char = bytes[tag_check_pos];
                    if next_char == b' '
                        || next_char == b'>'
                        || next_char == b'\n'
                        || next_char == b'\t'
                        || next_char == b'\r'
                    {
                        let mut check_pos = tag_check_pos;
                        let mut is_self_closing_nested = false;
                        while check_pos < len && bytes[check_pos] != b'>' {
                            if bytes[check_pos] == b'/'
                                && check_pos + 1 < len
                                && bytes[check_pos + 1] == b'>'
                            {
                                is_self_closing_nested = true;
                                break;
                            }
                            check_pos += 1;
                        }
                        if !is_self_closing_nested {
                            depth += 1;
                        }
                    }
                }
            }
            pos += 1;
        }
        return Err(build_malformed_error(
            tag_name,
            "the closing tag is missing",
        ));
    }

    // ---- <style> and custom blocks: plain closing-tag search ------------
    if tag_name.eq_ignore_ascii_case(TAG_STYLE) {
        return find_block_end(BlockEndSearch {
            bytes,
            source,
            tag_name,
            pos,
            content_start,
            start_line,
            initial_last_newline: start,
            attrs,
        });
    }
    if !tag_name.eq_ignore_ascii_case(TAG_SCRIPT) {
        return find_block_end(BlockEndSearch {
            bytes,
            source,
            tag_name,
            pos,
            content_start,
            start_line,
            initial_last_newline: content_start,
            attrs,
        });
    }

    // ---- <script>: JS-aware scan ----------------------------------------
    // Only script blocks reach this point. `prev_significant_char` is the last
    // non-whitespace byte seen; `\n` doubles as the "start of content" marker.
    let mut prev_significant_char: u8 = b'\n';
    while pos < len {
        let b = bytes[pos];
        if b == b'\n' {
            line += 1;
            last_newline = pos;
            prev_significant_char = b'\n';
            pos += 1;
            continue;
        }
        if b == b' ' || b == b'\t' || b == b'\r' {
            pos += 1;
            continue;
        }

        // `// ...` line comment: jump to the newline and let the top of the loop count it.
        if b == b'/' && pos + 1 < len && bytes[pos + 1] == b'/' {
            pos += 2;
            if let Some(newline_offset) = memchr(b'\n', &bytes[pos..]) {
                pos += newline_offset;
            } else {
                pos = len;
            }
            continue;
        }

        // `/* ... */` block comment.
        if b == b'/' && pos + 1 < len && bytes[pos + 1] == b'*' {
            pos += 2;
            if let Some(end_offset) = memmem::find(&bytes[pos..], b"*/") {
                advance_line(
                    &bytes[pos..pos + end_offset],
                    pos,
                    &mut line,
                    &mut last_newline,
                );
                pos += end_offset + 2;
            } else {
                advance_line(&bytes[pos..], pos, &mut line, &mut last_newline);
                pos = len;
            }
            continue;
        }

        // Regex literal, when the preceding byte allows one.
        if b == b'/' && can_start_regex_literal(prev_significant_char) {
            if let Some(next_pos) =
                skip_regex_literal(bytes, pos, len, &mut line, &mut last_newline)
            {
                prev_significant_char = b'/';
                pos = next_pos;
                continue;
            }
        }

        // String and template literals.
        if b == b'\'' || b == b'"' || b == b'`' {
            let is_string_context = matches!(
                prev_significant_char,
                b'=' | b'('
                    | b'['
                    | b','
                    | b':'
                    | b'{'
                    | b';'
                    | b'\n'
                    | b'?'
                    | b'&'
                    | b'|'
                    | b'+'
                    | b'-'
                    | b'*'
                    | b'!'
                    | b'>'
                    | b'<'
                    | b'%'
                    | b'^'
            ) || (b == b'`'
                && (prev_significant_char.is_ascii_alphanumeric()
                    || prev_significant_char == b'_'
                    || prev_significant_char == b')'));
            if is_string_context {
                let quote = b;
                pos += 1;
                while pos < len {
                    let c = bytes[pos];
                    if c == b'\n' {
                        if quote != b'`' {
                            // Unterminated '…' / "…": stop here WITHOUT counting the
                            // newline. It is not consumed, so the outer loop counts it.
                            break;
                        }
                        line += 1;
                        last_newline = pos;
                    }
                    if c == b'\\' && pos + 1 < len {
                        // The escaped byte is skipped, so count it here if it is a newline.
                        if bytes[pos + 1] == b'\n' {
                            line += 1;
                            last_newline = pos + 1;
                        }
                        pos += 2;
                        continue;
                    }
                    if quote == b'`' && c == b'$' && pos + 1 < len && bytes[pos + 1] == b'{' {
                        // `${ ... }` interpolation: skip to the matching brace.
                        pos += 2;
                        let mut brace_depth = 1;
                        while pos < len && brace_depth > 0 {
                            let inner = bytes[pos];
                            if inner == b'\n' {
                                line += 1;
                                last_newline = pos;
                            }
                            if inner == b'{' {
                                brace_depth += 1;
                            } else if inner == b'}' {
                                brace_depth -= 1;
                            } else if inner == b'\\' && pos + 1 < len {
                                pos += 1;
                                if bytes[pos] == b'\n' {
                                    line += 1;
                                    last_newline = pos;
                                }
                            }
                            pos += 1;
                        }
                        continue;
                    }
                    if c == quote {
                        pos += 1;
                        break;
                    }
                    pos += 1;
                }
                prev_significant_char = quote;
                continue;
            }
        }

        if b == b'<' {
            if let Some(end_tag_pos) = find_closing_tag_end(bytes, pos, len, tag_name) {
                let content_end = pos;
                let col = pos - last_newline + (end_tag_pos - pos);
                let content = Cow::Borrowed(&source[content_start..content_end]);
                return Ok(Some((
                    tag_name,
                    attrs,
                    content,
                    content_start,
                    content_end,
                    end_tag_pos,
                    line,
                    col,
                )));
            }
        }
        prev_significant_char = b;
        pos += 1;
    }
    Err(build_malformed_error(
        tag_name,
        "the closing tag is missing",
    ))
}
/// Finds the closing tag of a block whose content is treated as opaque text.
///
/// Hops from `<` to `<` starting at `search.pos`, counting newlines along the
/// way, until `find_closing_tag_end` reports a closing tag for
/// `search.tag_name`. On success the block is returned in `BlockParseOutput`
/// order. If input ends first, a `MALFORMED_BLOCK` error is returned.
fn find_block_end<'a>(search: BlockEndSearch<'a>) -> BlockParseResult<'a> {
    let BlockEndSearch {
        bytes,
        source,
        tag_name,
        pos: scan_from,
        content_start,
        start_line: mut line,
        initial_last_newline: mut last_newline,
        attrs,
    } = search;
    let len = bytes.len();
    let mut cursor = scan_from;

    while cursor < len {
        let Some(gap) = memchr(b'<', &bytes[cursor..]) else {
            break;
        };
        advance_line(
            &bytes[cursor..cursor + gap],
            cursor,
            &mut line,
            &mut last_newline,
        );
        // `memchr` guarantees that `bytes[cursor]` is now `<`.
        cursor += gap;

        if let Some(end_tag_pos) = find_closing_tag_end(bytes, cursor, len, tag_name) {
            let content_end = cursor;
            let col = cursor - last_newline + (end_tag_pos - cursor);
            return Ok(Some((
                tag_name,
                attrs,
                Cow::Borrowed(&source[content_start..content_end]),
                content_start,
                content_end,
                end_tag_pos,
                line,
                col,
            )));
        }
        cursor += 1;
    }

    Err(build_malformed_error(
        tag_name,
        "the closing tag is missing",
    ))
}