use std::ops::Range;
#[inline]
pub fn scan_tag_name(bytes: &[u8], start: usize) -> usize {
let mut end = start;
while end < bytes.len() && is_tag_name_continue(bytes[end]) {
end += 1;
}
end
}
#[inline]
pub fn is_tag_name_start(b: u8) -> bool {
b.is_ascii_alphabetic()
}
#[inline]
pub fn is_tag_name_continue(b: u8) -> bool {
b.is_ascii_alphanumeric() || b == b'-' || b == b'_'
}
#[inline]
pub fn skip_ws(bytes: &[u8], mut pos: usize) -> usize {
while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
pos += 1;
}
pos
}
pub fn find_tag_close(bytes: &[u8], start: usize) -> Option<(usize, bool)> {
let mut pos = start;
let len = bytes.len();
let mut in_quote: Option<u8> = None;
while pos < len {
let next = find_quote_or_close(bytes, pos)?;
pos = next;
match bytes[pos] {
b'"' | b'\'' => {
if let Some(q) = in_quote {
if q == bytes[pos] {
in_quote = None;
}
} else {
in_quote = Some(bytes[pos]);
}
}
b'>' if in_quote.is_none() => {
let self_closing = pos > 0 && bytes[pos - 1] == b'/';
return Some((pos, self_closing));
}
_ => {}
}
pos += 1;
}
None
}
#[inline]
fn find_quote_or_close(bytes: &[u8], start: usize) -> Option<usize> {
memchr::memchr3(b'"', b'\'', b'>', &bytes[start..]).map(|pos| start + pos)
}
pub fn scan_attribute(
bytes: &[u8],
pos: usize,
) -> Option<(Range<usize>, Option<Range<usize>>, usize)> {
let pos = skip_ws(bytes, pos);
if pos >= bytes.len() {
return None;
}
if bytes[pos] == b'>' || (bytes[pos] == b'/' && bytes.get(pos + 1) == Some(&b'>')) {
return None;
}
let key_start = pos;
let mut key_end = pos;
while key_end < bytes.len() {
match bytes[key_end] {
b'=' | b'>' | b'/' | b' ' | b'\t' | b'\n' | b'\r' => break,
_ => key_end += 1,
}
}
if key_end == key_start {
return None;
}
let after_key = skip_ws(bytes, key_end);
if after_key >= bytes.len() || bytes[after_key] != b'=' {
return Some((key_start..key_end, None, after_key));
}
let after_eq = skip_ws(bytes, after_key + 1);
if after_eq >= bytes.len() {
return None;
}
let (value_range, new_pos) = if let q @ (b'"' | b'\'') = bytes[after_eq] {
let val_start = after_eq + 1;
let mut val_end = val_start;
while val_end < bytes.len() && bytes[val_end] != q {
val_end += 1;
}
(val_start..val_end, val_end + 1)
} else {
let val_start = after_eq;
let mut val_end = val_start;
while val_end < bytes.len()
&& !matches!(bytes[val_end], b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/')
{
val_end += 1;
}
(val_start..val_end, val_end)
};
Some((key_start..key_end, Some(value_range), new_pos))
}
pub fn collect_attrs(bytes: &[u8], start: usize, end: usize) -> Vec<(&[u8], Option<&[u8]>)> {
let mut attrs = Vec::new();
let mut pos = skip_ws(bytes, start);
while pos < end {
match scan_attribute(bytes, pos) {
Some((key_range, val_range, new_pos)) => {
if new_pos <= pos {
break;
}
let key = &bytes[key_range.start..key_range.end.min(end)];
let val = val_range.map(|r| &bytes[r.start..r.end.min(end)]);
attrs.push((key, val));
pos = new_pos;
}
None => break,
}
}
attrs
}