#[cfg(test)]
mod tests;
/// Reports whether `tag` belongs to the "skip" group of element names.
///
/// The comparison is an exact, case-sensitive string match, so callers must
/// pass the tag name in lowercase for it to be recognised.
#[inline]
pub(crate) fn is_skip_tag(tag: &str) -> bool {
    // Element names that make up the skip group.
    const SKIP_TAGS: &[&str] = &[
        "script", "style", "meta", "link", "template", "iframe", "object", "embed", "noscript",
        "head", "svg",
    ];
    SKIP_TAGS.contains(&tag)
}
/// Reports whether `tag` is one of `nav`, `header`, `footer` or `aside`.
///
/// Matching is exact and case-sensitive.
#[inline]
pub(crate) fn is_shell_tag(tag: &str) -> bool {
    ["nav", "header", "footer", "aside"].contains(&tag)
}
/// Reports whether `tag` is one of `span`, `div`, `section`, `article` or
/// `main`.
///
/// Matching is exact and case-sensitive.
#[inline]
pub(crate) fn is_wrapper_tag(tag: &str) -> bool {
    const WRAPPER_TAGS: [&str; 5] = ["span", "div", "section", "article", "main"];
    WRAPPER_TAGS.iter().any(|&name| name == tag)
}
/// Reports whether `tag` belongs to the "structural" group of element names:
/// headings, paragraphs, lists, quotes, code, tables, links, images, inline
/// emphasis, breaks and figures.
///
/// Matching is exact and case-sensitive.
#[inline]
pub(crate) fn is_structural_tag(tag: &str) -> bool {
    const STRUCTURAL_TAGS: &[&str] = &[
        // Headings.
        "h1", "h2", "h3", "h4", "h5", "h6",
        // Paragraphs and lists.
        "p", "ul", "ol", "li",
        // Quotes and code.
        "blockquote", "pre", "code",
        // Tables.
        "table", "thead", "tbody", "tr", "th", "td",
        // Links and images.
        "a", "img",
        // Inline emphasis.
        "strong", "b", "em", "i",
        // Rules and line breaks.
        "hr", "br",
        // Figures.
        "figure", "figcaption",
    ];
    STRUCTURAL_TAGS.contains(&tag)
}
/// Appends `text` to `out` with whitespace collapsed and Markdown-significant
/// characters backslash-escaped.
///
/// # Whitespace
///
/// * ASCII whitespace and U+00A0 (no-break space) are never copied verbatim.
///   A run of them is remembered in `last_was_space` and written as a single
///   `' '` only when the next non-whitespace character arrives, so trailing
///   whitespace is not emitted by this call; the flag carries it over to the
///   next one.
/// * While `at_block_start` is in effect (until the first non-whitespace
///   character of this call) whitespace is dropped, and a space still pending
///   in `last_was_space` from an earlier call is discarded.
///
/// # Escaping
///
/// * `\`, `*`, `_`, `` ` ``, `[`, `]` and `!` are always escaped.
/// * `#`, `>`, `+` and `-` are escaped only when they are the first
///   non-whitespace character written while `*at_line_start` is `true`.
/// * A `.` or `)` that directly follows a run of ASCII digits which itself
///   started at the line start is escaped (`1\.`), so the text cannot be read
///   as an ordered-list marker. The digits are left untouched: a backslash is
///   only an escape before ASCII punctuation, so `\1` would show a literal
///   backslash in the rendered output.
///
/// # Parameters
///
/// * `text` - raw text to append.
/// * `out` - buffer receiving the normalised text.
/// * `last_was_space` - in/out flag: a collapsed space is pending.
/// * `at_block_start` - suppresses whitespace before the first character.
/// * `at_line_start` - in/out flag; cleared as soon as a non-whitespace
///   character is written and left unchanged if `text` is whitespace only.
///
/// The digit run is tracked per call, so a list-like marker split across two
/// calls (`"1"` followed by `". x"`) is not escaped.
pub fn write_normalised(
    text: &str,
    out: &mut String,
    last_was_space: &mut bool,
    at_block_start: bool,
    at_line_start: &mut bool,
) {
    let mut block_start = at_block_start;
    // True while everything written since the line start is ASCII digits.
    let mut in_leading_digits = false;

    for c in text.chars() {
        if c.is_ascii_whitespace() || c == '\u{00a0}' {
            if !block_start {
                *last_was_space = true;
            }
            // Whitespace ends a digit run: "1 ." is not a list marker.
            in_leading_digits = false;
            continue;
        }

        if *last_was_space && !block_start {
            out.push(' ');
        }
        *last_was_space = false;
        block_start = false;

        let line_start = *at_line_start;
        *at_line_start = false;
        let after_leading_digits = in_leading_digits;
        in_leading_digits = false;

        match c {
            '\\' | '*' | '_' | '`' | '[' | ']' | '!' => {
                out.push('\\');
                out.push(c);
            }
            '#' | '>' | '+' | '-' if line_start => {
                out.push('\\');
                out.push(c);
            }
            // The delimiter is what turns "1." / "1)" into a list marker.
            '.' | ')' if after_leading_digits => {
                out.push('\\');
                out.push(c);
            }
            '0'..='9' => {
                in_leading_digits = line_start || after_leading_digits;
                out.push(c);
            }
            _ => out.push(c),
        }
    }
}
/// Extracts a language name from a `class` attribute value.
///
/// The value is split on whitespace and the first token starting with
/// `language-` wins; the text after that prefix is returned, borrowed from the
/// input. Returns `None` when `class` is `None` or no token has the prefix.
/// A bare `language-` token yields `Some("")`.
pub fn extract_code_lang(class: Option<&str>) -> Option<&str> {
    const PREFIX: &str = "language-";

    let names = class?;
    for name in names.split_whitespace() {
        if let Some(lang) = name.strip_prefix(PREFIX) {
            return Some(lang);
        }
    }
    None
}