use html5ever::Attribute;
use markup5ever_rcdom::{Node, NodeData};
use std::{borrow::Cow, rc::Rc};
use super::{
element_handler::ElementHandler,
node_util::get_node_tag_name,
options::Options,
text_util::{
compress_whitespace, index_of_markdown_ordered_item_dot, is_markdown_atx_heading,
TrimAsciiWhitespace,
},
};
/// Walks a single DOM node and appends the Markdown it produces to `buffer`.
///
/// Dispatches on the kind of `node`:
/// - a document walks its children (as a block-level, non-`pre` parent) and
///   then trims the end of the buffer via `trim_buffer_end`;
/// - a text node is forwarded to `append_text` together with `parent_tag`,
///   `is_pre` and `trim_leading_spaces`;
/// - an element is forwarded to `visit_element` with its local name and
///   attributes;
/// - comments, doctypes and processing instructions produce no output.
///
/// `parent_tag` is the tag name of the enclosing element, if any.
pub(crate) fn walk_node(
    node: &Rc<Node>,
    parent_tag: Option<&str>,
    buffer: &mut Vec<String>,
    handler: &dyn ElementHandler,
    options: &Options,
    is_pre: bool,
    trim_leading_spaces: bool,
) {
    match node.data {
        NodeData::Document => {
            walk_children(buffer, node, true, handler, options, false);
            trim_buffer_end(buffer);
        }
        NodeData::Text { ref contents } => {
            append_text(
                buffer,
                parent_tag,
                contents.borrow().to_string(),
                is_pre,
                trim_leading_spaces,
            );
        }
        NodeData::Element {
            ref name,
            ref attrs,
            ..
        } => {
            visit_element(
                buffer,
                node,
                handler,
                options,
                &name.local,
                &attrs.borrow(),
                is_pre,
            );
        }
        // None of these node kinds carry renderable content. Processing
        // instructions are skipped rather than treated as unreachable so that
        // a tree containing one can never make the conversion panic.
        NodeData::Comment { .. }
        | NodeData::Doctype { .. }
        | NodeData::ProcessingInstruction { .. } => {}
    }
}
/// Appends the content of a text node to `buffer`.
///
/// Inside preformatted content (`is_pre`) the text is pushed as-is, except
/// that text whose direct parent is `pre` first goes through
/// `escape_pre_text_if_needed`.
///
/// Otherwise the text is entity-decoded, escaped with `escape_if_needed` and
/// passed through `compress_whitespace`. A result that is exactly one space is
/// dropped when the parent is a block container. Leading spaces are removed
/// when `trim_leading_spaces` is set, or when the text starts with a space and
/// the last buffer entry already ends with one.
fn append_text(
    buffer: &mut Vec<String>,
    parent_tag: Option<&str>,
    text: String,
    is_pre: bool,
    trim_leading_spaces: bool,
) {
    if is_pre {
        let directly_in_pre = parent_tag == Some("pre");
        buffer.push(if directly_in_pre {
            escape_pre_text_if_needed(text)
        } else {
            text
        });
        return;
    }

    let decoded = html_escape::decode_html_entities(&text);
    let escaped = escape_if_needed(decoded);
    let compressed = compress_whitespace(&escaped);

    // A lone space directly inside a block container is only markup
    // indentation, so it contributes nothing.
    if &*compressed == " " && parent_tag.is_some_and(is_block_container) {
        return;
    }

    // Avoid emitting two consecutive spaces across adjacent buffer entries.
    let would_double_space =
        compressed.starts_with(' ') && buffer.last().is_some_and(|prev| prev.ends_with(' '));

    let piece = if trim_leading_spaces || would_double_space {
        compressed.trim_start_matches(' ').to_string()
    } else {
        compressed.into_owned()
    };
    buffer.push(piece);
}
/// Renders an element: walks its children, hands their joined output to the
/// handler, and replaces that output in `buffer` with the handler's result.
///
/// Children are walked in `pre` mode when the caller already is, or when this
/// element is `pre` or `code`. Everything the children pushed is joined with
/// `join_contents`, passed to `handler.on_visit`, and then removed from the
/// buffer. The handler's result, if any, is pushed in its place; an empty
/// result for a `head` element is not pushed.
fn visit_element(
    buffer: &mut Vec<String>,
    node: &Rc<Node>,
    handler: &dyn ElementHandler,
    options: &Options,
    tag: &str,
    attrs: &[Attribute],
    is_pre: bool,
) {
    let start = buffer.len();
    let children_in_pre = is_pre || matches!(tag, "pre" | "code");

    walk_children(
        buffer,
        node,
        is_block_element(tag),
        handler,
        options,
        children_in_pre,
    );

    let inner = join_contents(&buffer[start..]);
    let rendered = handler.on_visit(node, tag, attrs, &inner, options);

    // The children's raw output is superseded by whatever the handler returned.
    buffer.truncate(start);

    let Some(markdown) = rendered else {
        return;
    };
    if tag == "head" && markdown.is_empty() {
        return;
    }
    buffer.push(markdown);
}
/// Concatenates `contents` into one string, collapsing the newlines at every
/// junction.
///
/// Empty entries are skipped. At each junction the trailing newlines of the
/// text accumulated so far and the leading newlines of the next entry are
/// replaced by a single run whose length is the larger of the two counts,
/// capped at two.
///
/// The accumulator is edited in place, so each entry is copied only once
/// instead of re-copying the whole result on every iteration.
fn join_contents(contents: &[String]) -> String {
    // The output can never be longer than the sum of the inputs.
    let mut result = String::with_capacity(contents.iter().map(String::len).sum());

    for content in contents {
        if content.is_empty() {
            continue;
        }

        let kept_len = result.trim_end_matches('\n').len();
        let right = content.trim_start_matches('\n');

        let trailing_new_lines = result.len() - kept_len;
        let leading_new_lines = content.len() - right.len();
        let separator_new_lines = trailing_new_lines.max(leading_new_lines).min(2);

        // Drop the old trailing newlines, then write the collapsed separator
        // followed by the rest of the entry.
        result.truncate(kept_len);
        for _ in 0..separator_new_lines {
            result.push('\n');
        }
        result.push_str(right);
    }

    result
}
/// Walks every child of `node` in order, appending their output to `buffer`.
///
/// Leading-space trimming starts enabled when the parent is a block element
/// and we are not in `pre` mode. After each child that actually pushed
/// something, it stays enabled only if that child was a block element. Before
/// a block-level child is walked, trailing spaces at the end of the buffer are
/// removed.
fn walk_children(
    buffer: &mut Vec<String>,
    node: &Rc<Node>,
    is_parent_blok_element: bool,
    handler: &dyn ElementHandler,
    options: &Options,
    is_pre: bool,
) {
    let parent_tag = get_node_tag_name(node);
    let mut trim_leading_spaces = is_parent_blok_element && !is_pre;

    let children = node.children.borrow();
    for child in children.iter() {
        let child_is_block = get_node_tag_name(child).is_some_and(is_block_element);
        if child_is_block {
            trim_buffer_end_spaces(buffer);
        }

        let len_before = buffer.len();
        walk_node(
            child,
            parent_tag,
            buffer,
            handler,
            options,
            is_pre,
            trim_leading_spaces,
        );

        // Children that produced nothing leave the trimming state untouched.
        let produced_output = buffer.len() > len_before;
        if produced_output {
            trim_leading_spaces = child_is_block;
        }
    }
}
/// Trims trailing whitespace (as defined by `trim_end_ascii_whitespace`) from
/// the end of the buffer, treating the entries as one concatenated string.
///
/// Entries are visited from last to first. Each one is trimmed in place, and
/// the walk stops at the first entry that still has content afterwards, so
/// whitespace that sits between two non-empty entries is preserved. Entries
/// that are (or become) empty are skipped over.
fn trim_buffer_end(buffer: &mut [String]) {
    for content in buffer.iter_mut().rev() {
        let trimmed_len = content.trim_end_ascii_whitespace().len();
        // The trimmed text is a prefix of the entry, so truncating at its
        // length is always on a character boundary and needs no reallocation.
        content.truncate(trimmed_len);
        if !content.is_empty() {
            break;
        }
    }
}
/// Trims trailing space characters (`' '` only) from the end of the buffer,
/// treating the entries as one concatenated string.
///
/// Entries are visited from last to first. Each one is trimmed in place, and
/// the walk stops at the first entry that still has content afterwards, so
/// spaces that sit between two non-empty entries are preserved. Entries that
/// are (or become) empty are skipped over.
fn trim_buffer_end_spaces(buffer: &mut [String]) {
    for content in buffer.iter_mut().rev() {
        let trimmed_len = content.trim_end_matches(' ').len();
        // The trimmed text is a prefix of the entry, so truncating at its
        // length is always on a character boundary and needs no reallocation.
        content.truncate(trimmed_len);
        if !content.is_empty() {
            break;
        }
    }
}
/// Backslash-escapes characters in `text` that would otherwise be read as
/// Markdown syntax, returning the input untouched when nothing can apply.
///
/// The characters `\`, `*`, `_`, `` ` ``, `[` and `]` are escaped wherever
/// they occur. In addition the first character decides on a line-start escape:
/// - `=`, `~`, `>`: a backslash is always prepended;
/// - `-`, `+`: a backslash is prepended when the second character is a space;
/// - `#`: a backslash is prepended when `is_markdown_atx_heading` accepts the
///   text;
/// - an ASCII digit: the byte at the index reported by
///   `index_of_markdown_ordered_item_dot`, if any, is replaced with `\.`.
fn escape_if_needed(text: Cow<str>) -> Cow<'_, str> {
    /// Characters that are escaped at any position in the text.
    fn is_inline_special(ch: char) -> bool {
        matches!(ch, '\\' | '*' | '_' | '`' | '[' | ']')
    }

    let Some(first) = text.chars().next() else {
        return text;
    };

    let leading_is_candidate = matches!(first, '=' | '~' | '>' | '-' | '+' | '#' | '0'..='9');
    if !leading_is_candidate && !text.chars().any(is_inline_special) {
        return text;
    }

    let mut escaped = String::new();
    for ch in text.chars() {
        if is_inline_special(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }

    if first.is_ascii_digit() {
        if let Some(dot_idx) = index_of_markdown_ordered_item_dot(&escaped) {
            escaped.replace_range(dot_idx..dot_idx + 1, "\\.");
        }
    } else {
        let needs_leading_backslash = match first {
            '=' | '~' | '>' => true,
            '-' | '+' => escaped.chars().nth(1) == Some(' '),
            '#' => is_markdown_atx_heading(&escaped),
            _ => false,
        };
        if needs_leading_backslash {
            escaped.insert(0, '\\');
        }
    }

    Cow::Owned(escaped)
}
/// Prepends a backslash to `text` when it begins with a backtick or a tilde;
/// any other text (including the empty string) is returned unchanged.
fn escape_pre_text_if_needed(text: String) -> String {
    let starts_with_fence_char = text.starts_with(|ch: char| ch == '`' || ch == '~');
    if !starts_with_fence_char {
        return text;
    }

    let mut escaped = String::with_capacity(text.len() + 1);
    escaped.push('\\');
    escaped.push_str(&text);
    escaped
}
/// Returns `true` when `tag` is one of the tag names this module treats as a
/// block container. The comparison is exact and case-sensitive.
fn is_block_container(tag: &str) -> bool {
    const BLOCK_CONTAINER_TAGS: &[&str] = &[
        "html",
        "body",
        "div",
        "ul",
        "ol",
        "li",
        "table",
        "tr",
        "header",
        "head",
        "footer",
        "nav",
        "section",
        "article",
        "aside",
        "main",
        "blockquote",
        "script",
        "style",
    ];

    BLOCK_CONTAINER_TAGS
        .iter()
        .any(|&candidate| candidate == tag)
}
/// Returns `true` when `tag` is a block-level element: either one of the
/// leaf block tags listed here (`p`, `h1`–`h6`, `pre`, `hr`, `br`) or anything
/// accepted by `is_block_container`.
fn is_block_element(tag: &str) -> bool {
    match tag {
        "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "pre" | "hr" | "br" => true,
        other => is_block_container(other),
    }
}