use std::{
borrow::Cow,
cell::RefCell,
collections::HashSet,
fmt,
io::{self, Write},
rc::Rc,
};
use html5ever::{
Attribute, QualName,
interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink},
parse_document,
tendril::{StrTendril, TendrilSink},
};
/// Errors reported by [`convert`].
#[derive(Debug)]
pub enum Error {
/// The parser could not consume the input; carries the formatted cause.
Parse(String),
/// An `io::Error` raised during conversion. Besides failed writes, the converter
/// also reports its nesting-depth limit and invalid table-cell UTF-8 as `io::Error`s,
/// so those surface through this variant as well.
Io(io::Error),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::Parse(msg) => write!(f, "HTML parse error: {msg}"),
Error::Io(err) => write!(f, "I/O error: {err}"),
}
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::Parse(_) => None,
Error::Io(err) => Some(err),
}
}
}
impl From<io::Error> for Error {
fn from(err: io::Error) -> Self {
Error::Io(err)
}
}
/// Converts an HTML document to Markdown and writes the result to `out`.
///
/// `html` is handed to the parser as UTF-8 bytes. Empty input is accepted: it is
/// parsed and walked like any other document instead of tripping a debug-only
/// assertion, so debug and release builds behave the same.
///
/// # Errors
///
/// * [`Error::Parse`] if the parser fails to read the input.
/// * [`Error::Io`] if writing to `out` fails, or if the tree walk reports an
///   `io::Error` (for example when nesting exceeds the depth limit).
pub fn convert<W: Write>(html: &[u8], out: &mut W) -> Result<(), Error> {
    let dom = RcDom::parse(html)?;
    let mut cvt = Converter {
        out,
        redirect_stack: Vec::new(),
        in_pre: false,
        // The output starts at the beginning of a line with nothing written yet.
        at_line_start: true,
        pending_space: false,
        trailing_nls: 0,
        list_stack: Vec::new(),
        table_stack: Vec::new(),
        code_buf: String::new(),
        depth: 0,
    };
    cvt.walk(&dom.document)?;
    cvt.finalize()?;
    Ok(())
}
/// A node of the parsed document tree.
///
/// Children are kept as a doubly linked list: the parent stores the first and last
/// child, and each child stores its neighbours. All links sit in `RefCell`s so the
/// tree can be rewired through shared handles.
///
/// NOTE(review): every link, including `parent` and `prev_sibling`, is a strong
/// `Rc` handle, so attached nodes reference each other in cycles — confirm that
/// something releases them.
struct Node {
/// The node's kind and payload.
data: NodeData,
/// The node this one is attached to, if any.
parent: RefCell<Option<Handle>>,
/// Head of the child list.
first_child: RefCell<Option<Handle>>,
/// Tail of the child list.
last_child: RefCell<Option<Handle>>,
/// Neighbour that precedes this node under the same parent.
prev_sibling: RefCell<Option<Handle>>,
/// Neighbour that follows this node under the same parent.
next_sibling: RefCell<Option<Handle>>,
}
/// The kind of a [`Node`] together with its payload.
// `dead_code` is allowed because several payload fields (doctype ids, comment and
// processing-instruction contents) are stored but never read by the code visible here.
#[allow(dead_code)]
enum NodeData {
/// The document root; also used as the container for `<template>` contents.
Document,
/// A `<!DOCTYPE ...>` declaration.
Doctype {
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
},
/// Character data. The `RefCell` lets adjacent text be appended in place.
Text {
contents: RefCell<StrTendril>,
},
/// A comment.
Comment {
contents: StrTendril,
},
/// An element with its qualified name and attributes.
Element {
name: QualName,
attrs: RefCell<Vec<Attribute>>,
/// Separate root for the contents of a template element; `None` for other elements.
template_contents: Option<Handle>,
/// Copied from the parser's element flags at creation time.
mathml_annotation_xml_integration_point: bool,
},
/// A processing instruction.
ProcessingInstruction {
target: StrTendril,
contents: StrTendril,
},
}
/// Shared, reference-counted handle to a [`Node`].
type Handle = Rc<Node>;
/// Unlinks `node` from its parent and siblings, leaving the rest of the child
/// list intact.
///
/// After the call `node` has no parent and no siblings; its own children are
/// untouched. Calling this on a node that is not attached is a no-op.
fn detach(node: &Handle) {
    let parent = node.parent.borrow_mut().take();
    let prev = node.prev_sibling.borrow_mut().take();
    let next = node.next_sibling.borrow_mut().take();

    // Repair the backward link: either the following sibling or, when `node` was
    // the last child, the parent's tail pointer must now refer to `prev`.
    if let Some(ref next_n) = next {
        *next_n.prev_sibling.borrow_mut() = prev.clone();
    } else if let Some(ref parent_n) = parent {
        *parent_n.last_child.borrow_mut() = prev.clone();
    }

    // Repair the forward link. When `node` was the first child the parent's head
    // pointer must advance to `next`; resetting it to `None` would orphan every
    // remaining sibling.
    if let Some(ref prev_n) = prev {
        *prev_n.next_sibling.borrow_mut() = next;
    } else if let Some(ref parent_n) = parent {
        *parent_n.first_child.borrow_mut() = next;
    }
}
/// Moves `child` to the end of `parent`'s child list.
///
/// `child` is first detached from wherever it currently lives, then linked in
/// behind the previous last child (or installed as the only child).
fn append_child(parent: &Handle, child: &Handle) {
    detach(child);
    *child.parent.borrow_mut() = Some(Rc::clone(parent));

    // Install the new tail and keep hold of the old one in a single step.
    let former_tail = parent.last_child.replace(Some(Rc::clone(child)));
    match former_tail {
        Some(tail) => {
            debug_assert!(tail.next_sibling.borrow().is_none());
            *tail.next_sibling.borrow_mut() = Some(Rc::clone(child));
            *child.prev_sibling.borrow_mut() = Some(tail);
        }
        None => {
            debug_assert!(parent.first_child.borrow().is_none());
            *parent.first_child.borrow_mut() = Some(Rc::clone(child));
        }
    }
}
/// Inserts `new_node` immediately before `sibling`, under `sibling`'s parent.
///
/// `new_node` is detached from its current position first. When `sibling` was the
/// first child, the parent's head pointer is moved to `new_node`.
fn insert_before(sibling: &Handle, new_node: &Handle) {
    detach(new_node);

    let owner = sibling.parent.borrow().clone();
    // Point `sibling` back at the new node and remember what used to precede it.
    let before = sibling.prev_sibling.replace(Some(Rc::clone(new_node)));

    match (&before, &owner) {
        (Some(left), _) => *left.next_sibling.borrow_mut() = Some(Rc::clone(new_node)),
        (None, Some(parent_n)) => *parent_n.first_child.borrow_mut() = Some(Rc::clone(new_node)),
        (None, None) => {}
    }

    *new_node.parent.borrow_mut() = owner;
    *new_node.prev_sibling.borrow_mut() = before;
    *new_node.next_sibling.borrow_mut() = Some(Rc::clone(sibling));
}
/// Returns an iterator over `node`'s children, starting at its first child.
fn iter_children(node: &Handle) -> ChildIter {
    let first = node.first_child.borrow().as_ref().map(Rc::clone);
    ChildIter { next: first }
}
/// Iterator over a sibling chain, yielding owned handles.
struct ChildIter {
/// The node that the next call to `next()` will yield, if any.
next: Option<Handle>,
}
/// Walks the `next_sibling` chain one node at a time.
impl Iterator for ChildIter {
    type Item = Handle;

    fn next(&mut self) -> Option<Self::Item> {
        match self.next.take() {
            None => None,
            Some(node) => {
                // Read the successor before handing the node out, so later relinking
                // of the yielded node does not affect the position.
                self.next = node.next_sibling.borrow().as_ref().map(Rc::clone);
                Some(node)
            }
        }
    }
}
struct RcDom {
document: Handle,
quirks_mode: RefCell<QuirksMode>,
}
impl RcDom {
fn parse(html: &[u8]) -> Result<Self, Error> {
let dom = Self {
document: Rc::new(Node {
data: NodeData::Document,
parent: RefCell::new(None),
first_child: RefCell::new(None),
last_child: RefCell::new(None),
prev_sibling: RefCell::new(None),
next_sibling: RefCell::new(None),
}),
quirks_mode: RefCell::new(QuirksMode::NoQuirks),
};
let dom = parse_document(dom, Default::default())
.from_utf8()
.read_from(&mut &html[..])
.map_err(|e| Error::Parse(format!("{e}")))?;
Ok(dom)
}
fn make_node(&self, data: NodeData) -> Handle {
Rc::new(Node {
data,
parent: RefCell::new(None),
first_child: RefCell::new(None),
last_child: RefCell::new(None),
prev_sibling: RefCell::new(None),
next_sibling: RefCell::new(None),
})
}
fn append_common<P, A>(&self, child: NodeOrText<Handle>, previous: P, append: A)
where
P: FnOnce() -> Option<Handle>,
A: FnOnce(Handle),
{
let node = match child {
NodeOrText::AppendText(text) => {
if let Some(prev) = previous()
&& let NodeData::Text { ref contents } = prev.data
{
contents.borrow_mut().push_tendril(&text);
return;
}
self.make_node(NodeData::Text {
contents: RefCell::new(text),
})
}
NodeOrText::AppendNode(n) => n,
};
append(node);
}
}
/// Parser callbacks that build the linked-node tree.
impl TreeSink for RcDom {
    type Handle = Handle;
    type Output = Self;
    type ElemName<'a>
        = &'a QualName
    where
        Self: 'a;

    /// The finished sink is the DOM itself.
    fn finish(self) -> Self::Output {
        self
    }

    /// Parse errors are deliberately ignored.
    fn parse_error(&self, _msg: Cow<'static, str>) {}

    fn get_document(&self) -> Handle {
        Rc::clone(&self.document)
    }

    /// Returns the qualified name of an element node.
    ///
    /// Panics if `target` is not an element.
    fn elem_name<'a>(&'a self, target: &'a Handle) -> Self::ElemName<'a> {
        let NodeData::Element { name, .. } = &target.data else {
            debug_assert!(false, "elem_name called on non-element node");
            unreachable!("not an element");
        };
        name
    }

    /// Creates an unattached element; template elements get a separate contents root.
    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
        let template_contents = flags
            .template
            .then(|| self.make_node(NodeData::Document));
        self.make_node(NodeData::Element {
            name,
            attrs: RefCell::new(attrs),
            template_contents,
            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
        })
    }

    fn create_comment(&self, text: StrTendril) -> Handle {
        self.make_node(NodeData::Comment { contents: text })
    }

    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
        let pi = NodeData::ProcessingInstruction {
            target,
            contents: data,
        };
        self.make_node(pi)
    }

    /// Appends `child` as the last child of `parent`, merging text into a trailing
    /// text node when there is one.
    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
        self.append_common(
            child,
            || parent.last_child.borrow().clone(),
            |node| append_child(parent, &node),
        );
    }

    /// Inserts `child` directly before `sibling`, merging text into the preceding
    /// text node when there is one.
    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
        self.append_common(
            child,
            || sibling.prev_sibling.borrow().clone(),
            |node| insert_before(sibling, &node),
        );
    }

    /// Inserts before `element` when it is attached to a parent; otherwise appends
    /// to `prev_element`.
    fn append_based_on_parent_node(
        &self,
        element: &Handle,
        prev_element: &Handle,
        child: NodeOrText<Handle>,
    ) {
        let attached = element.parent.borrow().is_some();
        if attached {
            self.append_before_sibling(element, child);
        } else {
            self.append(prev_element, child);
        }
    }

    fn append_doctype_to_document(
        &self,
        name: StrTendril,
        public_id: StrTendril,
        system_id: StrTendril,
    ) {
        let doctype = self.make_node(NodeData::Doctype {
            name,
            public_id,
            system_id,
        });
        append_child(&self.document, &doctype);
    }

    /// Returns the contents root of a template element.
    ///
    /// Panics if `target` is not an element with template contents.
    fn get_template_contents(&self, target: &Handle) -> Handle {
        let NodeData::Element {
            template_contents: Some(contents),
            ..
        } = &target.data
        else {
            debug_assert!(false, "get_template_contents called on non-template node");
            unreachable!("not a template element");
        };
        Rc::clone(contents)
    }

    /// Two handles are the same node when they point at the same allocation.
    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
        Rc::ptr_eq(x, y)
    }

    fn set_quirks_mode(&self, mode: QuirksMode) {
        *self.quirks_mode.borrow_mut() = mode;
    }

    /// Adds each attribute from `new_attrs` whose name the element does not already
    /// carry. Non-element targets are left untouched.
    fn add_attrs_if_missing(&self, target: &Handle, new_attrs: Vec<Attribute>) {
        let NodeData::Element { attrs, .. } = &target.data else {
            return;
        };
        let mut existing = attrs.borrow_mut();
        // Snapshot of the names present before any addition.
        let known: HashSet<QualName> = existing.iter().map(|attr| attr.name.clone()).collect();
        for attr in new_attrs {
            if !known.contains(&attr.name) {
                existing.push(attr);
            }
        }
    }

    fn remove_from_parent(&self, target: &Handle) {
        detach(target);
    }

    /// Moves every child of `node` to the end of `new_parent`, preserving order.
    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
        let mut cursor = node.first_child.borrow().clone();
        while let Some(child) = cursor.take() {
            debug_assert!(
                child
                    .parent
                    .borrow()
                    .as_ref()
                    .is_some_and(|p| Rc::ptr_eq(p, node))
            );
            // Remember the successor first: moving the child rewrites its links.
            cursor = child.next_sibling.borrow().clone();
            append_child(new_parent, &child);
        }
    }

    /// Reports the flag recorded at element creation; always `false` for non-elements.
    fn is_mathml_annotation_xml_integration_point(&self, handle: &Handle) -> bool {
        matches!(
            &handle.data,
            NodeData::Element {
                mathml_annotation_xml_integration_point: true,
                ..
            }
        )
    }
}
/// Deepest nesting the tree walk and text collection will descend to before
/// reporting an error instead of recursing further.
const MAX_DEPTH: u32 = 200;
/// Builds the indentation for list nesting `level`: two spaces per level.
///
/// Computed rather than spelled out as literals so the widths cannot drift: runs
/// of spaces are indistinguishable to the eye and easily damaged by tooling.
const fn list_indent_for(level: usize) -> &'static str {
    // 30 spaces: enough for level 15, the deepest entry of `LIST_INDENTS`.
    const SPACES: &[u8] = &[b' '; 30];
    let (indent, _) = SPACES.split_at(level * 2);
    match std::str::from_utf8(indent) {
        Ok(text) => text,
        // Cannot happen: the buffer holds nothing but ASCII spaces.
        Err(_) => panic!("indent buffer is not valid UTF-8"),
    }
}

/// Indentation prefix for nested list items, indexed by nesting level.
///
/// Entry `n` is `2 * n` spaces, so a nested item lines up under the text of its
/// parent `- ` item. A one-space indent would not nest the item at all.
const LIST_INDENTS: [&str; 16] = [
    list_indent_for(0),
    list_indent_for(1),
    list_indent_for(2),
    list_indent_for(3),
    list_indent_for(4),
    list_indent_for(5),
    list_indent_for(6),
    list_indent_for(7),
    list_indent_for(8),
    list_indent_for(9),
    list_indent_for(10),
    list_indent_for(11),
    list_indent_for(12),
    list_indent_for(13),
    list_indent_for(14),
    list_indent_for(15),
];
/// Looks up the indentation for `level`, clamping to the deepest available entry.
fn list_indent_str(level: usize) -> &'static str {
    let deepest = LIST_INDENTS.len() - 1;
    LIST_INDENTS[level.min(deepest)]
}
/// One level of output capture: the bytes written while the redirect was active
/// plus the line-tracking state to restore when it ends.
struct RedirectState {
/// Output captured while this redirect was the innermost one.
buf: Vec<u8>,
/// Value of `Converter::at_line_start` when the redirect began.
saved_at_line_start: bool,
/// Value of `Converter::pending_space` when the redirect began.
saved_pending_space: bool,
/// Value of `Converter::trailing_nls` when the redirect began.
saved_trailing_nls: u8,
}
/// Walks the DOM and writes Markdown, tracking just enough output state to manage
/// spacing and blank lines.
struct Converter<'a, W: Write> {
/// Final destination, used when neither a redirect nor a table is active.
out: &'a mut W,
/// Active output captures, innermost last; writes go to the last entry's buffer.
redirect_stack: Vec<RedirectState>,
/// True while inside `<pre>`: text is emitted without whitespace normalization.
in_pre: bool,
/// True when the last thing written ended a line (or nothing was written yet).
at_line_start: bool,
/// A collapsed whitespace run is owed before the next non-space output.
pending_space: bool,
/// Number of newlines the output currently ends with, capped at 2.
trailing_nls: u8,
/// One entry per open `<ul>`/`<ol>`, innermost last.
list_stack: Vec<ListInfo>,
/// One entry per open `<table>`, innermost last.
table_stack: Vec<TableState>,
/// Scratch buffer reused across inline `<code>` elements.
code_buf: String,
/// Current recursion depth of `walk`.
depth: u32,
}
/// State of one open list.
struct ListInfo {
/// True for `<ol>`, false for `<ul>`.
ordered: bool,
/// Number to print for the next item of an ordered list.
counter: u32,
}
/// Accumulates a table's content so it can be laid out once the table ends.
struct TableState {
/// Completed rows, in document order.
rows: Vec<TableRow>,
/// Cells collected so far for the row being built.
current_row: Vec<String>,
/// Raw bytes written since the last cell was finished.
current_cell: Vec<u8>,
/// True while inside `<thead>`.
in_header: bool,
/// True once the row being built has seen a `<th>` cell.
current_row_has_th: bool,
}
/// A finished table row.
struct TableRow {
/// Trimmed text of each cell.
cells: Vec<String>,
/// True when the row came from `<thead>` or contained a `<th>` cell.
is_header: bool,
}
impl<'a, W: Write> Converter<'a, W> {
/// Terminates the output with a newline unless it already ends with one.
///
/// Writes straight to the underlying writer.
fn finalize(&mut self) -> io::Result<()> {
    match self.trailing_nls {
        0 => self.out.write_all(b"\n"),
        _ => Ok(()),
    }
}
/// Starts capturing output into a fresh buffer.
///
/// The current line-tracking state is saved on the redirect stack and reset to
/// "start of a line, nothing pending, no trailing newlines" for the captured region.
fn enter_redirect(&mut self) {
    debug_assert!(
        self.redirect_stack.len() < 100,
        "redirect stack too deep, possible leak"
    );
    let frame = RedirectState {
        buf: Vec::new(),
        saved_at_line_start: std::mem::replace(&mut self.at_line_start, true),
        saved_pending_space: std::mem::take(&mut self.pending_space),
        saved_trailing_nls: std::mem::take(&mut self.trailing_nls),
    };
    self.redirect_stack.push(frame);
}
/// Ends the innermost capture and restores the line-tracking state saved when it
/// began.
///
/// Returns the popped state (including the captured bytes), or `None` when no
/// redirect is active.
fn leave_redirect(&mut self) -> Option<RedirectState> {
    debug_assert!(
        !self.redirect_stack.is_empty(),
        "leave_redirect called with empty stack"
    );
    let frame = self.redirect_stack.pop();
    if let Some(saved) = &frame {
        self.at_line_start = saved.saved_at_line_start;
        self.pending_space = saved.saved_pending_space;
        self.trailing_nls = saved.saved_trailing_nls;
    }
    frame
}
/// Converts `handle` and, recursively, everything below it.
///
/// Documents and elements are descended into, text is emitted, and doctypes,
/// comments and processing instructions are skipped.
///
/// # Errors
///
/// Fails when nesting goes deeper than `MAX_DEPTH`, or when any output step fails.
fn walk(&mut self, handle: &Handle) -> io::Result<()> {
    let Some(entered) = self.depth.checked_add(1) else {
        return Err(io::Error::other("HTML nesting depth overflow"));
    };
    self.depth = entered;
    if entered > MAX_DEPTH {
        return Err(io::Error::other(format!(
            "HTML nesting exceeds maximum depth of {MAX_DEPTH}"
        )));
    }

    let outcome = match &handle.data {
        NodeData::Doctype { .. }
        | NodeData::Comment { .. }
        | NodeData::ProcessingInstruction { .. } => Ok(()),
        NodeData::Document => self.walk_children(handle),
        NodeData::Element { name, attrs, .. } => {
            let tag: &str = &name.local;
            self.handle_element(tag, attrs, handle)
        }
        NodeData::Text { contents } => {
            let text = contents.borrow();
            self.emit_text(&text)
        }
    };

    // Leave the level even when the subtree failed; the result is passed on as is.
    self.depth -= 1;
    outcome
}
/// Walks each child of `handle` in order, stopping at the first error.
fn walk_children(&mut self, handle: &Handle) -> io::Result<()> {
    iter_children(handle).try_for_each(|child| self.walk(&child))
}
/// Dispatches an element to its handler based on the tag name.
///
/// Elements without a dedicated handler are transparent: only their children are
/// converted. A fixed set of non-content elements is dropped together with its
/// subtree.
fn handle_element(
    &mut self,
    tag: &str,
    attrs: &RefCell<Vec<Attribute>>,
    handle: &Handle,
) -> io::Result<()> {
    match tag {
        // Dropped entirely, children included.
        "script" | "style" | "noscript" | "head" | "meta" | "link" => Ok(()),

        // Block-level constructs.
        "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
            // The second byte of the tag name is the ASCII digit of the level.
            let digit = tag.as_bytes()[1];
            let level = usize::from(digit - b'0');
            debug_assert!((1..=6).contains(&level), "heading level 1-6");
            self.handle_heading(level, handle)
        }
        "p" => self.handle_paragraph(handle),
        "blockquote" => self.handle_blockquote(handle),
        "pre" => self.handle_pre(handle),
        "hr" => self.handle_hr(),
        "br" => self.handle_br(),
        "div" | "section" | "article" | "main" | "header" | "footer" | "nav" | "aside"
        | "figure" | "figcaption" | "details" | "summary" | "address" => {
            // Generic containers only guarantee a paragraph break before their content.
            self.ensure_blank_line()?;
            self.walk_children(handle)
        }

        // Lists.
        "ul" => self.handle_list(false, attrs, handle),
        "ol" => self.handle_list(true, attrs, handle),
        "li" => self.handle_list_item(handle),

        // Tables.
        "table" => self.handle_table(handle),
        "thead" => self.handle_thead(handle),
        "tbody" | "tfoot" => self.walk_children(handle),
        "tr" => self.handle_tr(handle),
        "th" => self.handle_cell(true, handle),
        "td" => self.handle_cell(false, handle),

        // Inline formatting.
        "strong" | "b" => self.handle_inline("**", handle),
        "em" | "i" => self.handle_inline("*", handle),
        "del" | "s" | "strike" => self.handle_inline("~~", handle),
        "code" => self.handle_code(handle),
        "a" => self.handle_link(attrs, handle),
        "img" => self.handle_image(attrs),

        _ => self.walk_children(handle),
    }
}
/// Emits an ATX heading: `level` hash marks, a space, the content, then a blank line.
///
/// `level` must be in `1..=6`.
fn handle_heading(&mut self, level: usize, handle: &Handle) -> io::Result<()> {
    // Index 0 is unused padding so the level can index the table directly.
    const HASHES: [&str; 7] = ["", "#", "##", "###", "####", "#####", "######"];

    self.ensure_blank_line()?;
    debug_assert!((1..=6).contains(&level));
    let marks = HASHES[level];
    self.emit(marks)?;
    self.emit(" ")?;
    self.walk_children(handle)?;
    self.emit("\n\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Emits a paragraph surrounded by blank lines.
///
/// Inside a table the paragraph is transparent: its content is written without
/// any surrounding blank lines.
fn handle_paragraph(&mut self, handle: &Handle) -> io::Result<()> {
    let inside_table = !self.table_stack.is_empty();
    if inside_table {
        return self.walk_children(handle);
    }

    self.ensure_blank_line()?;
    self.walk_children(handle)?;
    self.emit("\n\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Emits a block quote.
///
/// The content is rendered into a capture buffer first, stripped of trailing
/// newlines and spaces, and then written line by line with a `> ` prefix. A quote
/// with no content produces no output beyond the leading blank line.
fn handle_blockquote(&mut self, handle: &Handle) -> io::Result<()> {
    self.ensure_blank_line()?;

    self.enter_redirect();
    // Pretend a blank line was just written so the content does not open with one.
    self.trailing_nls = 2;
    self.walk_children(handle)?;
    let Some(captured) = self.leave_redirect() else {
        return Ok(());
    };

    let quoted = captured.buf;
    let kept = quoted
        .iter()
        .rposition(|&b| b != b'\n' && b != b' ')
        .map_or(0, |last| last + 1);
    let quoted = &quoted[..kept];
    if quoted.is_empty() {
        return Ok(());
    }

    for line in quoted.split(|&b| b == b'\n') {
        self.emit("> ")?;
        // `raw_write` ignores empty input, so blank lines get only the prefix.
        self.raw_write(line)?;
        self.emit("\n")?;
    }
    self.emit("\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Emits a fenced code block for `<pre>`.
///
/// The opening fence carries the language found by `extract_code_language`, if
/// any. The content is captured verbatim, stripped of trailing newlines, and
/// followed by the closing fence.
fn handle_pre(&mut self, handle: &Handle) -> io::Result<()> {
    self.ensure_blank_line()?;

    let lang = self.extract_code_language(handle);
    self.emit("```")?;
    if let Some(tag) = lang.as_deref() {
        self.emit(tag)?;
    }
    self.emit("\n")?;

    self.enter_redirect();
    self.in_pre = true;
    self.walk_children(handle)?;
    self.in_pre = false;
    let Some(captured) = self.leave_redirect() else {
        return Ok(());
    };

    let body = captured.buf;
    let kept = body
        .iter()
        .rposition(|&b| b != b'\n')
        .map_or(0, |last| last + 1);
    self.raw_write(&body[..kept])?;

    // Make sure the newline in front of the closing fence is really written.
    self.trailing_nls = 0;
    self.emit("\n```\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Emits a thematic break (`---`) on its own line, preceded by a blank line.
fn handle_hr(&mut self) -> io::Result<()> {
    self.ensure_blank_line()?;
    let written = self.emit("---\n");
    if written.is_ok() {
        self.at_line_start = true;
    }
    written
}
/// Emits a line break for `<br>`.
///
/// Inside `<pre>` this is a bare newline. Elsewhere it is a Markdown hard line
/// break: two trailing spaces followed by a newline. A single trailing space is
/// only a soft break, which renders as an ordinary space.
fn handle_br(&mut self) -> io::Result<()> {
    // Written with escapes so the two significant spaces stay visible and survive
    // editors or tools that trim or collapse whitespace.
    const HARD_BREAK: &str = "\x20\x20\n";

    if self.in_pre {
        self.emit("\n")?;
    } else {
        // A space owed by the preceding text is subsumed by the break. `emit` keeps
        // it pending when the string starts with a space, so without this it would
        // be flushed in front of the first word of the next line.
        self.pending_space = false;
        self.emit(HARD_BREAK)?;
    }
    self.at_line_start = true;
    Ok(())
}
/// Emits a `<ul>` or `<ol>`.
///
/// A top-level list is preceded by a blank line and followed by a newline; a
/// nested list only makes sure it starts on a fresh line. Ordered lists begin
/// counting at the `start` attribute when it parses as a `u32`, otherwise at 1.
fn handle_list(
    &mut self,
    ordered: bool,
    attrs: &RefCell<Vec<Attribute>>,
    handle: &Handle,
) -> io::Result<()> {
    let nested = !self.list_stack.is_empty();
    if !nested {
        self.ensure_blank_line()?;
    } else if !self.at_line_start {
        self.emit("\n")?;
    }

    let counter = match ordered {
        true => attrs
            .borrow()
            .iter()
            .find(|attr| &*attr.name.local == "start")
            .and_then(|attr| attr.value.parse::<u32>().ok())
            .unwrap_or(1),
        false => 1,
    };

    self.list_stack.push(ListInfo { ordered, counter });
    self.walk_children(handle)?;
    self.list_stack.pop();

    let closed_outermost = self.list_stack.is_empty();
    if closed_outermost {
        self.emit("\n")?;
        self.at_line_start = true;
    }
    Ok(())
}
/// Emits one list item: indentation, marker, content, newline.
///
/// Items of an ordered list are numbered from the list's counter, which then
/// advances. The counter saturates instead of overflowing, because its starting
/// value comes from the document's `start` attribute.
///
/// An `<li>` outside any `<ul>`/`<ol>` is valid parser output and is rendered as an
/// unindented bullet rather than being treated as a programming error.
fn handle_list_item(&mut self, handle: &Handle) -> io::Result<()> {
    let indent = self.list_indent();
    if indent > 0 {
        // `list_indent` is in columns; the indent table is indexed by nesting level.
        self.emit(list_indent_str(indent / 2))?;
    }

    // Take the number first so the borrow of the list stack ends before emitting.
    let number = match self.list_stack.last_mut() {
        Some(info) if info.ordered => {
            let current = info.counter;
            info.counter = current.saturating_add(1);
            Some(current)
        }
        _ => None,
    };
    match number {
        Some(n) => {
            self.emit_u32(n)?;
            self.emit(". ")?;
        }
        None => self.emit("- ")?,
    }

    self.at_line_start = false;
    self.walk_children(handle)?;
    self.emit("\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Indentation, in columns, for items of the innermost open list: two per level
/// of nesting below the outermost list.
#[inline]
fn list_indent(&self) -> usize {
    let nested_levels = match self.list_stack.len() {
        0 => 0,
        open_lists => open_lists - 1,
    };
    nested_levels * 2
}
/// Collects a `<table>` and emits it as a pipe table once all rows are known.
///
/// Nested tables are valid HTML, so they are accepted rather than asserted
/// against. The inner table is laid out on its own and its text lands in the
/// enclosing table's current cell, because output is routed to the innermost
/// open table.
fn handle_table(&mut self, handle: &Handle) -> io::Result<()> {
    self.ensure_blank_line()?;
    self.table_stack.push(TableState {
        rows: Vec::new(),
        current_row: Vec::new(),
        current_cell: Vec::new(),
        in_header: false,
        current_row_has_th: false,
    });
    self.walk_children(handle)?;
    self.finish_table()
}
/// Walks a `<thead>`, marking the innermost table as "in header" for the duration
/// so its rows are recorded as header rows.
fn handle_thead(&mut self, handle: &Handle) -> io::Result<()> {
    if let Some(open_table) = self.table_stack.last_mut() {
        open_table.in_header = true;
    }

    self.walk_children(handle)?;

    if let Some(open_table) = self.table_stack.last_mut() {
        open_table.in_header = false;
    }
    Ok(())
}
/// Walks a `<tr>` and, if it produced any cells, records them as a finished row.
///
/// The row counts as a header when it sits inside `<thead>` or contained a `<th>`.
fn handle_tr(&mut self, handle: &Handle) -> io::Result<()> {
    self.walk_children(handle)?;

    let Some(table) = self.table_stack.last_mut() else {
        return Ok(());
    };
    if table.current_row.is_empty() {
        return Ok(());
    }

    let is_header = table.in_header || table.current_row_has_th;
    table.current_row_has_th = false;
    let cells = std::mem::take(&mut table.current_row);
    table.rows.push(TableRow { cells, is_header });
    Ok(())
}
/// Walks a `<td>`/`<th>` and stores its trimmed text as the next cell of the
/// current row.
///
/// # Errors
///
/// Returns `InvalidData` if the bytes collected for the cell are not valid UTF-8.
fn handle_cell(&mut self, is_th: bool, handle: &Handle) -> io::Result<()> {
    debug_assert!(!self.table_stack.is_empty(), "cell must be child of table");
    self.walk_children(handle)?;

    let Some(table) = self.table_stack.last_mut() else {
        return Ok(());
    };
    if is_th {
        table.current_row_has_th = true;
    }

    // Everything written while walking the cell was routed into `current_cell`.
    let collected = std::mem::take(&mut table.current_cell);
    let text = match String::from_utf8(collected) {
        Ok(text) => text,
        Err(_) => {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "invalid UTF-8 in table cell",
            ));
        }
    };
    table.current_row.push(text.trim().to_owned());
    Ok(())
}
/// Pops the innermost table and writes it as an aligned pipe table.
///
/// A separator line follows every header row; when no row is marked as a header,
/// the first row is treated as one. A table without rows produces no output.
///
/// NOTE(review): with several header rows this writes several separator lines —
/// confirm that is intended.
fn finish_table(&mut self) -> io::Result<()> {
    self.pending_space = false;

    let Some(table) = self.table_stack.pop() else {
        debug_assert!(false, "finish_table called with empty stack");
        return Ok(());
    };
    let rows = table.rows;
    // `max()` is `None` exactly when there are no rows at all.
    let Some(ncols) = rows.iter().map(|row| row.cells.len()).max() else {
        return Ok(());
    };
    debug_assert!(ncols > 0, "table must have columns");

    let mut widths = Vec::new();
    compute_col_widths(&rows, ncols, &mut widths);

    let has_explicit = rows.iter().any(|row| row.is_header);
    for (idx, row) in rows.iter().enumerate() {
        self.emit_row(row, &widths, ncols)?;
        let implicit_header = idx == 0 && !has_explicit;
        if row.is_header || implicit_header {
            self.emit_sep(&widths, ncols)?;
        }
    }

    self.emit("\n")?;
    self.at_line_start = true;
    Ok(())
}
/// Writes one table row, padding each cell with spaces to its column width.
///
/// Missing cells are written as empty. Padding is computed from the cell's byte
/// length.
fn emit_row(&mut self, row: &TableRow, widths: &[usize], ncols: usize) -> io::Result<()> {
    debug_assert!(
        widths.len() >= ncols,
        "widths array must have at least ncols elements"
    );
    self.emit("|")?;
    for (column, &width) in widths.iter().take(ncols).enumerate() {
        let cell = row.cells.get(column).map_or("", String::as_str);
        self.emit(" ")?;
        if !cell.is_empty() {
            self.emit(cell)?;
        }
        let padding = width.saturating_sub(cell.len());
        if padding > 0 {
            self.emit(&" ".repeat(padding))?;
        }
        self.emit(" |")?;
    }
    self.emit("\n")
}
/// Writes the header separator line: one run of dashes per column, as wide as the
/// column.
fn emit_sep(&mut self, widths: &[usize], ncols: usize) -> io::Result<()> {
    debug_assert!(!widths.is_empty(), "widths array must not be empty");
    self.emit("|")?;
    for &width in widths.iter().take(ncols) {
        self.emit(" ")?;
        if width > 0 {
            self.emit(&"-".repeat(width))?;
        }
        self.emit(" |")?;
    }
    self.emit("\n")
}
/// Wraps the element's content in an emphasis marker.
///
/// When the content already contains `marker`, the underscore form (`__` for `**`,
/// `_` for `*`) is used instead, provided that form does not occur in the content
/// as well. Markers without an alternative are always used as given.
fn handle_inline(&mut self, marker: &str, handle: &Handle) -> io::Result<()> {
    let alt = match marker {
        "**" => "__",
        "*" => "_",
        unchanged => unchanged,
    };

    let mut text = String::new();
    collect_text_recursive(handle, &mut text, self.depth).map_err(io::Error::other)?;

    let prefer_alt = alt != marker && text.contains(marker) && !text.contains(alt);
    let chosen = if prefer_alt { alt } else { marker };

    self.emit(chosen)?;
    self.walk_children(handle)?;
    self.emit(chosen)
}
/// Emits an inline code span for `<code>`.
///
/// Inside `<pre>` the element is transparent, because the surrounding fence
/// already marks the content as code. Otherwise the element's text is collected
/// and written as a code span. An empty element produces no output.
fn handle_code(&mut self, handle: &Handle) -> io::Result<()> {
    if self.in_pre {
        return self.walk_children(handle);
    }
    self.code_buf.clear();
    collect_text_recursive(handle, &mut self.code_buf, self.depth).map_err(io::Error::other)?;

    // Move the scratch buffer out so it can be read while `self` is borrowed for
    // writing, then put it back (also on failure) to keep its allocation.
    let buf = std::mem::take(&mut self.code_buf);
    let written = self.emit_code_span(&buf);
    self.code_buf = buf;
    written
}

/// Writes `code` as a Markdown code span.
///
/// The delimiter is one backtick longer than the longest backtick run in `code`.
/// The content is padded with one space on each side when it starts or ends with
/// a space or a backtick: a space at the edge would otherwise be stripped by the
/// renderer, and a backtick at the edge would fuse with the delimiter.
fn emit_code_span(&mut self, code: &str) -> io::Result<()> {
    if code.is_empty() {
        // An empty code span cannot be written, and `emit` rejects empty strings.
        return Ok(());
    }

    let max_run = longest_backtick_run(code);
    let edge_space = code.starts_with(' ') || code.ends_with(' ');
    let edge_tick = code.starts_with('`') || code.ends_with('`');

    let delim_len = if max_run > 0 {
        max_run + 1
    } else if edge_space {
        2
    } else {
        1
    };
    let need_padding = edge_tick || (delim_len > 1 && edge_space);

    let delim = "`".repeat(delim_len);
    self.emit(&delim)?;
    if need_padding {
        self.emit(" ")?;
    }
    self.emit(code)?;
    if need_padding {
        self.emit(" ")?;
    }
    self.emit(&delim)
}
/// Emits an inline link: `[content](href)`.
///
/// The destination is wrapped in `<...>` when it contains a space or a
/// parenthesis. A missing or empty `href` yields an empty destination.
fn handle_link(&mut self, attrs: &RefCell<Vec<Attribute>>, handle: &Handle) -> io::Result<()> {
    let href = attrs
        .borrow()
        .iter()
        .find(|a| &*a.name.local == "href")
        .map(|a| a.value.clone());

    self.emit("[")?;
    self.walk_children(handle)?;
    self.emit("](")?;
    if let Some(ref h) = href {
        // `emit` must not be given an empty string: it asserts in debug builds and
        // would wrongly mark the output as being at the start of a line otherwise.
        if !h.is_empty() {
            let needs_brackets = h.contains('(') || h.contains(')') || h.contains(' ');
            if needs_brackets {
                self.emit("<")?;
                self.emit(h)?;
                self.emit(">")?;
            } else {
                self.emit(h)?;
            }
        }
    }
    self.emit(")")
}
fn handle_image(&mut self, attrs: &RefCell<Vec<Attribute>>) -> io::Result<()> {
let borrowed = attrs.borrow();
let src = borrowed.iter().find(|a| &*a.name.local == "src");
let alt = borrowed.iter().find(|a| &*a.name.local == "alt");
self.emit("?;
if let Some(s) = src {
if s.value.contains('(') || s.value.contains(')') || s.value.contains(' ') {
self.emit("<")?;
self.emit(&s.value)?;
self.emit(">")?;
} else {
self.emit(&s.value)?;
}
}
self.emit(")")
}
/// Writes `data` verbatim to the current sink, bypassing pending-space handling.
///
/// Empty input is ignored. Afterwards the line-start flag reflects whether `data`
/// ended with a newline, and the trailing-newline count is reset to zero.
fn raw_write(&mut self, data: &[u8]) -> io::Result<()> {
    let Some(&final_byte) = data.last() else {
        return Ok(());
    };

    // Innermost redirect first, then the innermost table cell, then the real output.
    match (self.redirect_stack.last_mut(), self.table_stack.last_mut()) {
        (Some(redirect), _) => redirect.buf.extend_from_slice(data),
        (None, Some(table)) => table.current_cell.extend_from_slice(data),
        (None, None) => self.out.write_all(data)?,
    }

    self.at_line_start = final_byte == b'\n';
    self.trailing_nls = 0;
    Ok(())
}
/// Writes `s` to the current sink and updates the line-tracking state.
///
/// A pending space is written first unless `s` starts with a newline (the space
/// is dropped) or with a space (the flag is left set). Strings made up only of
/// newlines are truncated so the output never ends with more than two newlines.
///
/// `s` must not be empty. An empty string satisfies the "only newlines" test
/// vacuously, so outside debug builds it would set `at_line_start` without
/// writing anything.
fn emit(&mut self, s: &str) -> io::Result<()> {
debug_assert!(!s.is_empty(), "emit called with empty string");
if self.pending_space && !s.starts_with('\n') && !s.starts_with(' ') {
self.pending_space = false;
self.write_one(b' ')?;
} else if self.pending_space && s.starts_with('\n') {
// A space in front of a line end would only be trailing whitespace.
self.pending_space = false;
}
let bytes = s.as_bytes();
let all_newlines = bytes.iter().all(|&b| b == b'\n');
if all_newlines {
// Write only as many newlines as are missing to reach two in a row.
let needed = 2usize.saturating_sub(self.trailing_nls as usize);
let to_write = bytes.len().min(needed);
if to_write > 0 {
self.write_all(&bytes[..to_write])?;
}
self.trailing_nls = (self.trailing_nls + to_write as u8).min(2);
self.at_line_start = true;
return Ok(());
}
self.write_all(bytes)?;
let len = bytes.len();
// Count the newlines among the last two bytes; more are never needed because
// the counter is capped at two.
let mut nls: u8 = 0;
for &b in bytes.iter().rev().take(2) {
if b == b'\n' {
nls += 1;
} else {
break;
}
}
if nls > 0 {
if len > nls as usize {
// Other content precedes the newlines, so the count starts afresh.
self.trailing_nls = nls;
} else {
self.trailing_nls = (self.trailing_nls + nls).min(2);
}
} else {
self.trailing_nls = 0;
}
self.at_line_start = s.ends_with('\n');
Ok(())
}
/// Writes a single byte to the current sink without touching the tracking state.
fn write_one(&mut self, b: u8) -> io::Result<()> {
    self.write_all(&[b])
}
/// Writes `data` to the current sink without touching the tracking state.
///
/// The sink is the innermost redirect buffer if any, otherwise the current cell of
/// the innermost table if any, otherwise the underlying writer.
fn write_all(&mut self, data: &[u8]) -> io::Result<()> {
    match (self.redirect_stack.last_mut(), self.table_stack.last_mut()) {
        (Some(redirect), _) => redirect.buf.extend_from_slice(data),
        (None, Some(table)) => table.current_cell.extend_from_slice(data),
        (None, None) => self.out.write_all(data)?,
    }
    Ok(())
}
/// Writes `n` in decimal to the current sink without allocating.
///
/// Like `write_all`, this does not update the line-tracking state.
fn emit_u32(&mut self, mut n: u32) -> io::Result<()> {
    // `u32::MAX` has ten decimal digits.
    let mut digits = [0u8; 10];
    let mut start = digits.len();
    // Fill from the right; the loop body runs at least once so that zero yields "0".
    loop {
        start -= 1;
        digits[start] = b'0' + (n % 10) as u8;
        n /= 10;
        if n == 0 {
            break;
        }
    }
    self.write_all(&digits[start..])
}
/// Emits a text node: verbatim inside `<pre>`, whitespace-normalized elsewhere.
fn emit_text(&mut self, text: &str) -> io::Result<()> {
    match (self.in_pre, text.is_empty()) {
        (false, _) => self.emit_text_normalized(text),
        // `emit` must not be called with an empty string.
        (true, true) => Ok(()),
        (true, false) => self.emit(text),
    }
}
/// Emits `text` with every run of whitespace collapsed to a single space.
///
/// Leading whitespace is dropped when the output is at the start of a line.
/// Trailing whitespace is not written; it is recorded as a pending space that the
/// next `emit` resolves.
fn emit_text_normalized(&mut self, text: &str) -> io::Result<()> {
let text = if self.at_line_start {
let trimmed = text.trim_start();
if trimmed.is_empty() {
// Whitespace-only text at the start of a line produces nothing.
return Ok(());
}
trimmed
} else {
text
};
// `seg_start` is the byte offset where the current run of non-whitespace began;
// `last_ws` records whether the previous character was whitespace.
let mut last_ws = false;
let mut seg_start = 0;
for (i, ch) in text.char_indices() {
if ch.is_whitespace() {
// First whitespace after a word: flush the word.
if !last_ws && seg_start < i {
self.emit(&text[seg_start..i])?;
}
last_ws = true;
seg_start = i + ch.len_utf8();
} else {
// First character of a word that follows whitespace: emit the separator.
if last_ws && seg_start <= i {
self.emit(" ")?;
}
last_ws = false;
}
}
// Flush the final word, if the text did not end in whitespace.
if seg_start < text.len() && !last_ws {
self.emit(&text[seg_start..])?;
}
if last_ws {
self.pending_space = true;
}
Ok(())
}
/// Makes sure the output ends with a blank line before a block-level construct.
///
/// Any pending space is discarded. Inside a table nothing else happens, since
/// cell content has to stay on one line.
fn ensure_blank_line(&mut self) -> io::Result<()> {
    self.pending_space = false;
    if !self.table_stack.is_empty() {
        return Ok(());
    }

    // Top up to two trailing newlines.
    let missing = match self.trailing_nls {
        0 => "\n\n",
        1 => "\n",
        _ => "",
    };
    if !missing.is_empty() {
        self.emit(missing)?;
    }
    self.at_line_start = true;
    Ok(())
}
/// Finds the language of a `<pre>` block from a direct `<code>` child.
///
/// Each whitespace-separated token of the child's `class` attribute is examined,
/// and the first `language-xyz` token with a non-empty suffix wins. That means
/// `class="hljs language-rust"` yields `rust`, other class names never leak into
/// the result, and a bare `language-` is ignored rather than returned as an empty
/// string.
fn extract_code_language(&self, handle: &Handle) -> Option<String> {
    iter_children(handle).find_map(|child| {
        let NodeData::Element { name, attrs, .. } = &child.data else {
            return None;
        };
        if &*name.local != "code" {
            return None;
        }
        // Bound to a local so the attribute borrow ends before `child` is dropped.
        let language = attrs
            .borrow()
            .iter()
            .filter(|attr| &*attr.name.local == "class")
            .flat_map(|attr| attr.value.split_ascii_whitespace())
            .filter_map(|class| class.strip_prefix("language-"))
            .find(|lang| !lang.is_empty())
            .map(str::to_owned);
        language
    })
}
}
/// Appends the text content of `handle` and its element descendants to `out`.
///
/// Text nodes contribute their contents, elements are descended into, and every
/// other node kind is ignored. `depth` is the nesting level of `handle`.
///
/// # Errors
///
/// Returns an error message once `depth` exceeds `MAX_DEPTH`.
fn collect_text_recursive(handle: &Handle, out: &mut String, depth: u32) -> Result<(), String> {
    if depth > MAX_DEPTH {
        return Err("text collection exceeds maximum depth".to_owned());
    }
    match &handle.data {
        NodeData::Element { .. } => iter_children(handle)
            .try_for_each(|child| collect_text_recursive(&child, out, depth + 1)),
        NodeData::Text { contents } => {
            out.push_str(&contents.borrow());
            Ok(())
        }
        _ => Ok(()),
    }
}
/// Returns the length of the longest run of consecutive backticks in `s`, or 0
/// when there are none.
fn longest_backtick_run(s: &str) -> usize {
    // Splitting on every non-backtick character leaves only the backtick runs
    // (and empty pieces); a backtick is one byte, so byte length equals run length.
    s.split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0)
}
/// Fills `widths` with the width of each of the first `ncols` columns.
///
/// A column is as wide as its longest cell measured in bytes, but never narrower
/// than 3. Previous contents of `widths` are discarded, and cells beyond `ncols`
/// are ignored.
fn compute_col_widths(rows: &[TableRow], ncols: usize, widths: &mut Vec<usize>) {
    widths.clear();
    widths.resize(ncols, 3);
    for row in rows {
        // `zip` stops at the shorter side, which drops cells beyond `ncols`.
        for (width, cell) in widths.iter_mut().zip(&row.cells) {
            *width = (*width).max(cell.len());
        }
    }
}