use crate::ast::{HtmlElement, ListItem, Node};
use log;
#[cfg(feature = "gfm")]
use crate::ast::{TableAlignment, TaskListStatus};
use super::{utils, HtmlWriteError, HtmlWriteResult, HtmlWriterOptions};
use html_escape;
/// Incremental HTML writer that renders AST nodes into an in-memory string.
#[derive(Debug)]
pub struct HtmlWriter {
/// Rendering options consulted while writing (strict mode, GFM settings, code-block class prefix).
pub options: HtmlWriterOptions,
/// Accumulated HTML output.
buffer: String,
/// `true` while a start tag has been written without its closing `>`, so attributes can still be appended.
tag_opened: bool,
}
impl HtmlWriter {
/// Creates a writer configured with `HtmlWriterOptions::default()`.
pub fn new() -> Self {
    let default_options = HtmlWriterOptions::default();
    Self::with_options(default_options)
}
pub fn with_options(options: HtmlWriterOptions) -> Self {
HtmlWriter {
options,
buffer: String::new(),
tag_opened: false,
}
}
pub fn into_string(mut self) -> String {
self.ensure_tag_closed().unwrap();
self.buffer
}
/// Terminates a pending start tag with `>`; does nothing when no tag is open.
fn ensure_tag_closed(&mut self) -> HtmlWriteResult<()> {
    // `take` reads the flag and resets it to `false` in one step.
    if std::mem::take(&mut self.tag_opened) {
        self.buffer.push('>');
    }
    Ok(())
}
/// Writes `<tag_name` and leaves the tag open so attributes can follow.
///
/// Any previously open start tag is closed first.
fn start_tag_internal(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
    self.ensure_tag_closed()?;
    self.buffer.extend(["<", tag_name]);
    self.tag_opened = true;
    Ok(())
}
/// Begins a start tag named `tag_name`; the tag stays open for attributes until it is finished.
///
/// Public entry point that delegates to `start_tag_internal`.
pub fn start_tag(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
self.start_tag_internal(tag_name)
}
/// Appends ` key="value"` to the start tag that is currently open.
///
/// The value is escaped for a double-quoted attribute context (`&`, `<`, `>` and `"`),
/// so a value containing `"` cannot terminate the attribute early. The key is written
/// verbatim; validating attribute names is left to the caller.
///
/// # Errors
/// Returns `HtmlWriteError::InvalidHtmlTag` when no start tag is open.
fn attribute_internal(&mut self, key: &str, value: &str) -> HtmlWriteResult<()> {
    if !self.tag_opened {
        return Err(HtmlWriteError::InvalidHtmlTag(
            "Cannot write attribute: no tag is currently open.".to_string(),
        ));
    }
    self.buffer.push(' ');
    self.buffer.push_str(key);
    self.buffer.push_str("=\"");
    // `encode_text` leaves `"` untouched, which is unsafe inside a quoted attribute.
    self.buffer
        .push_str(html_escape::encode_double_quoted_attribute(value).as_ref());
    self.buffer.push('"');
    Ok(())
}
/// Adds a `key="value"` attribute to the currently open start tag.
///
/// Delegates to `attribute_internal`, which returns an error when no tag is open.
pub fn attribute(&mut self, key: &str, value: &str) -> HtmlWriteResult<()> {
self.attribute_internal(key, value)
}
/// Finishes the open start tag by writing `>`; a no-op when no tag is open.
fn finish_tag_internal(&mut self) -> HtmlWriteResult<()> {
    // Same operation as closing a pending tag before other output.
    self.ensure_tag_closed()
}
/// Finishes the currently open start tag, if any.
///
/// Delegates to `finish_tag_internal`.
pub fn finish_tag(&mut self) -> HtmlWriteResult<()> {
self.finish_tag_internal()
}
/// Writes the end tag `</tag_name>`, closing any pending start tag first.
fn end_tag_internal(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
    self.ensure_tag_closed()?;
    self.buffer.extend(["</", tag_name, ">"]);
    Ok(())
}
/// Writes the end tag for `tag_name`.
///
/// Delegates to `end_tag_internal`.
pub fn end_tag(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
self.end_tag_internal(tag_name)
}
/// Appends `text` as HTML-escaped character data (via `html_escape::encode_text`).
///
/// Any pending start tag is closed first.
fn text_internal(&mut self, text: &str) -> HtmlWriteResult<()> {
    self.ensure_tag_closed()?;
    let escaped = html_escape::encode_text(text);
    self.buffer.push_str(&escaped);
    Ok(())
}
/// Writes `text` as escaped HTML text.
///
/// Delegates to `text_internal`.
pub fn text(&mut self, text: &str) -> HtmlWriteResult<()> {
self.text_internal(text)
}
/// Writes `s` as escaped HTML text; this simply forwards to `text`.
pub fn write_str(&mut self, s: &str) -> HtmlWriteResult<()> {
self.text(s)
}
/// Writes a complete attribute-less self-closing tag, `<tag_name />`.
///
/// Any pending start tag is closed first; no tag is left open afterwards.
fn self_closing_tag_internal(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
    self.ensure_tag_closed()?;
    self.buffer.extend(["<", tag_name, " />"]);
    self.tag_opened = false;
    Ok(())
}
/// Terminates the open start tag as a self-closing tag by writing ` />`.
///
/// # Errors
/// Returns `HtmlWriteError::InvalidHtmlTag` when no start tag is open.
fn finish_self_closing_tag_internal(&mut self) -> HtmlWriteResult<()> {
    if self.tag_opened {
        self.buffer.push_str(" />");
        self.tag_opened = false;
        Ok(())
    } else {
        Err(HtmlWriteError::InvalidHtmlTag(
            "Cannot finish self-closing tag: no tag is currently open.".to_string(),
        ))
    }
}
/// Finishes the currently open start tag as a self-closing tag.
///
/// Delegates to `finish_self_closing_tag_internal`, which errors when no tag is open.
pub fn finish_self_closing_tag(&mut self) -> HtmlWriteResult<()> {
self.finish_self_closing_tag_internal()
}
/// Appends `html` to the output verbatim, without any escaping.
///
/// Any pending start tag is closed first.
fn raw_html_internal(&mut self, html: &str) -> HtmlWriteResult<()> {
    self.ensure_tag_closed()?;
    self.buffer += html;
    Ok(())
}
/// Writes `html` to the output without escaping.
///
/// Delegates to `raw_html_internal`; the caller is responsible for the markup being safe.
pub fn raw_html(&mut self, html: &str) -> HtmlWriteResult<()> {
self.raw_html_internal(html)
}
/// Renders a single AST node (and, through the per-node writers, its children) as HTML.
///
/// Each `Node` variant is dispatched to its dedicated `write_*_node` method. Arms and
/// pattern fields guarded by `#[cfg(feature = "gfm")]` only exist when that feature is
/// compiled in.
///
/// # Errors
/// Propagates any error from the per-node writer; a variant that matches no arm yields
/// `HtmlWriteError::UnsupportedNodeType`.
pub fn write_node(&mut self, node: &Node) -> HtmlWriteResult<()> {
match node {
Node::Document(children) => self.write_document_node(children),
Node::Paragraph(children) => self.write_paragraph_node(children),
Node::Text(text) => self.write_text_node(text),
Node::Heading { level, content, .. } => self.write_heading_node(*level, content),
Node::Emphasis(children) => self.write_emphasis_node(children),
Node::Strong(children) => self.write_strong_node(children),
Node::ThematicBreak => self.write_thematic_break_node(),
Node::InlineCode(code) => self.write_inline_code_node(code),
Node::CodeBlock {
language, content, ..
} => self.write_code_block_node(language, content),
Node::HtmlBlock(block_content) => self.write_html_block_node(block_content),
Node::HtmlElement(element) => self.write_html_element_node(element),
Node::SoftBreak => self.write_soft_break_node(),
Node::HardBreak => self.write_hard_break_node(),
Node::Link {
url,
title,
content,
} => self.write_link_node(url, title, content),
Node::Image { url, title, alt } => self.write_image_node(url, title, alt),
Node::BlockQuote(children) => self.write_blockquote_node(children),
Node::OrderedList { start, items } => self.write_ordered_list_node(*start, items),
Node::UnorderedList(items) => self.write_unordered_list_node(items),
#[cfg(feature = "gfm")]
Node::Strikethrough(children) => self.write_strikethrough_node(children),
// The `alignments` field is only bound and forwarded when the `gfm` feature is on.
Node::Table {
headers,
#[cfg(feature = "gfm")]
alignments,
rows,
} => self.write_table_node(
headers,
#[cfg(feature = "gfm")]
alignments,
rows,
),
Node::Autolink { url, is_email } => self.write_autolink_node(url, *is_email),
#[cfg(feature = "gfm")]
Node::ExtendedAutolink(url) => self.write_extended_autolink_node(url),
// Link reference definitions produce no output; reference links go to their own writer.
Node::LinkReferenceDefinition { .. } => Ok(()), Node::ReferenceLink { label, content } => {
self.write_reference_link_node(label, content)
}
// Custom nodes render themselves through their own `html_write` implementation.
Node::Custom(custom_node) => {
custom_node.html_write(self)
}
// Without the `gfm` feature an extended autolink is emitted as escaped text only.
#[cfg(not(feature = "gfm"))]
Node::ExtendedAutolink(url) => {
log::warn!("ExtendedAutolink encountered but GFM feature is not enabled. Rendering as text: {}", url);
self.text_internal(url)
}
// Fallback for any variant not handled above under the active feature set.
#[allow(unreachable_patterns)]
_ => Err(HtmlWriteError::UnsupportedNodeType(format!("{:?}", node))),
}
}
/// Renders every child of a document node in order.
///
/// Stops at, and returns, the first error produced by a child. No separator is
/// inserted between children; each block-level writer emits its own trailing newline.
fn write_document_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    children
        .iter()
        .try_for_each(|child| self.write_node(child))
}
/// Renders a paragraph as `<p>…</p>` followed by a newline.
fn write_paragraph_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    self.start_tag_internal("p")?;
    self.finish_tag_internal()?;
    children
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal("p")?;
    self.raw_html_internal("\n")
}
/// Renders a text node as escaped HTML text by forwarding to `text_internal`.
fn write_text_node(&mut self, text: &str) -> HtmlWriteResult<()> {
self.text_internal(text)
}
/// Renders a heading as `<hN>…</hN>` followed by a newline.
///
/// `level` is clamped into `1..=6`, so out-of-range levels map to `h1` or `h6`.
fn write_heading_node(&mut self, level: u8, content: &[Node]) -> HtmlWriteResult<()> {
    const HEADING_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
    // After clamping, `level - 1` is always a valid index into the table.
    let heading_tag = HEADING_TAGS[usize::from(level.clamp(1, 6)) - 1];

    self.start_tag_internal(heading_tag)?;
    self.finish_tag_internal()?;
    content
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal(heading_tag)?;
    self.raw_html_internal("\n")
}
/// Renders emphasis as an inline `<em>…</em>` element (no trailing newline).
fn write_emphasis_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    self.start_tag_internal("em")?;
    self.finish_tag_internal()?;
    children
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal("em")
}
/// Renders strong emphasis as an inline `<strong>…</strong>` element (no trailing newline).
fn write_strong_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    self.start_tag_internal("strong")?;
    self.finish_tag_internal()?;
    children
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal("strong")
}
/// Renders a thematic break as `<hr />` followed by a newline.
fn write_thematic_break_node(&mut self) -> HtmlWriteResult<()> {
    self.self_closing_tag_internal("hr")
        .and_then(|()| self.raw_html_internal("\n"))
}
/// Renders inline code as `<code>…</code>` with the code escaped as text.
fn write_inline_code_node(&mut self, code: &str) -> HtmlWriteResult<()> {
    const TAG: &str = "code";
    self.start_tag_internal(TAG)?;
    self.finish_tag_internal()?;
    self.text_internal(code)?;
    self.end_tag_internal(TAG)
}
/// Renders a code block as `<pre><code>…</code></pre>` followed by a newline.
///
/// When the options define a language class prefix and `language` contains something
/// other than whitespace, the `<code>` tag gets `class="<prefix><language>"` with the
/// language trimmed. A missing, empty or whitespace-only language adds no `class`
/// attribute at all.
fn write_code_block_node(
    &mut self,
    language: &Option<String>,
    content: &str,
) -> HtmlWriteResult<()> {
    self.start_tag_internal("pre")?;
    self.finish_tag_internal()?;
    self.start_tag_internal("code")?;

    // Trim before testing for emptiness so "  " does not yield a bare-prefix class.
    let trimmed_language = language
        .as_deref()
        .map(str::trim)
        .filter(|lang| !lang.is_empty());
    let class_value = match (
        &self.options.code_block_language_class_prefix,
        trimmed_language,
    ) {
        (Some(prefix), Some(lang)) => Some(format!("{}{}", prefix, lang)),
        _ => None,
    };
    if let Some(class_value) = class_value {
        self.attribute_internal("class", &class_value)?;
    }

    self.finish_tag_internal()?;
    self.text_internal(content)?;
    self.end_tag_internal("code")?;
    self.end_tag_internal("pre")?;
    self.raw_html_internal("\n")?;
    Ok(())
}
/// Emits a raw HTML block verbatim, making sure it is followed by a newline.
fn write_html_block_node(&mut self, block_content: &str) -> HtmlWriteResult<()> {
    self.raw_html_internal(block_content)?;
    if block_content.ends_with('\n') {
        return Ok(());
    }
    self.raw_html_internal("\n")
}
/// Renders an inline/structured HTML element, applying the writer's safety checks.
///
/// * With the `gfm` feature and `enable_gfm` set, a tag listed in
///   `gfm_disallowed_html_tags` (compared ASCII case-insensitively) is written as
///   escaped text instead of markup.
/// * A tag name rejected by `utils::is_safe_tag_name` is an error in strict mode and
///   is written as escaped text otherwise.
/// * An attribute name rejected by `utils::is_safe_attribute_name` is an error in
///   strict mode; otherwise a warning is logged and the attribute is still emitted.
///
/// Attribute values are always escaped for a double-quoted attribute context.
///
/// # Errors
/// `HtmlWriteError::InvalidHtmlTag` / `HtmlWriteError::InvalidHtmlAttribute` in strict
/// mode, plus any error from writing child nodes.
fn write_html_element_node(&mut self, element: &HtmlElement) -> HtmlWriteResult<()> {
    #[cfg(feature = "gfm")]
    if self.options.enable_gfm
        && self
            .options
            .gfm_disallowed_html_tags
            .iter()
            .any(|tag| tag.eq_ignore_ascii_case(&element.tag))
    {
        log::debug!("GFM: Textualizing disallowed HTML tag: <{}>", element.tag);
        self.textualize_full_element_node(element)?;
        return Ok(());
    }

    if !utils::is_safe_tag_name(&element.tag) {
        if self.options.strict {
            return Err(HtmlWriteError::InvalidHtmlTag(element.tag.clone()));
        }
        log::warn!(
            "Invalid HTML tag name '{}' encountered. Textualizing in non-strict mode.",
            element.tag
        );
        self.textualize_full_element_node(element)?;
        return Ok(());
    }

    self.start_tag_internal(&element.tag)?;
    for attr in &element.attributes {
        if !utils::is_safe_attribute_name(&attr.name) {
            if self.options.strict {
                return Err(HtmlWriteError::InvalidHtmlAttribute(attr.name.clone()));
            }
            log::warn!("Invalid HTML attribute name '{}' in tag '{}'. Textualizing attribute in non-strict mode.", attr.name, element.tag);
            // NOTE(review): the rejected name is still written verbatim into the tag;
            // consider dropping or sanitizing it if the AST can carry untrusted input.
            self.buffer.push(' ');
            self.buffer.push_str(&attr.name);
            self.buffer.push_str("=\"");
            // Must escape `"` as well: the value sits inside a double-quoted attribute.
            self.buffer
                .push_str(html_escape::encode_double_quoted_attribute(&attr.value).as_ref());
            self.buffer.push('"');
            continue;
        }
        self.attribute_internal(&attr.name, &attr.value)?;
    }

    if element.self_closing {
        self.finish_self_closing_tag_internal()?;
    } else {
        self.finish_tag_internal()?;
        for child in &element.children {
            self.write_node(child)?;
        }
        self.end_tag_internal(&element.tag)?;
    }
    Ok(())
}
/// Writes an HTML element as visible, escaped text rather than as markup.
///
/// The opening tag (with its attributes) and the closing tag are emitted through
/// `text_internal`, so their angle brackets are escaped. Child nodes are still rendered
/// with `write_node`. A self-closing element is written as `<tag … />` with no children.
fn textualize_full_element_node(&mut self, element: &HtmlElement) -> HtmlWriteResult<()> {
    // Assemble the literal opening tag first; escaping happens when it is written.
    let mut opening = String::from("<");
    opening.push_str(&element.tag);
    for attr in &element.attributes {
        opening.push(' ');
        opening.push_str(&attr.name);
        opening.push_str("=\"");
        opening.push_str(&attr.value);
        opening.push('"');
    }

    if element.self_closing {
        opening.push_str(" />");
        return self.text_internal(&opening);
    }

    opening.push('>');
    self.text_internal(&opening)?;
    element
        .children
        .iter()
        .try_for_each(|child| self.write_node(child))?;

    let mut closing = String::from("</");
    closing.push_str(&element.tag);
    closing.push('>');
    self.text_internal(&closing)
}
/// Renders a soft line break as a single newline character in the output.
fn write_soft_break_node(&mut self) -> HtmlWriteResult<()> {
self.raw_html_internal("\n")
}
/// Renders a hard line break as `<br />` followed by a newline.
fn write_hard_break_node(&mut self) -> HtmlWriteResult<()> {
    self.self_closing_tag_internal("br")
        .and_then(|()| self.raw_html_internal("\n"))
}
/// Renders a link as `<a href="…">…</a>`.
///
/// A `title` attribute is added only when a non-empty title is present.
fn write_link_node(
    &mut self,
    url: &str,
    title: &Option<String>,
    content: &[Node],
) -> HtmlWriteResult<()> {
    self.start_tag_internal("a")?;
    self.attribute_internal("href", url)?;
    let non_empty_title = title.as_deref().filter(|text| !text.is_empty());
    if let Some(title_text) = non_empty_title {
        self.attribute_internal("title", title_text)?;
    }
    self.finish_tag_internal()?;
    content
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal("a")
}
fn write_image_node(
&mut self,
url: &str,
title: &Option<String>,
alt: &[Node],
) -> HtmlWriteResult<()> {
self.start_tag_internal("img")?;
self.attribute_internal("src", url)?;
let mut alt_text_buffer = String::new();
render_nodes_to_plain_text(alt, &mut alt_text_buffer, &self.options);
self.attribute_internal("alt", &alt_text_buffer)?;
if let Some(title_str) = title {
if !title_str.is_empty() {
self.attribute_internal("title", title_str)?;
}
}
self.finish_self_closing_tag_internal()?;
Ok(())
}
/// Renders a block quote: `<blockquote>`, newline, the children, `</blockquote>`, newline.
fn write_blockquote_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    const TAG: &str = "blockquote";
    self.start_tag_internal(TAG)?;
    self.finish_tag_internal()?;
    self.raw_html_internal("\n")?;
    children
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    self.end_tag_internal(TAG)?;
    self.raw_html_internal("\n")
}
/// Renders the children of a list item.
///
/// A newline is written before a child whenever the child before it was a block node.
fn write_list_item_node_content(&mut self, item_content: &[Node]) -> HtmlWriteResult<()> {
    let mut previous_was_block = false;
    for node in item_content {
        if previous_was_block {
            self.raw_html_internal("\n")?;
        }
        self.write_node(node)?;
        previous_was_block = node.is_block();
    }
    Ok(())
}
/// Renders one list item as `<li>…</li>` followed by a newline.
///
/// With the `gfm` feature compiled in and `enable_gfm` set, a task item additionally
/// gets a `task-list-item` class (plus `task-list-item-checked` when checked) and a
/// disabled checkbox `<input>` followed by a space before its content.
fn write_list_item_node(&mut self, item: &ListItem) -> HtmlWriteResult<()> {
    // Pick out the child nodes up front; this does not touch the output.
    let item_content = match item {
        ListItem::Unordered { content } => content,
        ListItem::Ordered { content, .. } => content,
        #[cfg(feature = "gfm")]
        ListItem::Task { content, .. } => content,
    };
    // `Some(is_checked)` only for task items while GFM rendering is enabled.
    #[cfg(feature = "gfm")]
    let task_checked = match item {
        ListItem::Task { status, .. } if self.options.enable_gfm => {
            Some(*status == TaskListStatus::Checked)
        }
        _ => None,
    };

    self.start_tag_internal("li")?;
    #[cfg(feature = "gfm")]
    if let Some(is_checked) = task_checked {
        let class_name = if is_checked {
            "task-list-item task-list-item-checked"
        } else {
            "task-list-item"
        };
        self.attribute_internal("class", class_name)?;
    }
    self.finish_tag_internal()?;

    #[cfg(feature = "gfm")]
    if let Some(is_checked) = task_checked {
        self.start_tag_internal("input")?;
        self.attribute_internal("type", "checkbox")?;
        self.attribute_internal("disabled", "")?;
        if is_checked {
            self.attribute_internal("checked", "")?;
        }
        self.finish_self_closing_tag_internal()?;
        self.raw_html_internal(" ")?;
    }

    self.write_list_item_node_content(item_content)?;
    self.end_tag_internal("li")?;
    self.raw_html_internal("\n")
}
/// Renders an ordered list as `<ol>` with one `<li>` per item.
///
/// A `start` attribute is written only when the list does not start at 1.
fn write_ordered_list_node(&mut self, start: u32, items: &[ListItem]) -> HtmlWriteResult<()> {
    self.start_tag_internal("ol")?;
    if start != 1 {
        let start_value = start.to_string();
        self.attribute_internal("start", &start_value)?;
    }
    self.finish_tag_internal()?;
    self.raw_html_internal("\n")?;
    items
        .iter()
        .try_for_each(|item| self.write_list_item_node(item))?;
    self.end_tag_internal("ol")?;
    self.raw_html_internal("\n")
}
/// Renders an unordered list as `<ul>` with one `<li>` per item.
fn write_unordered_list_node(&mut self, items: &[ListItem]) -> HtmlWriteResult<()> {
    self.start_tag_internal("ul")?;
    self.finish_tag_internal()?;
    self.raw_html_internal("\n")?;
    items
        .iter()
        .try_for_each(|item| self.write_list_item_node(item))?;
    self.end_tag_internal("ul")?;
    self.raw_html_internal("\n")
}
/// Renders strikethrough content as `<del>…</del>`.
///
/// When `enable_gfm` is off, a warning is logged and the children are rendered
/// without the surrounding `<del>` element.
#[cfg(feature = "gfm")]
fn write_strikethrough_node(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
    // Decide once, before any child is written.
    let wrap_in_del = self.options.enable_gfm;
    if wrap_in_del {
        self.start_tag_internal("del")?;
        self.finish_tag_internal()?;
    } else {
        log::warn!("Strikethrough node encountered but GFM (or GFM strikethrough) is not enabled. Rendering content as plain.");
    }
    children
        .iter()
        .try_for_each(|child| self.write_node(child))?;
    if wrap_in_del {
        self.end_tag_internal("del")?;
    }
    Ok(())
}
/// Renders a table as `<table>` containing a `<thead>` header row and a `<tbody>`
/// with one `<tr>` per data row. Every opening and closing tag is followed by a newline.
///
/// With the `gfm` feature compiled in and `enable_gfm` set, a cell whose column has a
/// non-`None` alignment gets a `style="text-align: …;"` attribute. Columns beyond the
/// end of `alignments` are left unstyled.
fn write_table_node(
    &mut self,
    headers: &[Node],
    #[cfg(feature = "gfm")] alignments: &[TableAlignment],
    rows: &[Vec<Node>],
) -> HtmlWriteResult<()> {
    // Resolve each column's `style` value once instead of re-matching for every cell.
    // An empty list means "no alignment styling".
    #[cfg(feature = "gfm")]
    let column_styles: Vec<Option<&'static str>> = if self.options.enable_gfm {
        alignments
            .iter()
            .map(Self::table_alignment_style)
            .collect()
    } else {
        Vec::new()
    };
    #[cfg(not(feature = "gfm"))]
    let column_styles: Vec<Option<&'static str>> = Vec::new();

    self.open_table_container("table")?;
    self.open_table_container("thead")?;
    self.open_table_container("tr")?;
    for (col_index, header_cell) in headers.iter().enumerate() {
        let style = column_styles.get(col_index).copied().flatten();
        self.write_table_cell("th", header_cell, style)?;
    }
    self.close_table_container("tr")?;
    self.close_table_container("thead")?;

    self.open_table_container("tbody")?;
    for row_cells in rows {
        self.open_table_container("tr")?;
        for (col_index, cell) in row_cells.iter().enumerate() {
            let style = column_styles.get(col_index).copied().flatten();
            self.write_table_cell("td", cell, style)?;
        }
        self.close_table_container("tr")?;
    }
    self.close_table_container("tbody")?;
    self.close_table_container("table")
}

/// Maps a column alignment to the `style` attribute value used for its cells.
#[cfg(feature = "gfm")]
fn table_alignment_style(alignment: &TableAlignment) -> Option<&'static str> {
    match alignment {
        TableAlignment::Left => Some("text-align: left;"),
        TableAlignment::Center => Some("text-align: center;"),
        TableAlignment::Right => Some("text-align: right;"),
        TableAlignment::None => None,
    }
}

/// Writes the opening tag of a table structure element followed by a newline.
fn open_table_container(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
    self.start_tag_internal(tag_name)?;
    self.finish_tag_internal()?;
    self.raw_html_internal("\n")
}

/// Writes the closing tag of a table structure element followed by a newline.
fn close_table_container(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
    self.end_tag_internal(tag_name)?;
    self.raw_html_internal("\n")
}

/// Writes one `<th>`/`<td>` cell, with an optional `style` attribute, and a newline.
fn write_table_cell(
    &mut self,
    tag_name: &str,
    cell: &Node,
    style: Option<&str>,
) -> HtmlWriteResult<()> {
    self.start_tag_internal(tag_name)?;
    if let Some(style_value) = style {
        self.attribute_internal("style", style_value)?;
    }
    self.finish_tag_internal()?;
    self.write_node(cell)?;
    self.end_tag_internal(tag_name)?;
    self.raw_html_internal("\n")
}
/// Renders an autolink as `<a href="…">url</a>`.
///
/// For e-mail autolinks the `href` gets a `mailto:` prefix unless the URL already
/// starts with one; the visible link text is always the URL as given.
fn write_autolink_node(&mut self, url: &str, is_email: bool) -> HtmlWriteResult<()> {
    let needs_mailto_prefix = is_email && !url.starts_with("mailto:");
    let href: std::borrow::Cow<'_, str> = if needs_mailto_prefix {
        format!("mailto:{}", url).into()
    } else {
        url.into()
    };

    self.start_tag_internal("a")?;
    self.attribute_internal("href", &href)?;
    self.finish_tag_internal()?;
    self.text_internal(url)?;
    self.end_tag_internal("a")
}
/// Renders a GFM extended autolink as `<a href="url">url</a>`.
///
/// When `enable_gfm` is off, a warning is logged and the URL is written as escaped
/// text only.
#[cfg(feature = "gfm")]
fn write_extended_autolink_node(&mut self, url: &str) -> HtmlWriteResult<()> {
    if self.options.enable_gfm {
        self.start_tag_internal("a")?;
        self.attribute_internal("href", url)?;
        self.finish_tag_internal()?;
        self.text_internal(url)?;
        self.end_tag_internal("a")
    } else {
        log::warn!("ExtendedAutolink node encountered but GFM (or GFM autolinks) is not enabled. Rendering as plain text.");
        self.text_internal(url)
    }
}
/// Handles a reference link that was not resolved before HTML rendering.
///
/// In strict mode this is an error. Otherwise a warning is logged and the link is
/// written back out as text in Markdown form:
/// * `[label]` when there is no content, or when the content is a single text node
///   equal to the label;
/// * `[content][label]` otherwise, where the first bracket holds the label itself if
///   the plain text of the content equals the label, and the rendered content if not.
///
/// # Errors
/// `HtmlWriteError::UnsupportedNodeType` in strict mode, plus any error from rendering
/// the content nodes.
fn write_reference_link_node(&mut self, label: &str, content: &[Node]) -> HtmlWriteResult<()> {
    if self.options.strict {
        let plain_content = render_nodes_to_plain_text_string(content, &self.options);
        return Err(HtmlWriteError::UnsupportedNodeType(format!(
            "Unresolved reference link '[{}{}]' found in strict mode. Pre-resolve links for HTML output.",
            plain_content, label
        )));
    }
    log::warn!(
        "Unresolved reference link for label '{}'. Rendering as plain text.",
        label
    );

    let content_text = render_nodes_to_plain_text_string(content, &self.options);
    let text_matches_label = content_text == label;

    self.text_internal("[")?;
    if content.is_empty() || text_matches_label {
        self.text_internal(label)?;
    } else {
        content
            .iter()
            .try_for_each(|inner| self.write_node(inner))?;
    }
    self.text_internal("]")?;

    // Shortcut form: the only content is a text node identical to the label.
    let is_shortcut = text_matches_label && matches!(content, [Node::Text(_)]);
    if !content.is_empty() && !is_shortcut {
        self.text_internal("[")?;
        self.text_internal(label)?;
        self.text_internal("]")?;
    }
    Ok(())
}
}
impl Default for HtmlWriter {
fn default() -> Self {
Self::new()
}
}
/// Appends a plain-text rendering of `nodes` to `buffer`, dropping all markup.
///
/// * Text, inline code and autolink URLs are appended as-is.
/// * Soft and hard breaks become a single space.
/// * Inline containers (emphasis, strong, strikethrough, links, image alt text, HTML
///   elements) contribute only the text of their children.
/// * Paragraphs, block quotes and headings contribute their children's text followed
///   by a space.
/// * Every other node kind is ignored.
///
/// `_options` is only passed along to the recursive calls.
fn render_nodes_to_plain_text(nodes: &[Node], buffer: &mut String, _options: &HtmlWriterOptions) {
    for node in nodes {
        match node {
            // Leaves that carry their own text.
            Node::Text(text) => buffer.push_str(text),
            Node::InlineCode(code) => buffer.push_str(code),
            Node::Autolink { url, .. } | Node::ExtendedAutolink(url) => buffer.push_str(url),
            Node::SoftBreak | Node::HardBreak => buffer.push(' '),

            // Inline containers: recurse into the children only.
            Node::Emphasis(children) | Node::Strong(children) => {
                render_nodes_to_plain_text(children, buffer, _options);
            }
            #[cfg(feature = "gfm")]
            Node::Strikethrough(children) => {
                render_nodes_to_plain_text(children, buffer, _options);
            }
            Node::Link { content, .. } => render_nodes_to_plain_text(content, buffer, _options),
            Node::Image { alt, .. } => render_nodes_to_plain_text(alt, buffer, _options),
            Node::HtmlElement(element) => {
                render_nodes_to_plain_text(&element.children, buffer, _options);
            }

            // Block containers: children followed by a separating space.
            Node::Paragraph(children)
            | Node::BlockQuote(children)
            | Node::Heading {
                content: children, ..
            } => {
                render_nodes_to_plain_text(children, buffer, _options);
                buffer.push(' ');
            }

            _ => {}
        }
    }
}
/// Returns the plain-text rendering of `nodes` as a freshly allocated `String`.
fn render_nodes_to_plain_text_string(nodes: &[Node], options: &HtmlWriterOptions) -> String {
    let mut plain_text = String::new();
    render_nodes_to_plain_text(nodes, &mut plain_text, options);
    plain_text
}