use crate::{
NodeSink, WalkCtx, Walker,
escape::{escape_attr, escape_text, escape_url},
};
use dmc_diagnostic::Code;
use dmc_parser::ast::*;
use duck_diagnostic::{DiagnosticEngine, diag};
/// Rendering switches consumed by `HtmlEmitter`.
#[derive(Debug, Clone, Copy, Default)]
pub struct RenderOptions {
/// When `true`, raw HTML values are passed through
/// `escape_disallowed_raw_html_tag` and JSX element names are checked with
/// `is_disallowed_raw_html` before they are written to the output.
pub gfm_disallowed_raw_html: bool,
}
/// A `NodeSink` that accumulates an HTML rendering of the nodes it is fed.
pub struct HtmlEmitter {
// HTML produced so far.
out: String,
// Diagnostics recorded while rendering (e.g. dropped expressions, empty JSX names).
diag_engine: DiagnosticEngine<Code>,
// Non-zero while the walk is inside a `Table`; tables are rendered in one go by
// `inline_table`, so walker callbacks are ignored until the table is left.
in_table_depth: usize,
// Rendering switches chosen at construction time.
options: RenderOptions,
}
impl NodeSink for HtmlEmitter {
    /// Emits the HTML for `node` as the walk enters it.
    ///
    /// Leaf nodes are written in full here. Container nodes only get their
    /// opening markup (via `open_tag`) and are closed again in `leave`.
    /// Tables are rendered eagerly by `inline_table`, so every callback that
    /// arrives while `in_table_depth` is non-zero is ignored.
    fn enter(&mut self, node: &Node, ctx: &WalkCtx) {
        if self.in_table_depth > 0 {
            // `leave` decrements the depth for every `Table` it sees, so a table
            // nested inside one that is already being skipped must be counted
            // here too; otherwise the depth would reach zero too early and the
            // rest of the outer table would be rendered a second time.
            if matches!(node, Node::Table(_)) {
                self.in_table_depth += 1;
            }
            return;
        }
        self.maybe_separate_list_item_block_child(node, ctx);
        match node {
            Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
            Node::InlineCode(c) => {
                self.out.push_str("<code>");
                self.out.push_str(&escape_text(&c.value));
                self.out.push_str("</code>");
            },
            Node::CodeBlock(cb) => self.code_block(cb),
            Node::Image(i) => self.image(i),
            Node::HorizontalRule(_) => self.out.push_str("<hr />\n"),
            Node::HardBreak(_) => self.out.push_str("<br />\n"),
            Node::Html(h) => {
                // Borrow the raw value unless it actually has to be rewritten.
                let filtered;
                let value: &str = if self.options.gfm_disallowed_raw_html {
                    filtered = escape_disallowed_raw_html_tag(&h.value);
                    &filtered
                } else {
                    &h.value
                };
                self.out.push_str(value);
                // Raw HTML outside a paragraph/heading is terminated with a newline.
                let inline_context = matches!(ctx.parent, Some(Node::Paragraph(_)) | Some(Node::Heading(_)));
                if !inline_context && !value.ends_with('\n') {
                    self.out.push('\n');
                }
            },
            Node::SoftBreak(_) => self.out.push('\n'),
            Node::JsxSelfClosing(s) => self.jsx_self_closing(s),
            Node::JsxExpression(e) => {
                // Only expressions that are a plain string literal can be rendered
                // statically; anything else is reported and dropped.
                if let Some(text) = string_literal_expression(&e.value) {
                    self.out.push_str(&escape_text(&text));
                } else {
                    self.diag(Code::HtmlExpressionDropped, format!("html: raw `{{...}}` expression dropped: {}", e.value.trim()));
                }
            },
            Node::Table(t) => {
                self.in_table_depth += 1;
                self.inline_table(t);
            },
            // These nodes produce no HTML.
            Node::Frontmatter(_) | Node::Import(_) | Node::Export(_) => {},
            _ => self.open_tag(node),
        }
    }

    /// Closes whatever `enter` opened for `node`.
    ///
    /// Leaving a `Table` only unwinds the table depth; nothing inside a table
    /// is closed here because `inline_table` already wrote the full markup.
    fn leave(&mut self, node: &Node, _ctx: &WalkCtx) {
        if let Node::Table(_) = node {
            self.in_table_depth = self.in_table_depth.saturating_sub(1);
            return;
        }
        if self.in_table_depth > 0 {
            return;
        }
        self.close_tag(node);
    }
}
impl Default for HtmlEmitter {
/// Equivalent to `HtmlEmitter::new()`.
fn default() -> Self {
Self::new()
}
}
impl HtmlEmitter {
/// Creates an emitter that renders with the default `RenderOptions`.
pub fn new() -> Self {
    let options = RenderOptions::default();
    Self::new_with_options(options)
}
/// Creates an emitter with an empty output buffer, a fresh diagnostic engine
/// and the given rendering `options`.
pub fn new_with_options(options: RenderOptions) -> Self {
    Self {
        out: String::new(),
        diag_engine: DiagnosticEngine::new(),
        in_table_depth: 0,
        options,
    }
}
pub fn into_string(self) -> String {
self.out
}
pub fn into_parts(self) -> (String, DiagnosticEngine<Code>) {
(self.out, self.diag_engine)
}
/// Walks `doc` with a default-configured emitter and returns the HTML along
/// with the diagnostics gathered on the way.
pub fn render(doc: &Document) -> (String, DiagnosticEngine<Code>) {
    let mut emitter = Self::new();
    Walker::new(doc).walk(&mut [&mut emitter]);
    emitter.into_parts()
}
/// Walks `doc` with an emitter configured by `options` and returns the HTML
/// along with the diagnostics gathered on the way.
pub fn render_with(doc: &Document, options: RenderOptions) -> (String, DiagnosticEngine<Code>) {
    let mut emitter = Self::new_with_options(options);
    Walker::new(doc).walk(&mut [&mut emitter]);
    emitter.into_parts()
}
/// Records a diagnostic built from `code` and `message` on this emitter's
/// diagnostic engine.
fn diag(&mut self, code: Code, message: impl Into<String>) {
self.diag_engine.emit(diag!(code, message.into()));
}
/// Returns `true` for the node kinds this emitter treats as block-level:
/// paragraphs, headings, lists, blockquotes, code blocks, horizontal rules,
/// tables and raw HTML.
fn is_block_node(node: &Node) -> bool {
    matches!(
        node,
        Node::Blockquote(_)
            | Node::CodeBlock(_)
            | Node::Heading(_)
            | Node::HorizontalRule(_)
            | Node::Html(_)
            | Node::List(_)
            | Node::Paragraph(_)
            | Node::Table(_)
    )
}
/// Inserts a newline before a block-level child of a list item when the
/// sibling just before it was not block-level and the output does not already
/// end with a newline.
fn maybe_separate_list_item_block_child(&mut self, node: &Node, ctx: &WalkCtx) {
    let Some(parent) = ctx.parent else {
        return;
    };
    let inside_list_item = matches!(parent, Node::ListItem(_) | Node::TaskListItem(_));
    if !inside_list_item {
        return;
    }
    // The first child has no preceding sibling to separate from.
    if ctx.index == 0 {
        return;
    }
    if !Self::is_block_node(node) {
        return;
    }
    let follows_inline = match Node::children_of(parent).get(ctx.index - 1) {
        Some(previous) => !Self::is_block_node(previous),
        None => false,
    };
    if follows_inline && !self.out.ends_with('\n') {
        self.out.push('\n');
    }
}
fn open_tag(&mut self, node: &Node) {
match node {
Node::Heading(h) => match &h.id {
Some(id) => self.out.push_str(&format!("<h{} id=\"{}\">", h.level, escape_attr(id))),
None => self.out.push_str(&format!("<h{}>", h.level)),
},
Node::Paragraph(_) => self.out.push_str("<p>"),
Node::Bold(_) => self.out.push_str("<strong>"),
Node::Italic(_) => self.out.push_str("<em>"),
Node::Strikethrough(_) => self.out.push_str("<del>"),
Node::Blockquote(_) => self.out.push_str("<blockquote>\n"),
Node::List(l) => {
let tag = if l.ordered { "ol" } else { "ul" };
self.out.push('<');
self.out.push_str(tag);
if l.children.iter().any(|c| matches!(c, Node::TaskListItem(_))) {
self.out.push_str(" class=\"contains-task-list\"");
}
if l.ordered
&& let Some(s) = l.start
&& s != 1
{
self.out.push_str(&format!(" start=\"{}\"", s));
}
self.out.push_str(">\n");
},
Node::ListItem(li) => {
let has_block_child = li.children.first().is_some_and(|c| {
matches!(
c,
Node::Paragraph(_)
| Node::List(_)
| Node::Blockquote(_)
| Node::CodeBlock(_)
| Node::Heading(_)
| Node::HorizontalRule(_)
| Node::Table(_)
| Node::Html(_)
)
});
if has_block_child {
self.out.push_str("<li>\n");
} else {
self.out.push_str("<li>");
}
},
Node::TaskListItem(t) => {
let checked = if t.checked { " checked" } else { "" };
self.out.push_str(&format!("<li class=\"task-list-item\"><input type=\"checkbox\"{} disabled> ", checked));
},
Node::Link(l) => {
self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&l.href))));
if let Some(title) = &l.title {
self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
}
self.out.push('>');
},
Node::JsxElement(e) => {
if e.name.is_empty() {
self.diag(Code::MalformedJsxTagName, "html: JSX element has empty name; skipped".to_string());
return;
}
if self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name) {
self.out.push_str("<");
} else {
self.out.push('<');
}
self.out.push_str(&e.name);
for a in &e.attrs {
self.jsx_attr(a);
}
self.out.push('>');
},
Node::JsxFragment(_) => {},
_ => {},
}
}
/// Writes the closing markup matching what `open_tag` wrote for `node`.
///
/// JSX elements with an empty name were skipped when opening, so they are
/// skipped here as well. Node kinds without an arm close nothing.
fn close_tag(&mut self, node: &Node) {
    match node {
        Node::Heading(h) => self.out.push_str(&format!("</h{}>\n", h.level)),
        Node::Paragraph(_) => self.out.push_str("</p>\n"),
        Node::Bold(_) => self.out.push_str("</strong>"),
        Node::Italic(_) => self.out.push_str("</em>"),
        Node::Strikethrough(_) => self.out.push_str("</del>"),
        Node::Blockquote(_) => self.out.push_str("</blockquote>\n"),
        Node::List(l) => {
            let tag = if l.ordered { "ol" } else { "ul" };
            self.out.push_str(&format!("</{}>\n", tag));
        },
        Node::ListItem(_) | Node::TaskListItem(_) => self.out.push_str("</li>\n"),
        Node::Link(_) => self.out.push_str("</a>"),
        Node::JsxElement(e) if !e.name.is_empty() => {
            // Mirror `open_tag`: a disallowed tag has its leading `<` escaped
            // so the closing tag is rendered as text too.
            let neutralise = self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name);
            self.out.push_str(if neutralise { "&lt;/" } else { "</" });
            self.out.push_str(&e.name);
            self.out.push('>');
        },
        // Fragments have no markup of their own.
        Node::JsxFragment(_) => {},
        _ => {},
    }
}
/// Writes a fenced/indented code block as `<pre><code>…</code></pre>`, adding
/// a `language-…` class when the block carries a language.
fn code_block(&mut self, cb: &CodeBlock) {
    let class_attr = match &cb.lang {
        Some(lang) => format!(" class=\"language-{}\"", escape_attr(lang)),
        None => String::new(),
    };
    self.out.push_str("<pre><code");
    self.out.push_str(&class_attr);
    self.out.push('>');
    let body = escape_text(&cb.value);
    self.out.push_str(&body);
    self.out.push_str("</code></pre>\n");
}
/// Writes an `<img />` tag with escaped `src` and `alt`, plus `title` when
/// the image has one.
fn image(&mut self, i: &Image) {
    let src = escape_url(&i.src);
    let mut tag = format!("<img src=\"{}\" alt=\"{}\"", escape_attr(&src), escape_attr(&i.alt));
    if let Some(title) = &i.title {
        tag.push_str(&format!(" title=\"{}\"", escape_attr(title)));
    }
    tag.push_str(" />");
    self.out.push_str(&tag);
}
fn jsx_self_closing(&mut self, s: &JsxSelfClosing) {
if s.name.is_empty() {
self.diag(Code::MalformedJsxTagName, "html: self-closing JSX has empty name; skipped".to_string());
return;
}
match s.name.as_str() {
"MermaidSvg" => {
if let Some(attr) = s.attrs.iter().find(|a| a.name == "svg")
&& let JsxAttrValue::String(svg) = &attr.value
{
self.out.push_str(svg);
}
},
"MathMl" => {
if let Some(attr) = s.attrs.iter().find(|a| a.name == "mathml")
&& let JsxAttrValue::String(mathml) = &attr.value
{
let unescaped = mathml.replace(""", "\"").replace("&", "&");
self.out.push_str(&unescaped);
}
},
"PackageManagerTabs" => {
self.out.push_str("<div class=\"gentledmc-pm-tabs\">");
for pm in ["npm", "yarn", "pnpm", "bun"] {
if let Some(attr) = s.attrs.iter().find(|a| a.name == pm)
&& let JsxAttrValue::String(cmd) = &attr.value
{
self.out.push_str(&format!(
"<pre><code class=\"gentledmc-language-bash\" data-pm=\"{}\">{}</code></pre>",
pm,
escape_text(cmd)
));
}
}
self.out.push_str("</div>");
},
_ => {
self.out.push('<');
self.out.push_str(&s.name);
for a in &s.attrs {
self.jsx_attr(a);
}
self.out.push_str(" />");
},
}
}
/// Writes one JSX attribute, preceded by a single space.
///
/// - boolean attributes are written as `name=""`
/// - string attributes as `name="escaped value"`
/// - expression attributes as `name={expr}`
/// - spread attributes are omitted entirely
fn jsx_attr(&mut self, a: &JsxAttr) {
    // Decide before writing anything: a spread attribute has no static HTML
    // form, so nothing (not even the separating space or its name) is emitted.
    if matches!(a.value, JsxAttrValue::Spread(_)) {
        return;
    }
    self.out.push(' ');
    self.out.push_str(&a.name);
    match &a.value {
        JsxAttrValue::Boolean => self.out.push_str("=\"\""),
        JsxAttrValue::String(s) => self.out.push_str(&format!("=\"{}\"", escape_attr(s))),
        JsxAttrValue::Expression(e) => self.out.push_str(&format!("={{{}}}", e)),
        // Already handled by the early return above.
        JsxAttrValue::Spread(_) => {},
    }
}
/// Writes a complete `<table>`: the first row becomes the `<thead>`, any
/// remaining rows form the `<tbody>`. Column alignment comes from `t.align`,
/// falling back to `TableAlign::None` for columns without an entry.
fn inline_table(&mut self, t: &Table) {
    let column_align = |col: usize| t.align.get(col).copied().unwrap_or(TableAlign::None);

    self.out.push_str("<table>\n");
    if let Some((head, body)) = t.children.split_first() {
        self.out.push_str("<thead>\n<tr>\n");
        for (col, cell) in head.cells.iter().enumerate() {
            self.inline_cell("th", cell, column_align(col));
        }
        self.out.push_str("</tr>\n</thead>\n");

        // A table with only a header row gets no `<tbody>` at all.
        if !body.is_empty() {
            self.out.push_str("<tbody>\n");
            for row in body {
                self.out.push_str("<tr>\n");
                for (col, cell) in row.cells.iter().enumerate() {
                    self.inline_cell("td", cell, column_align(col));
                }
                self.out.push_str("</tr>\n");
            }
            self.out.push_str("</tbody>\n");
        }
    }
    self.out.push_str("</table>\n");
}
/// Writes one table cell using `tag` (`th` or `td` at the call sites), with an
/// `align` attribute unless the alignment is `TableAlign::None`, followed by a
/// newline after the closing tag.
fn inline_cell(&mut self, tag: &str, cell: &TableCell, align: TableAlign) {
    let align_attr = match align {
        TableAlign::Left => " align=\"left\"",
        TableAlign::Right => " align=\"right\"",
        TableAlign::Center => " align=\"center\"",
        TableAlign::None => "",
    };
    self.out.extend(["<", tag, align_attr, ">"]);
    cell.children.iter().for_each(|child| self.inline_node(child));
    self.out.extend(["</", tag, ">\n"]);
}
/// Recursively renders `node` and its children without the walker; used for
/// table cell content, which `inline_table` writes in one pass.
///
/// NOTE(review): node kinds that neither this match nor `open_tag`/`close_tag`
/// handle (for example `Node::Html`) produce no markup of their own through
/// the fallback arm — confirm that is intended for table cells.
fn inline_node(&mut self, node: &Node) {
    match node {
        Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
        Node::Bold(i) => self.wrap_tag("strong", &i.children),
        Node::Italic(i) => self.wrap_tag("em", &i.children),
        Node::Strikethrough(i) => self.wrap_tag("del", &i.children),
        Node::InlineCode(c) => {
            self.out.push_str("<code>");
            self.out.push_str(&escape_text(&c.value));
            self.out.push_str("</code>");
        },
        Node::Link(l) => {
            self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&l.href))));
            // A link title is rendered as the `title` attribute, exactly as
            // `open_tag` does for links outside tables.
            if let Some(title) = &l.title {
                self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
            }
            self.out.push('>');
            for c in &l.children {
                self.inline_node(c);
            }
            self.out.push_str("</a>");
        },
        Node::Image(i) => self.image(i),
        Node::HardBreak(_) => self.out.push_str("<br />\n"),
        Node::SoftBreak(_) => self.out.push('\n'),
        Node::CodeBlock(cb) => self.code_block(cb),
        _ => {
            // Generic container: open, render children, close.
            self.open_tag(node);
            for kid in Node::children_of(node) {
                self.inline_node(kid);
            }
            self.close_tag(node);
        },
    }
}
/// Renders `children` between `<tag>` and `</tag>`.
fn wrap_tag(&mut self, tag: &str, children: &[Node]) {
    self.out.extend(["<", tag, ">"]);
    children.iter().for_each(|child| self.inline_node(child));
    self.out.extend(["</", tag, ">"]);
}
}
/// Returns `true` when `name` (compared ASCII case-insensitively) is one of
/// the tag names filtered by the GFM "disallowed raw HTML" extension.
fn is_disallowed_raw_html(name: &str) -> bool {
    const DISALLOWED: [&str; 9] =
        ["title", "textarea", "style", "xmp", "iframe", "noembed", "noframes", "script", "plaintext"];
    // `eq_ignore_ascii_case` avoids allocating a lowercased copy per call.
    DISALLOWED.iter().any(|tag| name.eq_ignore_ascii_case(tag))
}

/// Returns `raw` with the leading `<` of every disallowed opening or closing
/// tag replaced by `&lt;`; all other text is copied unchanged.
///
/// A tag name is the run of ASCII alphanumerics and `-` that follows `<` (or
/// `</`). The input is copied slice-wise, so multi-byte UTF-8 text is
/// preserved exactly.
fn escape_disallowed_raw_html_tag(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut rest = raw;
    while let Some(lt) = rest.find('<') {
        out.push_str(&rest[..lt]);
        // `<` is one byte, so `lt + 1` is always a char boundary.
        let after_lt = &rest[lt + 1..];
        let name_src = after_lt.strip_prefix('/').unwrap_or(after_lt);
        let name_len = name_src.bytes().take_while(|b| b.is_ascii_alphanumeric() || *b == b'-').count();
        if name_len > 0 && is_disallowed_raw_html(&name_src[..name_len]) {
            out.push_str("&lt;");
        } else {
            out.push('<');
        }
        rest = after_lt;
    }
    out.push_str(rest);
    out
}
/// Renders `doc` to HTML with a default-configured `HtmlEmitter`, discarding
/// any diagnostics.
pub fn render_html(doc: &Document) -> String {
    let mut emitter = HtmlEmitter::new();
    Walker::new(doc).walk(&mut [&mut emitter]);
    emitter.into_string()
}
/// Renders `doc` to HTML with an `HtmlEmitter` configured by `options`,
/// discarding any diagnostics.
pub fn render_html_with(doc: &Document, options: RenderOptions) -> String {
    let mut emitter = HtmlEmitter::new_with_options(options);
    Walker::new(doc).walk(&mut [&mut emitter]);
    emitter.into_string()
}
/// Returns the decoded contents of `raw` when it is a single string literal.
///
/// Accepts `'…'`, `"…"` and interpolation-free `` `…` `` literals (surrounding
/// whitespace is ignored) and decodes the escapes `\n`, `\t`, `\r`, `\\`,
/// `\'`, `\"` and `` \` ``; any other escape is kept verbatim.
///
/// Returns `None` when `raw` is not exactly one literal: it is not quoted,
/// the body contains an unescaped occurrence of the delimiting quote (e.g.
/// `'a' + 'b'`), the closing quote is itself escaped, or a template literal
/// contains an unescaped `${`.
fn string_literal_expression(raw: &str) -> Option<String> {
    let s = raw.trim();
    let quote = s.chars().next().filter(|c| matches!(*c, '\'' | '"' | '`'))?;
    // The quote characters are one byte each, so byte slicing below is safe.
    if s.len() < 2 || !s.ends_with(quote) {
        return None;
    }
    let inner = &s[1..s.len() - 1];

    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\\' => match chars.next() {
                Some('n') => out.push('\n'),
                Some('t') => out.push('\t'),
                Some('r') => out.push('\r'),
                Some('\\') => out.push('\\'),
                Some('\'') => out.push('\''),
                Some('"') => out.push('"'),
                Some('`') => out.push('`'),
                Some(other) => {
                    out.push('\\');
                    out.push(other);
                },
                // The backslash escapes the final quote: the literal never closes.
                None => return None,
            },
            // An unescaped delimiter ends the literal early, so `raw` is a
            // larger expression rather than one literal.
            c if c == quote => return None,
            // Template interpolation cannot be evaluated statically.
            '$' if quote == '`' && chars.peek() == Some(&'{') => return None,
            _ => out.push(c),
        }
    }
    Some(out)
}
#[cfg(test)]
mod tests {
    use super::string_literal_expression;

    /// Each supported quote style yields the literal's contents.
    #[test]
    fn recognises_simple_quoted_strings() {
        let cases = [("' '", " "), ("\"x\"", "x"), ("`y`", "y")];
        for (input, expected) in cases {
            assert_eq!(string_literal_expression(input), Some(expected.to_string()), "input: {input}");
        }
    }

    /// Template literals with `${…}` cannot be rendered statically.
    #[test]
    fn rejects_template_with_interpolation() {
        assert_eq!(string_literal_expression("`hi ${name}`"), None);
    }

    /// Anything that is not a quoted literal is rejected.
    #[test]
    fn rejects_dynamic_expression() {
        for input in ["count", "foo()", "a + b"] {
            assert_eq!(string_literal_expression(input), None, "input: {input}");
        }
    }

    /// Backslash escapes are decoded into the characters they stand for.
    #[test]
    fn decodes_common_escapes() {
        let cases = [("'\\n'", "\n"), ("'\\\\'", "\\")];
        for (input, expected) in cases {
            assert_eq!(string_literal_expression(input), Some(expected.to_string()), "input: {input}");
        }
    }
}