1use crate::{
2 NodeSink, WalkCtx, Walker,
3 escape::{escape_attr, escape_text, escape_url},
4};
5use dmc_diagnostic::Code;
6use dmc_parser::ast::*;
7use duck_diagnostic::{DiagnosticEngine, diag};
8
/// Switches that adjust how [`HtmlEmitter`] renders a document.
///
/// The derived `Default` leaves every switch turned off.
#[derive(Debug, Clone, Copy, Default)]
pub struct RenderOptions {
  /// When `true`, raw HTML nodes are passed through
  /// `escape_disallowed_raw_html_tag` and JSX element names are checked with
  /// `is_disallowed_raw_html` before they are written to the output.
  pub gfm_disallowed_raw_html: bool,
}
15
/// A [`NodeSink`] that turns the nodes it is shown into an HTML string and
/// collects diagnostics for anything it has to drop.
pub struct HtmlEmitter {
  /// HTML produced so far.
  out: String,
  /// Diagnostics raised while rendering (empty JSX names, dropped expressions).
  diag_engine: DiagnosticEngine<Code>,
  /// Greater than zero while the walker is inside a table. The table is
  /// rendered in one go when it is entered, so nodes visited while this is
  /// non-zero are skipped.
  in_table_depth: usize,
  /// Rendering switches chosen by the caller.
  options: RenderOptions,
}
30
31impl NodeSink for HtmlEmitter {
32 fn enter(&mut self, node: &Node, ctx: &WalkCtx) {
33 if self.in_table_depth > 0 {
34 return;
35 }
36 self.maybe_separate_list_item_block_child(node, ctx);
37 match node {
38 Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
39 Node::InlineCode(c) => {
40 self.out.push_str("<code>");
41 self.out.push_str(&escape_text(&c.value));
42 self.out.push_str("</code>");
43 },
44 Node::CodeBlock(cb) => self.code_block(cb),
45 Node::Image(i) => self.image(i),
46 Node::HorizontalRule(_) => self.out.push_str("<hr />\n"),
47 Node::HardBreak(_) => self.out.push_str("<br />\n"),
48 Node::Html(h) => {
53 let value =
54 if self.options.gfm_disallowed_raw_html { escape_disallowed_raw_html_tag(&h.value) } else { h.value.clone() };
55 self.out.push_str(&value);
56 let inline_context = matches!(ctx.parent, Some(Node::Paragraph(_)) | Some(Node::Heading(_)));
57 if !inline_context && !value.ends_with('\n') {
58 self.out.push('\n');
59 }
60 },
61 Node::SoftBreak(_) => self.out.push('\n'),
62 Node::JsxSelfClosing(s) => self.jsx_self_closing(s),
63 Node::JsxExpression(e) => {
64 if let Some(text) = string_literal_expression(&e.value) {
70 self.out.push_str(&escape_text(&text));
71 } else {
72 self.diag(Code::HtmlExpressionDropped, format!("html: raw `{{...}}` expression dropped: {}", e.value.trim()));
73 }
74 },
75 Node::Table(t) => {
76 self.in_table_depth += 1;
77 self.inline_table(t);
78 },
79 Node::Frontmatter(_) | Node::Import(_) | Node::Export(_) => {},
80 _ => self.open_tag(node),
81 }
82 }
83
84 fn leave(&mut self, node: &Node, _ctx: &WalkCtx) {
85 if let Node::Table(_) = node {
86 self.in_table_depth = self.in_table_depth.saturating_sub(1);
87 return;
88 }
89 if self.in_table_depth > 0 {
90 return;
91 }
92 self.close_tag(node);
93 }
94}
95
96impl Default for HtmlEmitter {
97 fn default() -> Self {
98 Self::new()
99 }
100}
101
102impl HtmlEmitter {
103 pub fn new() -> Self {
104 Self::new_with_options(RenderOptions::default())
105 }
106
107 pub fn new_with_options(options: RenderOptions) -> Self {
108 Self { out: String::new(), diag_engine: DiagnosticEngine::new(), in_table_depth: 0, options }
109 }
110
111 pub fn into_string(self) -> String {
112 self.out
113 }
114
115 pub fn into_parts(self) -> (String, DiagnosticEngine<Code>) {
119 (self.out, self.diag_engine)
120 }
121
122 pub fn render(doc: &Document) -> (String, DiagnosticEngine<Code>) {
125 let mut e = Self::new();
126 Walker::new(doc).walk(&mut [&mut e]);
127 e.into_parts()
128 }
129
130 pub fn render_with(doc: &Document, options: RenderOptions) -> (String, DiagnosticEngine<Code>) {
131 let mut e = Self::new_with_options(options);
132 Walker::new(doc).walk(&mut [&mut e]);
133 e.into_parts()
134 }
135
  /// Records a diagnostic built from `code` and `message` in the emitter's
  /// diagnostic engine. The HTML output is not touched.
  fn diag(&mut self, code: Code, message: impl Into<String>) {
    self.diag_engine.emit(diag!(code, message.into()));
  }
139
140 fn is_block_node(node: &Node) -> bool {
141 matches!(
142 node,
143 Node::Paragraph(_)
144 | Node::List(_)
145 | Node::Blockquote(_)
146 | Node::CodeBlock(_)
147 | Node::Heading(_)
148 | Node::HorizontalRule(_)
149 | Node::Table(_)
150 | Node::Html(_)
151 )
152 }
153
154 fn maybe_separate_list_item_block_child(&mut self, node: &Node, ctx: &WalkCtx) {
155 let Some(parent) = ctx.parent else {
156 return;
157 };
158 if !matches!(parent, Node::ListItem(_) | Node::TaskListItem(_)) || ctx.index == 0 || !Self::is_block_node(node) {
159 return;
160 }
161 let prev = Node::children_of(parent).get(ctx.index - 1);
162 if prev.is_some_and(|n| !Self::is_block_node(n)) && !self.out.ends_with('\n') {
163 self.out.push('\n');
164 }
165 }
166
167 fn open_tag(&mut self, node: &Node) {
171 match node {
172 Node::Heading(h) => match &h.id {
173 Some(id) => self.out.push_str(&format!("<h{} id=\"{}\">", h.level, escape_attr(id))),
174 None => self.out.push_str(&format!("<h{}>", h.level)),
175 },
176 Node::Paragraph(_) => self.out.push_str("<p>"),
177 Node::Bold(_) => self.out.push_str("<strong>"),
178 Node::Italic(_) => self.out.push_str("<em>"),
179 Node::Strikethrough(_) => self.out.push_str("<del>"),
180 Node::Blockquote(_) => self.out.push_str("<blockquote>\n"),
181 Node::List(l) => {
182 let tag = if l.ordered { "ol" } else { "ul" };
183 self.out.push('<');
184 self.out.push_str(tag);
185 if l.children.iter().any(|c| matches!(c, Node::TaskListItem(_))) {
188 self.out.push_str(" class=\"contains-task-list\"");
189 }
190 if l.ordered
191 && let Some(s) = l.start
192 && s != 1
193 {
194 self.out.push_str(&format!(" start=\"{}\"", s));
195 }
196 self.out.push_str(">\n");
197 },
198 Node::ListItem(li) => {
202 let has_block_child = li.children.first().is_some_and(|c| {
203 matches!(
204 c,
205 Node::Paragraph(_)
206 | Node::List(_)
207 | Node::Blockquote(_)
208 | Node::CodeBlock(_)
209 | Node::Heading(_)
210 | Node::HorizontalRule(_)
211 | Node::Table(_)
212 | Node::Html(_)
213 )
214 });
215 if has_block_child {
216 self.out.push_str("<li>\n");
217 } else {
218 self.out.push_str("<li>");
219 }
220 },
221 Node::TaskListItem(t) => {
222 let checked = if t.checked { " checked" } else { "" };
226 self.out.push_str(&format!("<li class=\"task-list-item\"><input type=\"checkbox\"{} disabled> ", checked));
227 },
228 Node::Link(l) => {
229 self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&l.href))));
230 if let Some(title) = &l.title {
235 self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
236 }
237 self.out.push('>');
238 },
239 Node::JsxElement(e) => {
240 if e.name.is_empty() {
241 self.diag(Code::MalformedJsxTagName, "html: JSX element has empty name; skipped".to_string());
242 return;
243 }
244 if self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name) {
248 self.out.push_str("<");
249 } else {
250 self.out.push('<');
251 }
252 self.out.push_str(&e.name);
253 for a in &e.attrs {
254 self.jsx_attr(a);
255 }
256 self.out.push('>');
257 },
258 Node::JsxFragment(_) => {},
259 _ => {},
260 }
261 }
262
263 fn close_tag(&mut self, node: &Node) {
267 match node {
268 Node::Heading(h) => self.out.push_str(&format!("</h{}>\n", h.level)),
269 Node::Paragraph(_) => self.out.push_str("</p>\n"),
270 Node::Bold(_) => self.out.push_str("</strong>"),
271 Node::Italic(_) => self.out.push_str("</em>"),
272 Node::Strikethrough(_) => self.out.push_str("</del>"),
273 Node::Blockquote(_) => self.out.push_str("</blockquote>\n"),
274 Node::List(l) => {
275 let tag = if l.ordered { "ol" } else { "ul" };
276 self.out.push_str(&format!("</{}>\n", tag));
277 },
278 Node::ListItem(_) | Node::TaskListItem(_) => self.out.push_str("</li>\n"),
279 Node::Link(_) => self.out.push_str("</a>"),
280 Node::JsxElement(e) if !e.name.is_empty() => {
281 if self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name) {
282 self.out.push_str(&format!("</{}>", e.name));
283 } else {
284 self.out.push_str(&format!("</{}>", e.name));
285 }
286 },
287 Node::JsxFragment(_) => {},
288 _ => {},
289 }
290 }
291
292 fn code_block(&mut self, cb: &CodeBlock) {
295 self.out.push_str("<pre><code");
296 if let Some(lang) = &cb.lang {
297 self.out.push_str(&format!(" class=\"language-{}\"", escape_attr(lang)));
299 }
300 self.out.push('>');
301 self.out.push_str(&escape_text(&cb.value));
302 self.out.push_str("</code></pre>\n");
303 }
304
305 fn image(&mut self, i: &Image) {
306 self.out.push_str(&format!("<img src=\"{}\" alt=\"{}\"", escape_attr(&escape_url(&i.src)), escape_attr(&i.alt)));
307 if let Some(title) = &i.title {
308 self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
309 }
310 self.out.push_str(" />");
314 }
315
316 fn jsx_self_closing(&mut self, s: &JsxSelfClosing) {
317 if s.name.is_empty() {
318 self.diag(Code::MalformedJsxTagName, "html: self-closing JSX has empty name; skipped".to_string());
319 return;
320 }
321 match s.name.as_str() {
322 "MermaidSvg" => {
323 if let Some(attr) = s.attrs.iter().find(|a| a.name == "svg")
324 && let JsxAttrValue::String(svg) = &attr.value
325 {
326 self.out.push_str(svg);
327 }
328 },
329 "MathMl" => {
330 if let Some(attr) = s.attrs.iter().find(|a| a.name == "mathml")
331 && let JsxAttrValue::String(mathml) = &attr.value
332 {
333 let unescaped = mathml.replace(""", "\"").replace("&", "&");
336 self.out.push_str(&unescaped);
337 }
338 },
339 "PackageManagerTabs" => {
340 self.out.push_str("<div class=\"gentledmc-pm-tabs\">");
341 for pm in ["npm", "yarn", "pnpm", "bun"] {
342 if let Some(attr) = s.attrs.iter().find(|a| a.name == pm)
343 && let JsxAttrValue::String(cmd) = &attr.value
344 {
345 self.out.push_str(&format!(
346 "<pre><code class=\"gentledmc-language-bash\" data-pm=\"{}\">{}</code></pre>",
347 pm,
348 escape_text(cmd)
349 ));
350 }
351 }
352 self.out.push_str("</div>");
353 },
354 _ => {
355 self.out.push('<');
356 self.out.push_str(&s.name);
357 for a in &s.attrs {
358 self.jsx_attr(a);
359 }
360 self.out.push_str(" />");
361 },
362 }
363 }
364
365 fn jsx_attr(&mut self, a: &JsxAttr) {
366 self.out.push(' ');
367 self.out.push_str(&a.name);
368 match &a.value {
369 JsxAttrValue::Boolean => self.out.push_str("=\"\""),
374 JsxAttrValue::String(s) => self.out.push_str(&format!("=\"{}\"", escape_attr(s))),
375 JsxAttrValue::Expression(e) => self.out.push_str(&format!("={{{}}}", e)),
376 JsxAttrValue::Spread(_) => {
380 self.out.pop();
381 },
382 }
383 }
384
385 fn inline_table(&mut self, t: &Table) {
390 self.out.push_str("<table>\n");
391 if let Some(header) = t.children.first() {
392 self.out.push_str("<thead>\n<tr>\n");
393 for (i, cell) in header.cells.iter().enumerate() {
394 self.inline_cell("th", cell, t.align.get(i).copied().unwrap_or(TableAlign::None));
395 }
396 self.out.push_str("</tr>\n</thead>\n");
397 }
398 if t.children.len() > 1 {
399 self.out.push_str("<tbody>\n");
400 for row in &t.children[1..] {
401 self.out.push_str("<tr>\n");
402 for (i, cell) in row.cells.iter().enumerate() {
403 self.inline_cell("td", cell, t.align.get(i).copied().unwrap_or(TableAlign::None));
404 }
405 self.out.push_str("</tr>\n");
406 }
407 self.out.push_str("</tbody>\n");
408 }
409 self.out.push_str("</table>\n");
410 }
411
412 fn inline_cell(&mut self, tag: &str, cell: &TableCell, align: TableAlign) {
413 self.out.push('<');
414 self.out.push_str(tag);
415 let align_str = match align {
416 TableAlign::Left => Some("left"),
417 TableAlign::Right => Some("right"),
418 TableAlign::Center => Some("center"),
419 TableAlign::None => None,
420 };
421 if let Some(a) = align_str {
422 self.out.push_str(&format!(" align=\"{}\"", a));
423 }
424 self.out.push('>');
425 for c in &cell.children {
426 self.inline_node(c);
427 }
428 self.out.push_str("</");
429 self.out.push_str(tag);
430 self.out.push_str(">\n");
431 }
432
433 fn inline_node(&mut self, node: &Node) {
437 match node {
438 Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
439 Node::Bold(i) => self.wrap_tag("strong", &i.children),
440 Node::Italic(i) => self.wrap_tag("em", &i.children),
441 Node::Strikethrough(i) => self.wrap_tag("del", &i.children),
442 Node::InlineCode(c) => {
443 self.out.push_str("<code>");
444 self.out.push_str(&escape_text(&c.value));
445 self.out.push_str("</code>");
446 },
447 Node::Link(l) => {
448 self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&l.href))));
449 if let Some(label) = &l.title {
450 self.out.push_str(&format!(" aria-label=\"{}\"", escape_attr(label)));
451 }
452 self.out.push('>');
453 for c in &l.children {
454 self.inline_node(c);
455 }
456 self.out.push_str("</a>");
457 },
458 Node::Image(i) => self.image(i),
459 Node::HardBreak(_) => self.out.push_str("<br />\n"),
460 Node::SoftBreak(_) => self.out.push('\n'),
461 Node::CodeBlock(cb) => self.code_block(cb),
462 _ => {
463 self.open_tag(node);
464 for kid in Node::children_of(node) {
465 self.inline_node(kid);
466 }
467 self.close_tag(node);
468 },
469 }
470 }
471
472 fn wrap_tag(&mut self, tag: &str, children: &[Node]) {
473 self.out.push('<');
474 self.out.push_str(tag);
475 self.out.push('>');
476 for c in children {
477 self.inline_node(c);
478 }
479 self.out.push_str("</");
480 self.out.push_str(tag);
481 self.out.push('>');
482 }
483}
484
/// Returns `true` when `name` is, ignoring ASCII case, one of the tag names
/// on the disallowed raw HTML list.
fn is_disallowed_raw_html(name: &str) -> bool {
  const DISALLOWED: [&str; 9] =
    ["title", "textarea", "style", "xmp", "iframe", "noembed", "noframes", "script", "plaintext"];
  DISALLOWED.iter().any(|tag| name.eq_ignore_ascii_case(tag))
}
495
496fn escape_disallowed_raw_html_tag(raw: &str) -> String {
497 let bytes = raw.as_bytes();
498 let mut out = String::with_capacity(raw.len());
499 let mut i = 0;
500 while i < bytes.len() {
501 if bytes[i] == b'<' {
502 let mut j = i + 1;
503 if j < bytes.len() && bytes[j] == b'/' {
504 j += 1;
505 }
506 let name_start = j;
507 while j < bytes.len() && ((bytes[j] as char).is_ascii_alphanumeric() || bytes[j] == b'-') {
508 j += 1;
509 }
510 if j > name_start && is_disallowed_raw_html(&raw[name_start..j]) {
511 out.push_str("<");
512 i += 1;
513 continue;
514 }
515 }
516 out.push(bytes[i] as char);
517 i += 1;
518 }
519 out
520}
521
522pub fn render_html(doc: &Document) -> String {
523 let mut e = HtmlEmitter::new();
524 Walker::new(doc).walk(&mut [&mut e]);
525 e.into_string()
526}
527
528pub fn render_html_with(doc: &Document, options: RenderOptions) -> String {
529 let mut e = HtmlEmitter::new_with_options(options);
530 Walker::new(doc).walk(&mut [&mut e]);
531 e.into_string()
532}
533
/// Decodes `raw` when it is a single string literal; returns `None` for
/// anything else.
///
/// After trimming surrounding whitespace the text must start and end with the
/// same delimiter: `'`, `"` or `` ` ``. It is rejected when
///
/// * an unescaped copy of the delimiter occurs inside it (for example
///   `'a' + 'b'`, which is an expression rather than one literal), or
/// * it is a backtick template containing an unescaped `${` interpolation.
///
/// The escapes `\n`, `\t`, `\r`, `\\`, `\'`, `\"` and `` \` `` are decoded.
/// Any other backslash sequence, and a backslash that is the last character,
/// is kept as written.
fn string_literal_expression(raw: &str) -> Option<String> {
  let s = raw.trim();
  let quote = s.chars().next().filter(|c| matches!(c, '\'' | '"' | '`'))?;
  // A lone delimiter cannot be both the opening and the closing quote.
  if s.len() < 2 || !s.ends_with(quote) {
    return None;
  }
  // The delimiters are single-byte ASCII, so both cut points are boundaries.
  let inner = &s[1..s.len() - 1];

  let mut out = String::with_capacity(inner.len());
  let mut chars = inner.chars().peekable();
  while let Some(c) = chars.next() {
    match c {
      // A backslash consumes the following character, which therefore can
      // neither end the literal nor start an interpolation.
      '\\' => match chars.next() {
        Some('n') => out.push('\n'),
        Some('t') => out.push('\t'),
        Some('r') => out.push('\r'),
        Some(escaped @ ('\\' | '\'' | '"' | '`')) => out.push(escaped),
        Some(other) => {
          out.push('\\');
          out.push(other);
        },
        None => out.push('\\'),
      },
      // The literal ended early, so `raw` is a larger expression.
      c if c == quote => return None,
      // Template interpolation cannot be evaluated statically.
      '$' if quote == '`' && chars.peek() == Some(&'{') => return None,
      other => out.push(other),
    }
  }
  Some(out)
}
595
#[cfg(test)]
mod tests {
  use super::string_literal_expression;

  /// Each of the three quote styles yields the text between the quotes.
  #[test]
  fn recognises_simple_quoted_strings() {
    let cases = [("' '", " "), ("\"x\"", "x"), ("`y`", "y")];
    for (raw, expected) in cases {
      assert_eq!(string_literal_expression(raw).as_deref(), Some(expected));
    }
  }

  /// A template literal with `${…}` cannot be rendered statically.
  #[test]
  fn rejects_template_with_interpolation() {
    assert_eq!(string_literal_expression("`hi ${name}`"), None);
  }

  /// Identifiers, calls and operators are not string literals.
  #[test]
  fn rejects_dynamic_expression() {
    for raw in ["count", "foo()", "a + b"] {
      assert_eq!(string_literal_expression(raw), None);
    }
  }

  /// Backslash escapes inside the literal are decoded.
  #[test]
  fn decodes_common_escapes() {
    let cases = [("'\\n'", "\n"), ("'\\\\'", "\\")];
    for (raw, expected) in cases {
      assert_eq!(string_literal_expression(raw).as_deref(), Some(expected));
    }
  }
}