1use crate::{
2 NodeSink, WalkCtx, Walker,
3 escape::{escape_attr, escape_text, escape_url, sanitize_url},
4};
5use dmc_diagnostic::Code;
6use dmc_parser::ast::*;
7use duck_diagnostic::{DiagnosticEngine, diag};
8
/// Switches that control how raw HTML and JSX are written by [`HtmlEmitter`].
///
/// Both flags are `false` in the derived `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct RenderOptions {
    /// When set, raw `Html` node values are passed through
    /// `escape_disallowed_raw_html_tag` and JSX element names are checked with
    /// `is_disallowed_raw_html` before their tags are written.
    pub gfm_disallowed_raw_html: bool,
    /// When unset, raw `Html` nodes are never emitted verbatim (they are escaped as text
    /// inside a paragraph or heading and dropped elsewhere), and the raw `svg` / `mathml`
    /// payloads of `MermaidSvg` / `MathMl` are not written.
    pub allow_dangerous_html: bool,
}
22
/// A `NodeSink` that turns walker events into an HTML string.
pub struct HtmlEmitter {
    /// Output buffer that every emit method appends to; returned by `into_string`.
    out: String,
    /// Receives the diagnostics raised through `diag`; returned by `into_parts`.
    diag_engine: DiagnosticEngine<Code>,
    /// Incremented when `enter` sees a `Table` and decremented when `leave` sees one.
    /// While it is non-zero, `enter` and `leave` ignore every other node because
    /// `inline_table` has already written the table's contents.
    in_table_depth: usize,
    /// Rendering switches supplied at construction time.
    options: RenderOptions,
}
32
33impl NodeSink for HtmlEmitter {
34 fn enter(&mut self, node: &Node, ctx: &WalkCtx) {
35 if self.in_table_depth > 0 {
36 return;
37 }
38 self.maybe_separate_list_item_block_child(node, ctx);
39 match node {
40 Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
41 Node::InlineCode(c) => {
42 self.out.push_str("<code>");
43 self.out.push_str(&escape_text(&c.value));
44 self.out.push_str("</code>");
45 },
46 Node::CodeBlock(cb) => self.code_block(cb),
47 Node::Image(i) => self.image(i),
48 Node::HorizontalRule(_) => self.out.push_str("<hr />\n"),
49 Node::HardBreak(_) => self.out.push_str("<br />\n"),
50 Node::Html(h) => {
57 let inline_context = matches!(ctx.parent, Some(Node::Paragraph(_)) | Some(Node::Heading(_)));
58 if !self.options.allow_dangerous_html {
59 if inline_context {
60 self.out.push_str(&escape_text(&h.value));
61 }
62 return;
64 }
65 let value =
66 if self.options.gfm_disallowed_raw_html { escape_disallowed_raw_html_tag(&h.value) } else { h.value.clone() };
67 self.out.push_str(&value);
68 if !inline_context && !value.ends_with('\n') {
69 self.out.push('\n');
70 }
71 },
72 Node::SoftBreak(_) => self.out.push('\n'),
73 Node::JsxSelfClosing(s) => self.jsx_self_closing(s),
74 Node::JsxExpression(e) => {
75 if let Some(text) = string_literal_expression(&e.value) {
78 self.out.push_str(&escape_text(&text));
79 } else {
80 self.diag(Code::HtmlExpressionDropped, format!("html: raw `{{...}}` expression dropped: {}", e.value.trim()));
81 }
82 },
83 Node::Table(t) => {
84 self.in_table_depth += 1;
85 self.inline_table(t);
86 },
87 Node::Frontmatter(_) | Node::Import(_) | Node::Export(_) => {},
88 _ => self.open_tag(node),
89 }
90 }
91
92 fn leave(&mut self, node: &Node, _ctx: &WalkCtx) {
93 if let Node::Table(_) = node {
94 self.in_table_depth = self.in_table_depth.saturating_sub(1);
95 return;
96 }
97 if self.in_table_depth > 0 {
98 return;
99 }
100 self.close_tag(node);
101 }
102}
103
104impl Default for HtmlEmitter {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl HtmlEmitter {
111 pub fn new() -> Self {
112 Self::new_with_options(RenderOptions::default())
113 }
114
115 pub fn new_with_options(options: RenderOptions) -> Self {
116 Self { out: String::new(), diag_engine: DiagnosticEngine::new(), in_table_depth: 0, options }
117 }
118
119 pub fn into_string(self) -> String {
120 self.out
121 }
122
123 pub fn into_parts(self) -> (String, DiagnosticEngine<Code>) {
126 (self.out, self.diag_engine)
127 }
128
129 pub fn render(doc: &Document) -> (String, DiagnosticEngine<Code>) {
131 let mut e = Self::new();
132 Walker::new(doc).walk(&mut [&mut e]);
133 e.into_parts()
134 }
135
136 pub fn render_with(doc: &Document, options: RenderOptions) -> (String, DiagnosticEngine<Code>) {
137 let mut e = Self::new_with_options(options);
138 Walker::new(doc).walk(&mut [&mut e]);
139 e.into_parts()
140 }
141
142 fn diag(&mut self, code: Code, message: impl Into<String>) {
143 self.diag_engine.emit(diag!(code, message.into()));
144 }
145
146 fn is_block_node(node: &Node) -> bool {
147 matches!(
148 node,
149 Node::Paragraph(_)
150 | Node::List(_)
151 | Node::Blockquote(_)
152 | Node::CodeBlock(_)
153 | Node::Heading(_)
154 | Node::HorizontalRule(_)
155 | Node::Table(_)
156 | Node::Html(_)
157 )
158 }
159
160 fn maybe_separate_list_item_block_child(&mut self, node: &Node, ctx: &WalkCtx) {
161 let Some(parent) = ctx.parent else {
162 return;
163 };
164 if !matches!(parent, Node::ListItem(_) | Node::TaskListItem(_)) || ctx.index == 0 || !Self::is_block_node(node) {
165 return;
166 }
167 let prev = Node::children_of(parent).get(ctx.index - 1);
168 if prev.is_some_and(|n| !Self::is_block_node(n)) && !self.out.ends_with('\n') {
169 self.out.push('\n');
170 }
171 }
172
173 fn open_tag(&mut self, node: &Node) {
174 match node {
175 Node::Heading(h) => match &h.id {
176 Some(id) => self.out.push_str(&format!("<h{} id=\"{}\">", h.level, escape_attr(id))),
177 None => self.out.push_str(&format!("<h{}>", h.level)),
178 },
179 Node::Paragraph(_) => self.out.push_str("<p>"),
180 Node::Bold(_) => self.out.push_str("<strong>"),
181 Node::Italic(_) => self.out.push_str("<em>"),
182 Node::Strikethrough(_) => self.out.push_str("<del>"),
183 Node::Blockquote(_) => self.out.push_str("<blockquote>\n"),
184 Node::List(l) => {
185 let tag = if l.ordered { "ol" } else { "ul" };
186 self.out.push('<');
187 self.out.push_str(tag);
188 if l.children.iter().any(|c| matches!(c, Node::TaskListItem(_))) {
190 self.out.push_str(" class=\"contains-task-list\"");
191 }
192 if l.ordered
193 && let Some(s) = l.start
194 && s != 1
195 {
196 self.out.push_str(&format!(" start=\"{}\"", s));
197 }
198 self.out.push_str(">\n");
199 },
200 Node::ListItem(li) => {
203 let has_block_child = li.children.first().is_some_and(|c| {
204 matches!(
205 c,
206 Node::Paragraph(_)
207 | Node::List(_)
208 | Node::Blockquote(_)
209 | Node::CodeBlock(_)
210 | Node::Heading(_)
211 | Node::HorizontalRule(_)
212 | Node::Table(_)
213 | Node::Html(_)
214 )
215 });
216 if has_block_child {
217 self.out.push_str("<li>\n");
218 } else {
219 self.out.push_str("<li>");
220 }
221 },
222 Node::TaskListItem(t) => {
223 let checked = if t.checked { " checked" } else { "" };
226 self.out.push_str(&format!("<li class=\"task-list-item\"><input type=\"checkbox\"{} disabled> ", checked));
227 },
228 Node::Link(l) => {
229 self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&sanitize_url(&l.href)))));
230 if let Some(title) = &l.title {
233 self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
234 }
235 self.out.push('>');
236 },
237 Node::JsxElement(e) => {
238 if e.name.is_empty() {
239 self.diag(Code::MalformedJsxTagName, "html: JSX element has empty name; skipped".to_string());
240 return;
241 }
242 if self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name) {
244 self.out.push_str("<");
245 } else {
246 self.out.push('<');
247 }
248 self.out.push_str(&e.name);
249 for a in &e.attrs {
250 self.jsx_attr(a);
251 }
252 self.out.push('>');
253 },
254 Node::JsxFragment(_) => {},
255 _ => {},
256 }
257 }
258
259 fn close_tag(&mut self, node: &Node) {
262 match node {
263 Node::Heading(h) => self.out.push_str(&format!("</h{}>\n", h.level)),
264 Node::Paragraph(_) => self.out.push_str("</p>\n"),
265 Node::Bold(_) => self.out.push_str("</strong>"),
266 Node::Italic(_) => self.out.push_str("</em>"),
267 Node::Strikethrough(_) => self.out.push_str("</del>"),
268 Node::Blockquote(_) => self.out.push_str("</blockquote>\n"),
269 Node::List(l) => {
270 let tag = if l.ordered { "ol" } else { "ul" };
271 self.out.push_str(&format!("</{}>\n", tag));
272 },
273 Node::ListItem(_) | Node::TaskListItem(_) => self.out.push_str("</li>\n"),
274 Node::Link(_) => self.out.push_str("</a>"),
275 Node::JsxElement(e) if !e.name.is_empty() => {
276 if self.options.gfm_disallowed_raw_html && is_disallowed_raw_html(&e.name) {
277 self.out.push_str(&format!("</{}>", e.name));
278 } else {
279 self.out.push_str(&format!("</{}>", e.name));
280 }
281 },
282 Node::JsxFragment(_) => {},
283 _ => {},
284 }
285 }
286
287 fn code_block(&mut self, cb: &CodeBlock) {
288 self.out.push_str("<pre><code");
289 if let Some(lang) = &cb.lang {
290 self.out.push_str(&format!(" class=\"language-{}\"", escape_attr(lang)));
291 }
292 self.out.push('>');
293 self.out.push_str(&escape_text(&cb.value));
294 self.out.push_str("</code></pre>\n");
295 }
296
297 fn image(&mut self, i: &Image) {
298 self.out.push_str(&format!(
299 "<img src=\"{}\" alt=\"{}\"",
300 escape_attr(&escape_url(&sanitize_url(&i.src))),
301 escape_attr(&i.alt)
302 ));
303 if let Some(title) = &i.title {
304 self.out.push_str(&format!(" title=\"{}\"", escape_attr(title)));
305 }
306 self.out.push_str(" />");
308 }
309
310 fn jsx_self_closing(&mut self, s: &JsxSelfClosing) {
311 if s.name.is_empty() {
312 self.diag(Code::MalformedJsxTagName, "html: self-closing JSX has empty name; skipped".to_string());
313 return;
314 }
315 match s.name.as_str() {
316 "MermaidSvg" => {
323 if self.options.allow_dangerous_html
324 && let Some(attr) = s.attrs.iter().find(|a| a.name == "svg")
325 && let JsxAttrValue::String(svg) = &attr.value
326 {
327 self.out.push_str(svg);
328 }
329 },
330 "MathMl" => {
331 if self.options.allow_dangerous_html
332 && let Some(attr) = s.attrs.iter().find(|a| a.name == "mathml")
333 && let JsxAttrValue::String(mathml) = &attr.value
334 {
335 let unescaped = mathml.replace(""", "\"").replace("&", "&");
337 self.out.push_str(&unescaped);
338 }
339 },
340 "PackageManagerTabs" => {
341 self.out.push_str("<div class=\"gentledmc-pm-tabs\">");
342 for pm in ["npm", "yarn", "pnpm", "bun"] {
343 if let Some(attr) = s.attrs.iter().find(|a| a.name == pm)
344 && let JsxAttrValue::String(cmd) = &attr.value
345 {
346 self.out.push_str(&format!(
347 "<pre><code class=\"gentledmc-language-bash\" data-pm=\"{}\">{}</code></pre>",
348 pm,
349 escape_text(cmd)
350 ));
351 }
352 }
353 self.out.push_str("</div>");
354 },
355 _ => {
356 self.out.push('<');
357 self.out.push_str(&s.name);
358 for a in &s.attrs {
359 self.jsx_attr(a);
360 }
361 self.out.push_str(" />");
362 },
363 }
364 }
365
366 fn jsx_attr(&mut self, a: &JsxAttr) {
367 self.out.push(' ');
368 self.out.push_str(&a.name);
369 match &a.value {
370 JsxAttrValue::Boolean => self.out.push_str("=\"\""),
373 JsxAttrValue::String(s) => self.out.push_str(&format!("=\"{}\"", escape_attr(s))),
374 JsxAttrValue::Expression(e) => self.out.push_str(&format!("={{{}}}", e)),
375 JsxAttrValue::Spread(_) => {
377 self.out.pop();
378 },
379 }
380 }
381
382 fn inline_table(&mut self, t: &Table) {
385 self.out.push_str("<table>\n");
386 if let Some(header) = t.children.first() {
387 self.out.push_str("<thead>\n<tr>\n");
388 for (i, cell) in header.cells.iter().enumerate() {
389 self.inline_cell("th", cell, t.align.get(i).copied().unwrap_or(TableAlign::None));
390 }
391 self.out.push_str("</tr>\n</thead>\n");
392 }
393 if t.children.len() > 1 {
394 self.out.push_str("<tbody>\n");
395 for row in &t.children[1..] {
396 self.out.push_str("<tr>\n");
397 for (i, cell) in row.cells.iter().enumerate() {
398 self.inline_cell("td", cell, t.align.get(i).copied().unwrap_or(TableAlign::None));
399 }
400 self.out.push_str("</tr>\n");
401 }
402 self.out.push_str("</tbody>\n");
403 }
404 self.out.push_str("</table>\n");
405 }
406
407 fn inline_cell(&mut self, tag: &str, cell: &TableCell, align: TableAlign) {
408 self.out.push('<');
409 self.out.push_str(tag);
410 let align_str = match align {
411 TableAlign::Left => Some("left"),
412 TableAlign::Right => Some("right"),
413 TableAlign::Center => Some("center"),
414 TableAlign::None => None,
415 };
416 if let Some(a) = align_str {
417 self.out.push_str(&format!(" align=\"{}\"", a));
418 }
419 self.out.push('>');
420 for c in &cell.children {
421 self.inline_node(c);
422 }
423 self.out.push_str("</");
424 self.out.push_str(tag);
425 self.out.push_str(">\n");
426 }
427
428 fn inline_node(&mut self, node: &Node) {
431 match node {
432 Node::Text(t) => self.out.push_str(&escape_text(&t.value)),
433 Node::Bold(i) => self.wrap_tag("strong", &i.children),
434 Node::Italic(i) => self.wrap_tag("em", &i.children),
435 Node::Strikethrough(i) => self.wrap_tag("del", &i.children),
436 Node::InlineCode(c) => {
437 self.out.push_str("<code>");
438 self.out.push_str(&escape_text(&c.value));
439 self.out.push_str("</code>");
440 },
441 Node::Link(l) => {
442 self.out.push_str(&format!("<a href=\"{}\"", escape_attr(&escape_url(&sanitize_url(&l.href)))));
443 if let Some(label) = &l.title {
444 self.out.push_str(&format!(" aria-label=\"{}\"", escape_attr(label)));
445 }
446 self.out.push('>');
447 for c in &l.children {
448 self.inline_node(c);
449 }
450 self.out.push_str("</a>");
451 },
452 Node::Image(i) => self.image(i),
453 Node::HardBreak(_) => self.out.push_str("<br />\n"),
454 Node::SoftBreak(_) => self.out.push('\n'),
455 Node::CodeBlock(cb) => self.code_block(cb),
456 _ => {
457 self.open_tag(node);
458 for kid in Node::children_of(node) {
459 self.inline_node(kid);
460 }
461 self.close_tag(node);
462 },
463 }
464 }
465
466 fn wrap_tag(&mut self, tag: &str, children: &[Node]) {
467 self.out.push('<');
468 self.out.push_str(tag);
469 self.out.push('>');
470 for c in children {
471 self.inline_node(c);
472 }
473 self.out.push_str("</");
474 self.out.push_str(tag);
475 self.out.push('>');
476 }
477}
478
/// Returns `true` when `name` (compared ASCII case-insensitively) is one of the tag names
/// this module filters out of raw HTML.
fn is_disallowed_raw_html(name: &str) -> bool {
    const DISALLOWED: [&str; 9] =
        ["title", "textarea", "style", "xmp", "iframe", "noembed", "noframes", "script", "plaintext"];
    DISALLOWED.iter().any(|tag| name.eq_ignore_ascii_case(tag))
}

/// Returns `raw` with the `<` of every opening or closing tag whose name is disallowed
/// replaced by `&lt;`, so the tag is shown as text instead of being interpreted.
///
/// A tag name is the run of ASCII alphanumerics and `-` that follows `<` or `</`.
/// Everything else, including non-ASCII text, is copied through unchanged.
fn escape_disallowed_raw_html_tag(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut rest = raw;
    while let Some(lt) = rest.find('<') {
        // Copy the text before `<` as a string slice; pushing individual bytes as
        // `char`s would corrupt multi-byte UTF-8 sequences.
        out.push_str(&rest[..lt]);
        let after = &rest[lt + 1..];
        let candidate = after.strip_prefix('/').unwrap_or(after);
        let name_len = candidate.bytes().take_while(|b| b.is_ascii_alphanumeric() || *b == b'-').count();
        if name_len > 0 && is_disallowed_raw_html(&candidate[..name_len]) {
            out.push_str("&lt;");
        } else {
            out.push('<');
        }
        // Only the `<` itself is consumed; the rest of the tag is scanned as plain text.
        rest = after;
    }
    out.push_str(rest);
    out
}
512
513pub fn render_html(doc: &Document) -> String {
514 let mut e = HtmlEmitter::new();
515 Walker::new(doc).walk(&mut [&mut e]);
516 e.into_string()
517}
518
519pub fn render_html_with(doc: &Document, options: RenderOptions) -> String {
520 let mut e = HtmlEmitter::new_with_options(options);
521 Walker::new(doc).walk(&mut [&mut e]);
522 e.into_string()
523}
524
/// Recognises an expression that is a single static string literal and returns its
/// decoded text.
///
/// After trimming, `raw` must start and end with the same quote character (`'`, `"` or
/// `` ` ``). Returns `None` when:
/// - the input is not quoted that way;
/// - the body contains an unescaped occurrence of the delimiting quote, which means the
///   input is a larger expression such as `'a' + 'b'` rather than one literal;
/// - a backtick template contains an unescaped `${` interpolation.
///
/// The escapes `\n`, `\t`, `\r`, `\\`, `\'`, `\"` and `` \` `` are decoded. Any other
/// escape sequence, and a trailing lone backslash, is kept as written.
fn string_literal_expression(raw: &str) -> Option<String> {
    let s = raw.trim();
    let quote = s.chars().next().filter(|c| matches!(c, '\'' | '"' | '`'))?;
    // A lone quote character both starts and ends with the quote, hence the length check.
    if s.len() < 2 || !s.ends_with(quote) {
        return None;
    }
    // The quotes are ASCII, so slicing one byte off each end stays on char boundaries.
    let inner = &s[1..s.len() - 1];
    let is_template = quote == '`';

    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\\' => match chars.next() {
                Some('n') => out.push('\n'),
                Some('t') => out.push('\t'),
                Some('r') => out.push('\r'),
                Some('\\') => out.push('\\'),
                Some('\'') => out.push('\''),
                Some('"') => out.push('"'),
                Some('`') => out.push('`'),
                Some(other) => {
                    out.push('\\');
                    out.push(other);
                },
                None => out.push('\\'),
            },
            // The literal ended before the end of the input: not a plain string.
            c if c == quote => return None,
            // `${` starts an interpolation, which makes the template dynamic.
            '$' if is_template && chars.peek() == Some(&'{') => return None,
            _ => out.push(c),
        }
    }
    Some(out)
}
579
#[cfg(test)]
mod tests {
    use super::string_literal_expression;

    /// Each quote style is accepted and the quotes are stripped.
    #[test]
    fn recognises_simple_quoted_strings() {
        let cases = [("' '", " "), ("\"x\"", "x"), ("`y`", "y")];
        for (input, expected) in cases {
            assert_eq!(string_literal_expression(input), Some(expected.into()));
        }
    }

    /// A template literal with `${…}` is dynamic and therefore rejected.
    #[test]
    fn rejects_template_with_interpolation() {
        assert!(string_literal_expression("`hi ${name}`").is_none());
    }

    /// Anything that is not a quoted literal is rejected.
    #[test]
    fn rejects_dynamic_expression() {
        for input in ["count", "foo()", "a + b"] {
            assert!(string_literal_expression(input).is_none());
        }
    }

    /// Backslash escapes are decoded to the characters they stand for.
    #[test]
    fn decodes_common_escapes() {
        let cases = [("'\\n'", "\n"), ("'\\\\'", "\\")];
        for (input, expected) in cases {
            assert_eq!(string_literal_expression(input), Some(expected.into()));
        }
    }
}
608}