Skip to main content

mdwright_document/
render.rs

1//! HTML rendering policy owned by the document crate.
2//!
3//! Production parsing still flows through `crate::parse`; this module
4//! only decides how a recognised event stream is spelled as HTML.
5
6use std::collections::HashMap;
7use std::fmt::Write as _;
8
9use pulldown_cmark::{Alignment, BlockQuoteKind, CodeBlockKind, CowStr, Event, HeadingLevel, LinkType, Tag, TagEnd};
10
/// Selects how HTML is spelled by
/// [`crate::render_html_with_render_options`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum RenderProfile {
    /// Delegate to the HTML renderer built into pulldown-cmark.
    #[default]
    Pulldown,
    /// Follow cmark-gfm's HTML spelling wherever that is possible
    /// without changing parser semantics.
    CmarkGfm,
}
21
22/// Rendering policy for source-to-HTML helpers.
23#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
24pub struct RenderOptions {
25    profile: RenderProfile,
26}
27
28impl RenderOptions {
29    /// HTML spelling profile.
30    #[must_use]
31    pub fn profile(&self) -> RenderProfile {
32        self.profile
33    }
34
35    /// Override the HTML spelling profile.
36    #[must_use]
37    pub fn with_profile(mut self, profile: RenderProfile) -> Self {
38        self.profile = profile;
39        self
40    }
41}
42
/// Section of the table currently being rendered; decides whether cells
/// are written as `<th>` (head) or `<td>` (body).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum TableState {
    /// Inside the header row.
    Head,
    /// Past the header; rows belong to the table body.
    Body,
}
48
/// Tracks whether `<tbody>` has been written for the current table, so
/// it is opened once and closed only if it was opened.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum TableBodyState {
    /// No `<tbody>` has been written yet.
    Closed,
    /// `<tbody>` was written and still needs its closing tag.
    Open,
}
54
/// Tracks whether a table cell tag is currently open and still needs
/// its closing tag.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum TableCellState {
    /// No cell is open.
    Closed,
    /// A `<th>`/`<td>` was written and has not been closed yet.
    Open,
}
60
61pub(crate) fn render_cmark_gfm_html(events: Vec<Event<'_>>) -> String {
62    CmarkGfmHtml::new(events.into_iter().map(Event::into_static)).run()
63}
64
65struct CmarkGfmHtml<I> {
66    iter: I,
67    out: String,
68    end_newline: bool,
69    metadata_depth: u32,
70    html_block_depth: u32,
71    table_state: TableState,
72    table_alignments: Vec<Alignment>,
73    table_cell_index: usize,
74    table_body: TableBodyState,
75    table_cell: TableCellState,
76    numbers: HashMap<String, usize>,
77}
78
79impl<I> CmarkGfmHtml<I>
80where
81    I: Iterator<Item = Event<'static>>,
82{
83    fn new(iter: I) -> Self {
84        Self {
85            iter,
86            out: String::new(),
87            end_newline: true,
88            metadata_depth: 0,
89            html_block_depth: 0,
90            table_state: TableState::Head,
91            table_alignments: Vec::new(),
92            table_cell_index: 0,
93            table_body: TableBodyState::Closed,
94            table_cell: TableCellState::Closed,
95            numbers: HashMap::new(),
96        }
97    }
98
99    fn run(mut self) -> String {
100        while let Some(event) = self.iter.next() {
101            match event {
102                Event::Start(tag) => self.start_tag(tag),
103                Event::End(tag) => self.end_tag(tag),
104                Event::Text(text) => {
105                    if self.metadata_depth == 0 {
106                        self.escape_body_text(text.as_ref());
107                    }
108                }
109                Event::Code(text) => {
110                    self.write("<code>");
111                    self.escape_body_text(text.as_ref());
112                    self.write("</code>");
113                }
114                Event::InlineMath(text) => {
115                    self.write(r#"<span class="math math-inline">"#);
116                    self.escape_attr(text.as_ref());
117                    self.write("</span>");
118                }
119                Event::DisplayMath(text) => {
120                    self.write(r#"<span class="math math-display">"#);
121                    self.escape_attr(text.as_ref());
122                    self.write("</span>");
123                }
124                Event::Html(html) => {
125                    if self.html_block_depth > 0 && !self.end_newline {
126                        self.write_newline();
127                    }
128                    self.write(html.as_ref());
129                }
130                Event::InlineHtml(html) => self.write(html.as_ref()),
131                Event::FootnoteReference(name) => self.footnote_reference(name),
132                Event::SoftBreak => self.write_newline(),
133                Event::HardBreak => self.write("<br />\n"),
134                Event::Rule => {
135                    if !self.end_newline {
136                        self.write_newline();
137                    }
138                    self.write("<hr />\n");
139                }
140                Event::TaskListMarker(false) => self.write(r#"<input type="checkbox" disabled="" /> "#),
141                Event::TaskListMarker(true) => self.write(r#"<input type="checkbox" checked="" disabled="" /> "#),
142            }
143        }
144        self.out
145    }
146
147    fn write(&mut self, s: &str) {
148        self.out.push_str(s);
149        if !s.is_empty() {
150            self.end_newline = s.ends_with('\n');
151        }
152    }
153
154    fn write_newline(&mut self) {
155        self.out.push('\n');
156        self.end_newline = true;
157    }
158
159    fn start_tag(&mut self, tag: Tag<'static>) {
160        match tag {
161            Tag::HtmlBlock => {
162                self.html_block_depth = self.html_block_depth.saturating_add(1);
163            }
164            Tag::Paragraph => self.open_block("<p>"),
165            Tag::Heading {
166                level,
167                id,
168                classes,
169                attrs,
170            } => self.heading_start(level, id, &classes, &attrs),
171            Tag::BlockQuote(kind) => self.blockquote_start(kind),
172            Tag::CodeBlock(kind) => self.code_block_start(kind),
173            Tag::List(Some(1)) => self.open_line("<ol>\n"),
174            Tag::List(Some(start)) => {
175                if !self.end_newline {
176                    self.write_newline();
177                }
178                self.write(r#"<ol start=""#);
179                let _ = write!(self.out, "{start}");
180                self.write("\">\n");
181            }
182            Tag::List(None) => self.open_line("<ul>\n"),
183            Tag::Item => self.open_block("<li>"),
184            Tag::DefinitionList => self.open_line("<dl>\n"),
185            Tag::DefinitionListTitle => self.open_block("<dt>"),
186            Tag::DefinitionListDefinition => self.open_block("<dd>"),
187            Tag::Table(alignments) => {
188                self.table_alignments = alignments;
189                self.table_state = TableState::Head;
190                self.table_body = TableBodyState::Closed;
191                self.write("<table>\n");
192            }
193            Tag::TableHead => {
194                self.table_state = TableState::Head;
195                self.table_cell_index = 0;
196                self.write("<thead>\n<tr>\n");
197            }
198            Tag::TableRow => {
199                if self.table_state == TableState::Body && self.table_body == TableBodyState::Closed {
200                    self.table_body = TableBodyState::Open;
201                    self.write("<tbody>\n");
202                }
203                self.table_cell_index = 0;
204                self.write("<tr>\n");
205            }
206            Tag::TableCell => self.table_cell_start(),
207            Tag::Subscript => self.write("<sub>"),
208            Tag::Superscript => self.write("<sup>"),
209            Tag::Emphasis => self.write("<em>"),
210            Tag::Strong => self.write("<strong>"),
211            Tag::Strikethrough => self.write("<del>"),
212            Tag::Link {
213                link_type,
214                dest_url,
215                title,
216                ..
217            } => self.link_start(link_type, &dest_url, &title),
218            Tag::Image { dest_url, title, .. } => self.image(&dest_url, &title),
219            Tag::FootnoteDefinition(name) => self.footnote_definition_start(name),
220            Tag::MetadataBlock(_) => self.metadata_depth = self.metadata_depth.saturating_add(1),
221        }
222    }
223
224    fn end_tag(&mut self, tag: TagEnd) {
225        match tag {
226            TagEnd::HtmlBlock => self.html_block_depth = self.html_block_depth.saturating_sub(1),
227            TagEnd::Paragraph => self.write("</p>\n"),
228            TagEnd::Heading(level) => {
229                self.write("</");
230                self.write(&level.to_string());
231                self.write(">\n");
232            }
233            TagEnd::BlockQuote(_) => self.write("</blockquote>\n"),
234            TagEnd::CodeBlock => self.write("</code></pre>\n"),
235            TagEnd::List(true) => self.write("</ol>\n"),
236            TagEnd::List(false) => self.write("</ul>\n"),
237            TagEnd::Item => self.write("</li>\n"),
238            TagEnd::DefinitionList => self.write("</dl>\n"),
239            TagEnd::DefinitionListTitle => self.write("</dt>\n"),
240            TagEnd::DefinitionListDefinition => self.write("</dd>\n"),
241            TagEnd::Table => {
242                if self.table_body == TableBodyState::Open {
243                    self.write("</tbody>\n");
244                }
245                self.write("</table>\n");
246            }
247            TagEnd::TableHead => {
248                self.write("</tr>\n</thead>\n");
249                self.table_state = TableState::Body;
250            }
251            TagEnd::TableRow => {
252                self.close_table_cell_if_open();
253                self.write("</tr>\n");
254            }
255            TagEnd::TableCell => {
256                self.close_table_cell_if_open();
257            }
258            TagEnd::Emphasis => self.write("</em>"),
259            TagEnd::Superscript => self.write("</sup>"),
260            TagEnd::Subscript => self.write("</sub>"),
261            TagEnd::Strong => self.write("</strong>"),
262            TagEnd::Strikethrough => self.write("</del>"),
263            TagEnd::Link => self.write("</a>"),
264            TagEnd::Image => {}
265            TagEnd::FootnoteDefinition => self.write("</div>\n"),
266            TagEnd::MetadataBlock(_) => self.metadata_depth = self.metadata_depth.saturating_sub(1),
267        }
268    }
269
270    fn open_block(&mut self, tag: &str) {
271        if !self.end_newline {
272            self.write_newline();
273        }
274        self.write(tag);
275    }
276
277    fn open_line(&mut self, tag: &str) {
278        if !self.end_newline {
279            self.write_newline();
280        }
281        self.write(tag);
282    }
283
284    fn heading_start(
285        &mut self,
286        level: HeadingLevel,
287        id: Option<CowStr<'static>>,
288        classes: &[CowStr<'static>],
289        attrs: &[(CowStr<'static>, Option<CowStr<'static>>)],
290    ) {
291        if !self.end_newline {
292            self.write_newline();
293        }
294        self.write("<");
295        self.write(&level.to_string());
296        if let Some(id) = id {
297            self.write(r#" id=""#);
298            self.escape_attr(id.as_ref());
299            self.write("\"");
300        }
301        if !classes.is_empty() {
302            self.write(r#" class=""#);
303            for (i, class) in classes.iter().enumerate() {
304                if i > 0 {
305                    self.write(" ");
306                }
307                self.escape_attr(class.as_ref());
308            }
309            self.write("\"");
310        }
311        for (attr, value) in attrs {
312            self.write(" ");
313            self.escape_attr(attr.as_ref());
314            self.write(r#"=""#);
315            if let Some(value) = value {
316                self.escape_attr(value.as_ref());
317            }
318            self.write("\"");
319        }
320        self.write(">");
321    }
322
323    fn blockquote_start(&mut self, kind: Option<BlockQuoteKind>) {
324        if !self.end_newline {
325            self.write_newline();
326        }
327        match kind {
328            None => self.write("<blockquote>\n"),
329            Some(BlockQuoteKind::Note) => self.write(r#"<blockquote class="markdown-alert-note">"#),
330            Some(BlockQuoteKind::Tip) => self.write(r#"<blockquote class="markdown-alert-tip">"#),
331            Some(BlockQuoteKind::Important) => self.write(r#"<blockquote class="markdown-alert-important">"#),
332            Some(BlockQuoteKind::Warning) => self.write(r#"<blockquote class="markdown-alert-warning">"#),
333            Some(BlockQuoteKind::Caution) => self.write(r#"<blockquote class="markdown-alert-caution">"#),
334        }
335        if kind.is_some() {
336            self.write_newline();
337        }
338    }
339
340    fn code_block_start(&mut self, kind: CodeBlockKind<'static>) {
341        if !self.end_newline {
342            self.write_newline();
343        }
344        match kind {
345            CodeBlockKind::Indented => self.write("<pre><code>"),
346            CodeBlockKind::Fenced(info) => {
347                let lang = info.split(' ').next().unwrap_or_default();
348                if lang.is_empty() {
349                    self.write("<pre><code>");
350                } else {
351                    self.write(r#"<pre><code class="language-"#);
352                    self.escape_attr(lang);
353                    self.write("\">");
354                }
355            }
356        }
357    }
358
359    fn table_cell_start(&mut self) {
360        self.close_table_cell_if_open();
361        let (tag, end) = match self.table_state {
362            TableState::Head => ("<th", ">"),
363            TableState::Body => ("<td", ">"),
364        };
365        self.write(tag);
366        match self.table_alignments.get(self.table_cell_index) {
367            Some(Alignment::Left) => self.write(r#" align="left""#),
368            Some(Alignment::Center) => self.write(r#" align="center""#),
369            Some(Alignment::Right) => self.write(r#" align="right""#),
370            Some(Alignment::None) | None => {}
371        }
372        self.write(end);
373        self.table_cell = TableCellState::Open;
374    }
375
376    fn close_table_cell_if_open(&mut self) {
377        if self.table_cell == TableCellState::Closed {
378            return;
379        }
380        match self.table_state {
381            TableState::Head => self.write("</th>\n"),
382            TableState::Body => self.write("</td>\n"),
383        }
384        self.table_cell_index = self.table_cell_index.saturating_add(1);
385        self.table_cell = TableCellState::Closed;
386    }
387
388    fn link_start(&mut self, link_type: LinkType, dest_url: &CowStr<'static>, title: &CowStr<'static>) {
389        self.write(r#"<a href=""#);
390        if link_type == LinkType::Email && !dest_url.starts_with("mailto:") {
391            self.write("mailto:");
392        }
393        self.escape_href(dest_url.as_ref());
394        if !title.is_empty() {
395            self.write(r#"" title=""#);
396            self.escape_attr(title.as_ref());
397        }
398        self.write("\">");
399    }
400
401    fn image(&mut self, dest_url: &CowStr<'static>, title: &CowStr<'static>) {
402        self.write(r#"<img src=""#);
403        self.escape_href(dest_url.as_ref());
404        self.write(r#"" alt=""#);
405        self.raw_text();
406        if !title.is_empty() {
407            self.write(r#"" title=""#);
408            self.escape_attr(title.as_ref());
409        }
410        self.write(r#"" />"#);
411    }
412
413    fn footnote_reference(&mut self, name: CowStr<'static>) {
414        let len = self.numbers.len().saturating_add(1);
415        self.write(r##"<sup class="footnote-reference"><a href="#"##);
416        self.escape_attr(name.as_ref());
417        self.write("\">");
418        let number = *self.numbers.entry(name.into_string()).or_insert(len);
419        let _ = write!(self.out, "{number}");
420        self.write("</a></sup>");
421    }
422
423    fn footnote_definition_start(&mut self, name: CowStr<'static>) {
424        if !self.end_newline {
425            self.write_newline();
426        }
427        self.write(r#"<div class="footnote-definition" id=""#);
428        self.escape_attr(name.as_ref());
429        self.write(r#""><sup class="footnote-definition-label">"#);
430        let len = self.numbers.len().saturating_add(1);
431        let number = *self.numbers.entry(name.into_string()).or_insert(len);
432        let _ = write!(self.out, "{number}");
433        self.write("</sup>");
434    }
435
436    fn raw_text(&mut self) {
437        let mut nest = 0usize;
438        while let Some(event) = self.iter.next() {
439            match event {
440                Event::Start(_) => nest = nest.saturating_add(1),
441                Event::End(_) if nest == 0 => break,
442                Event::End(_) => nest = nest.saturating_sub(1),
443                Event::Html(_) => {}
444                Event::InlineHtml(text) | Event::Code(text) | Event::Text(text) => self.escape_attr(text.as_ref()),
445                Event::InlineMath(text) => {
446                    self.write("$");
447                    self.escape_attr(text.as_ref());
448                    self.write("$");
449                }
450                Event::DisplayMath(text) => {
451                    self.write("$$");
452                    self.escape_attr(text.as_ref());
453                    self.write("$$");
454                }
455                Event::SoftBreak | Event::HardBreak | Event::Rule => self.write(" "),
456                Event::FootnoteReference(name) => {
457                    let len = self.numbers.len().saturating_add(1);
458                    let number = *self.numbers.entry(name.into_string()).or_insert(len);
459                    let _ = write!(self.out, "[{number}]");
460                }
461                Event::TaskListMarker(true) => self.write("[x]"),
462                Event::TaskListMarker(false) => self.write("[ ]"),
463            }
464        }
465    }
466
467    fn escape_body_text(&mut self, s: &str) {
468        escape_html(&mut self.out, s);
469        self.end_newline = s.ends_with('\n');
470    }
471
472    fn escape_attr(&mut self, s: &str) {
473        escape_html(&mut self.out, s);
474        self.end_newline = s.ends_with('\n');
475    }
476
477    fn escape_href(&mut self, s: &str) {
478        escape_href(&mut self.out, s);
479        self.end_newline = s.ends_with('\n');
480    }
481}
482
/// Appends `s` to `out`, replacing `&`, `<`, `>` and `"` with their
/// HTML entities. Every other character is copied unchanged.
fn escape_html(out: &mut String, s: &str) {
    const SPECIAL: &[char] = &['&', '<', '>', '"'];
    // Each piece is a run of ordinary text that ends with at most one
    // special character, so only its last character needs inspecting.
    for piece in s.split_inclusive(SPECIAL) {
        let mut chars = piece.chars();
        let entity = match chars.next_back() {
            Some('&') => "&amp;",
            Some('<') => "&lt;",
            Some('>') => "&gt;",
            Some('"') => "&quot;",
            _ => {
                out.push_str(piece);
                continue;
            }
        };
        out.push_str(chars.as_str());
        out.push_str(entity);
    }
}
494
495fn escape_href(out: &mut String, s: &str) {
496    for ch in s.chars() {
497        match ch {
498            '&' => out.push_str("&amp;"),
499            '\'' => out.push_str("&#x27;"),
500            '"' | '<' | '>' | '\\' | '[' | ']' | '`' => {
501                let mut buf = [0u8; 4];
502                for byte in ch.encode_utf8(&mut buf).as_bytes() {
503                    push_percent_byte(out, *byte);
504                }
505            }
506            ch if ch.is_ascii_control() || ch == ' ' || !ch.is_ascii() => {
507                let mut buf = [0u8; 4];
508                for byte in ch.encode_utf8(&mut buf).as_bytes() {
509                    push_percent_byte(out, *byte);
510                }
511            }
512            _ => out.push(ch),
513        }
514    }
515}
516
517fn push_percent_byte(out: &mut String, byte: u8) {
518    out.push('%');
519    out.push(hex_digit(byte >> 4));
520    out.push(hex_digit(byte & 0x0f));
521}
522
/// Returns the uppercase hexadecimal digit for `nibble`.
///
/// Values above 15 are not valid nibbles and map to `'0'`.
fn hex_digit(nibble: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    DIGITS
        .get(usize::from(nibble))
        .map_or('0', |&digit| char::from(digit))
}
543}