Skip to main content

markdown_syntax/html/
mod.rs

1//! AST to HTML rendering.
2//!
3//! The renderer is safe by default: raw HTML is escaped, dangerous protocols
4//! are blanked, and image `src` values use the same protocol filter unless
5//! explicitly relaxed through [`HtmlOptions`].
6
7mod blocks;
8mod escape;
9mod footnotes;
10mod inlines;
11mod refs;
12mod tables;
13
14use alloc::{string::String, vec::Vec};
15
16use crate::{ast::Document, diagnostic::Diagnostic, validate::validate_document};
17
18use self::footnotes::FootnoteContext;
19use self::refs::DefMap;
20
/// Selects what the renderer emits for raw HTML while it is running in safe
/// mode, i.e. while `allow_dangerous_html` is switched off.
///
/// The selected form also decides which URL-scheme policy applies to unknown
/// URI schemes; see the individual variants.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeRawHtmlForm {
    /// Render raw HTML as escaped text.
    ///
    /// This is the form chosen by `HtmlOptions::default()`. Unknown URI
    /// schemes are judged with the CommonMark/micromark link-scheme allowlist.
    EscapeText,
    /// Replace raw HTML with the GFM raw-HTML placeholder.
    ///
    /// This follows the cmark-gfm oracle conventions, which includes judging
    /// unknown URI schemes with its link-scheme denylist.
    OmitPlaceholder,
}
32
/// Which of the two attributes comes first on a *disabled* task-list checkbox
/// `<input>`.
///
/// This exists purely for oracle parity: both orders describe the same
/// checkbox, only the serialized attribute order differs.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TasklistAttrOrder {
    /// Write `disabled=""` first, then `checked=""`.
    DisabledFirst,
    /// Write `checked=""` first, then `disabled=""`.
    CheckedFirst,
}
43
44/// HTML rendering options. The default is safe: raw HTML is escaped, dangerous
45/// link/image protocols are blanked, and task-list checkboxes are disabled.
46#[derive(Clone, Debug, Eq, PartialEq)]
47#[non_exhaustive]
48pub struct HtmlOptions {
49    /// Emit raw HTML blocks/inlines verbatim.
50    pub allow_dangerous_html: bool,
51    /// Keep dangerous link/image protocols instead of blanking the attribute.
52    pub allow_dangerous_protocol: bool,
53    /// Let image `src` values bypass the protocol filter.
54    pub allow_any_img_src: bool,
55    /// Apply the GFM tagfilter to raw HTML when dangerous HTML is enabled.
56    pub gfm_tagfilter: bool,
57    /// Omit `disabled=""` from task-list checkbox inputs.
58    pub tasklist_checkable: bool,
59    /// Safe-mode raw HTML convention.
60    pub safe_raw_html_form: SafeRawHtmlForm,
61    /// Attribute ordering convention for disabled task-list checkbox inputs.
62    pub tasklist_attr_order: TasklistAttrOrder,
63}
64
65impl Default for HtmlOptions {
66    fn default() -> Self {
67        Self {
68            allow_dangerous_html: false,
69            allow_dangerous_protocol: false,
70            allow_any_img_src: false,
71            gfm_tagfilter: false,
72            tasklist_checkable: false,
73            safe_raw_html_form: SafeRawHtmlForm::EscapeText,
74            tasklist_attr_order: TasklistAttrOrder::DisabledFirst,
75        }
76    }
77}
78
79/// Why HTML rendering failed.
80#[derive(Clone, Debug, Eq, PartialEq)]
81pub enum HtmlError {
82    /// The AST failed validation before rendering.
83    InvalidDocument(Vec<Diagnostic>),
84}
85
86/// Render-time context threaded through the block/inline renderers. Holds the
87/// resolved definition map, the footnote document state, and the config flags.
88pub(crate) struct Ctx<'a> {
89    pub defs: &'a DefMap,
90    pub footnotes: &'a FootnoteContext,
91    pub allow_dangerous_html: bool,
92    pub allow_dangerous_protocol: bool,
93    pub allow_any_img_src: bool,
94    pub gfm_tagfilter: bool,
95    pub tasklist_checkable: bool,
96    pub safe_raw_html_form: SafeRawHtmlForm,
97    pub tasklist_attr_order: TasklistAttrOrder,
98}
99
100impl<'a> Ctx<'a> {
101    /// GFM (cmark-gfm) suites use a URL-scheme denylist (keep everything but
102    /// `javascript:`/`vbscript:`/`file:`/`data:`); CommonMark (micromark)
103    /// suites use an allowlist. The two oracles genuinely disagree on unknown
104    /// schemes (cmark-gfm keeps `smb:`; micromark blanks `made-up-scheme:`), so
105    /// the conformance runner selects the GFM policy through the GFM raw-HTML
106    /// convention option.
107    pub fn gfm_url_denylist(&self) -> bool {
108        matches!(self.safe_raw_html_form, SafeRawHtmlForm::OmitPlaceholder)
109    }
110}
111
112impl Document {
113    /// Render this document to safe-by-default HTML with default options.
114    pub fn to_html(&self) -> Result<String, HtmlError> {
115        self.to_html_with(&HtmlOptions::default())
116    }
117
118    /// Render this document to HTML with explicit options.
119    pub fn to_html_with(&self, options: &HtmlOptions) -> Result<String, HtmlError> {
120        let diagnostics = validate_document(self);
121        if !diagnostics.is_empty() {
122            return Err(HtmlError::InvalidDocument(diagnostics));
123        }
124
125        Ok(render_document(self, options))
126    }
127}
128
129/// Render a whole validated document to HTML.
130///
131/// Top-level blocks are rendered, empty-string results are filtered, survivors
132/// are joined with a single `\n` (no leading/trailing newline), then the
133/// document-end footnote section is appended when any footnote was referenced.
134fn render_document(doc: &Document, options: &HtmlOptions) -> String {
135    let defs = DefMap::build(&doc.children);
136    let footnote_ctx = footnotes::build(&doc.children);
137
138    let ctx = Ctx {
139        defs: &defs,
140        footnotes: &footnote_ctx,
141        allow_dangerous_html: options.allow_dangerous_html,
142        allow_dangerous_protocol: options.allow_dangerous_protocol,
143        allow_any_img_src: options.allow_any_img_src,
144        gfm_tagfilter: options.gfm_tagfilter,
145        tasklist_checkable: options.tasklist_checkable,
146        safe_raw_html_form: options.safe_raw_html_form,
147        tasklist_attr_order: options.tasklist_attr_order,
148    };
149
150    let mut out = blocks::render_blocks_joined(&doc.children, &ctx);
151
152    let section = footnotes::emit_footnote_section(&footnote_ctx, |body| {
153        blocks::render_blocks_joined(body, &ctx)
154    });
155    if !section.is_empty() {
156        if !out.is_empty() {
157            out.push('\n');
158        }
159        out.push_str(&section);
160    }
161
162    out
163}