cmark_writer/writer/html/core/
mod.rs

1use super::{HtmlWriteError, HtmlWriteResult, HtmlWriterOptions};
2#[cfg(feature = "gfm")]
3use crate::ast::TableAlignment;
4use crate::ast::{CodeBlockType, CustomNode, HeadingType, HtmlElement, ListItem, Node};
5use crate::writer::runtime::diagnostics::{Diagnostic, DiagnosticSink, NullSink};
6use crate::writer::runtime::visitor::{walk_node, NodeHandler};
7use ecow::EcoString;
8use html_escape;
9use log;
10use std::fmt;
11
12mod guard;
13
14pub(crate) use guard::GuardedHtmlElement;
15use guard::GuardedTagWriter;
16
17/// HTML writer for serializing CommonMark AST nodes to HTML.
18///
19/// `HtmlWriter` provides a flexible API for generating HTML content from AST nodes. It can be used:
20/// - Directly with individual nodes through methods like `write_node`
21/// - For building HTML elements programmatically using the tag and attribute methods
22/// - As part of the CommonMarkWriter's HTML rendering process
23/// - In custom node implementations via the `html_impl=true` attribute
24///
25/// # Examples
26///
27/// ## Basic usage
28///
29/// ```rust
30/// use cmark_writer::{HtmlWriter, Node};
31///
32/// let mut writer = HtmlWriter::new();
33/// let para = Node::Paragraph(vec![Node::Text("Hello, world!".into())]);
34/// writer.write_node(&para).unwrap();
35///
36/// let output = writer.into_string().unwrap();
37/// assert_eq!(output, "<p>Hello, world!</p>\n");
38/// ```
39///
40/// ## Building HTML elements manually
41///
42/// ```rust
43/// use cmark_writer::HtmlWriter;
44///
45/// let mut writer = HtmlWriter::new();
46///
47/// // Create a custom HTML element
48/// writer.start_tag("div").unwrap();
49/// writer.attribute("class", "container").unwrap();
50/// writer.finish_tag().unwrap();
51///
52/// writer.start_tag("h1").unwrap();
53/// writer.finish_tag().unwrap();
54/// writer.text("Welcome").unwrap();
55/// writer.end_tag("h1").unwrap();
56///
57/// writer.end_tag("div").unwrap();
58///
59/// let output = writer.into_string().unwrap();
60/// assert_eq!(output, "<div class=\"container\"><h1>Welcome</h1></div>");
61/// ```
62pub struct HtmlWriter {
63    /// Writer options
64    pub options: HtmlWriterOptions,
65    /// Buffer for storing the output text
66    pub(crate) buffer: EcoString,
67    /// Whether a tag is currently opened
68    tag_opened: bool,
69    /// Sink for reporting non-fatal diagnostics.
70    diagnostics: Box<dyn DiagnosticSink + 'static>,
71}
72
73impl fmt::Debug for HtmlWriter {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        f.debug_struct("HtmlWriter")
76            .field("options", &self.options)
77            .field("buffer", &self.buffer)
78            .field("tag_opened", &self.tag_opened)
79            .finish()
80    }
81}
82
83impl Default for HtmlWriter {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl HtmlWriter {
90    /// Creates a new HTML writer with default options.
91    pub fn new() -> Self {
92        Self::with_options(HtmlWriterOptions::default())
93    }
94
95    /// Creates a new HTML writer with the specified options.
96    pub fn with_options(options: HtmlWriterOptions) -> Self {
97        HtmlWriter {
98            options,
99            buffer: EcoString::new(),
100            tag_opened: false,
101            diagnostics: Box::new(NullSink),
102        }
103    }
104
105    /// Replace the diagnostic sink used to capture non-fatal issues.
106    pub fn with_diagnostic_sink(mut self, sink: Box<dyn DiagnosticSink + 'static>) -> Self {
107        self.diagnostics = sink;
108        self
109    }
110
111    /// Swap the diagnostic sink on an existing writer.
112    pub fn set_diagnostic_sink(&mut self, sink: Box<dyn DiagnosticSink + 'static>) {
113        self.diagnostics = sink;
114    }
115
116    /// Get a mutable handle to the diagnostic sink.
117    pub fn diagnostic_sink(&mut self) -> &mut dyn DiagnosticSink {
118        self.diagnostics.as_mut()
119    }
120
121    pub(crate) fn emit_warning<S: Into<EcoString>>(&mut self, message: S) {
122        let message = message.into();
123        self.diagnostics.emit(Diagnostic::warning(message.clone()));
124        log::warn!("{message}");
125    }
126
127    #[allow(dead_code)]
128    pub(crate) fn emit_info<S: Into<EcoString>>(&mut self, message: S) {
129        let message = message.into();
130        self.diagnostics.emit(Diagnostic::info(message.clone()));
131        log::info!("{message}");
132    }
133
134    #[allow(dead_code)]
135    pub(crate) fn emit_debug<S: Into<EcoString>>(&mut self, message: S) {
136        let message = message.into();
137        self.diagnostics.emit(Diagnostic::info(message.clone()));
138        log::debug!("{message}");
139    }
140
141    /// Updates the writer's options at runtime.
142    pub fn set_options(&mut self, options: HtmlWriterOptions) {
143        self.options = options;
144    }
145
146    /// Gets a reference to the current options.
147    pub fn options(&self) -> &HtmlWriterOptions {
148        &self.options
149    }
150
151    /// Gets a mutable reference to the current options.
152    pub fn options_mut(&mut self) -> &mut HtmlWriterOptions {
153        &mut self.options
154    }
155
156    /// Creates a new writer with modified options using a closure.
157    pub fn with_modified_options<F>(mut self, f: F) -> Self
158    where
159        F: FnOnce(&mut HtmlWriterOptions),
160    {
161        f(&mut self.options);
162        self
163    }
164
165    /// Consumes the writer and returns the generated HTML string.
166    pub fn into_string(mut self) -> HtmlWriteResult<EcoString> {
167        self.ensure_tag_closed()?;
168        Ok(self.buffer)
169    }
170
171    fn ensure_tag_closed(&mut self) -> HtmlWriteResult<()> {
172        if self.tag_opened {
173            self.buffer.push('>');
174            self.tag_opened = false;
175        }
176        Ok(())
177    }
178
179    /// Starts an HTML tag with the given name.
180    pub fn start_tag(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
181        self.ensure_tag_closed()?;
182        self.buffer.push('<');
183        self.buffer.push_str(tag_name);
184        self.tag_opened = true;
185        Ok(())
186    }
187
188    /// Adds an attribute to the currently open tag.
189    pub fn attribute(&mut self, key: &str, value: &str) -> HtmlWriteResult<()> {
190        if !self.tag_opened {
191            return Err(HtmlWriteError::InvalidHtmlTag(
192                "Cannot write attribute: no tag is currently open.".to_string(),
193            ));
194        }
195        self.buffer.push(' ');
196        self.buffer.push_str(key);
197        self.buffer.push_str("=\"");
198        self.buffer
199            .push_str(html_escape::encode_double_quoted_attribute(value).as_ref());
200        self.buffer.push('"');
201        Ok(())
202    }
203
204    /// Finishes the current open tag.
205    pub fn finish_tag(&mut self) -> HtmlWriteResult<()> {
206        if self.tag_opened {
207            self.buffer.push('>');
208            self.tag_opened = false;
209        }
210        Ok(())
211    }
212
213    /// Closes an HTML tag with the given name.
214    pub fn end_tag(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
215        self.ensure_tag_closed()?;
216        self.buffer.push_str("</");
217        self.buffer.push_str(tag_name);
218        self.buffer.push('>');
219        Ok(())
220    }
221
222    /// Writes text content, escaping HTML special characters.
223    pub fn text(&mut self, text: &str) -> HtmlWriteResult<()> {
224        self.ensure_tag_closed()?;
225        self.buffer
226            .push_str(html_escape::encode_text(text).as_ref());
227        Ok(())
228    }
229
230    /// Writes a self-closing tag with only a tag name.
231    pub fn self_closing_tag(&mut self, tag_name: &str) -> HtmlWriteResult<()> {
232        self.ensure_tag_closed()?;
233        self.buffer.push('<');
234        self.buffer.push_str(tag_name);
235        self.buffer.push_str(" />");
236        self.tag_opened = false;
237        Ok(())
238    }
239
240    /// Finishes the current open tag as a self-closing tag.
241    pub fn finish_self_closing_tag(&mut self) -> HtmlWriteResult<()> {
242        if !self.tag_opened {
243            return Err(HtmlWriteError::InvalidHtmlTag(
244                "Cannot finish self-closing tag: no tag is currently open.".to_string(),
245            ));
246        }
247        self.buffer.push_str(" />");
248        self.tag_opened = false;
249        Ok(())
250    }
251
252    /// Writes HTML content that is trusted to be well-formed and safe.
253    ///
254    /// Prefer this when the HTML fragment originates from the renderer itself
255    /// (e.g. structural newlines or tags we synthesise). External or
256    /// user-provided content should go through [`Self::write_untrusted_html`] to
257    /// ensure escaping.
258    pub fn write_trusted_html(&mut self, html: &str) -> HtmlWriteResult<()> {
259        self.ensure_tag_closed()?;
260        self.buffer.push_str(html);
261        Ok(())
262    }
263
264    /// Writes HTML content that may contain characters requiring escaping.
265    ///
266    /// This is a semantic alias for [`Self::text`], making call sites explicit about
267    /// handling potentially untrusted content.
268    pub fn write_untrusted_html(&mut self, html: &str) -> HtmlWriteResult<()> {
269        self.text(html)
270    }
271
272    pub(crate) fn guard_html_element<'a>(
273        &'a mut self,
274        element: &HtmlElement,
275    ) -> HtmlWriteResult<GuardedHtmlElement<'a>> {
276        #[cfg(feature = "gfm")]
277        if self.options.enable_gfm
278            && self
279                .options
280                .gfm_disallowed_html_tags
281                .iter()
282                .any(|tag| tag.eq_ignore_ascii_case(&element.tag))
283        {
284            self.emit_debug(format!(
285                "GFM: Textualizing disallowed HTML tag: <{}>",
286                element.tag
287            ));
288            return Ok(GuardedHtmlElement::Textualize);
289        }
290
291        if !crate::writer::html::utils::is_safe_tag_name(&element.tag) {
292            if self.options.strict {
293                return Err(HtmlWriteError::InvalidHtmlTag(element.tag.to_string()));
294            }
295
296            self.emit_warning(format!(
297                "Invalid HTML tag name '{}' encountered. Textualizing in non-strict mode.",
298                element.tag
299            ));
300            return Ok(GuardedHtmlElement::Textualize);
301        }
302
303        for attr in &element.attributes {
304            if !crate::writer::html::utils::is_safe_attribute_name(&attr.name) {
305                if self.options.strict {
306                    return Err(HtmlWriteError::InvalidHtmlAttribute(attr.name.to_string()));
307                }
308
309                self.emit_warning(format!(
310                    "Invalid attribute name '{}' encountered. Textualizing element in non-strict mode.",
311                    attr.name
312                ));
313                return Ok(GuardedHtmlElement::Textualize);
314            }
315        }
316
317        self.start_tag(&element.tag)?;
318        Ok(GuardedHtmlElement::Render(GuardedTagWriter::new(
319            self,
320            element.tag.clone(),
321        )))
322    }
323
324    /// Writes an HTML fragment without additional escaping.
325    ///
326    /// # Deprecation
327    /// Prefer using [`Self::write_trusted_html`] or [`Self::write_untrusted_html`] to make
328    /// the trust boundary explicit at the call site.
329    #[deprecated(
330        since = "0.8.0",
331        note = "Use write_trusted_html for trusted fragments or write_untrusted_html for escaping"
332    )]
333    pub fn raw_html(&mut self, html: &str) -> HtmlWriteResult<()> {
334        self.write_trusted_html(html)
335    }
336
337    /// Writes an AST `Node` to HTML using the configured options.
338    pub fn write_node(&mut self, node: &Node) -> HtmlWriteResult<()> {
339        walk_node(self, node)
340    }
341}
342
343impl NodeHandler for HtmlWriter {
344    type Error = HtmlWriteError;
345
346    fn document(&mut self, children: &[Node]) -> HtmlWriteResult<()> {
347        self.write_document(children)
348    }
349
350    fn paragraph(&mut self, content: &[Node]) -> HtmlWriteResult<()> {
351        self.write_paragraph(content)
352    }
353
354    fn text(&mut self, text: &EcoString) -> HtmlWriteResult<()> {
355        self.write_text(text)
356    }
357
358    fn emphasis(&mut self, content: &[Node]) -> HtmlWriteResult<()> {
359        self.write_emphasis(content)
360    }
361
362    fn strong(&mut self, content: &[Node]) -> HtmlWriteResult<()> {
363        self.write_strong(content)
364    }
365
366    fn thematic_break(&mut self) -> HtmlWriteResult<()> {
367        self.write_thematic_break()
368    }
369
370    fn heading(
371        &mut self,
372        level: u8,
373        content: &[Node],
374        _heading_type: &HeadingType,
375    ) -> HtmlWriteResult<()> {
376        self.write_heading(level, content)
377    }
378
379    fn inline_code(&mut self, code: &EcoString) -> HtmlWriteResult<()> {
380        self.write_inline_code(code)
381    }
382
383    fn code_block(
384        &mut self,
385        language: &Option<EcoString>,
386        content: &EcoString,
387        _kind: &CodeBlockType,
388    ) -> HtmlWriteResult<()> {
389        self.write_code_block(language, content)
390    }
391
392    fn html_block(&mut self, content: &EcoString) -> HtmlWriteResult<()> {
393        self.write_html_block(content)
394    }
395
396    fn html_element(&mut self, element: &HtmlElement) -> HtmlWriteResult<()> {
397        self.write_html_element(element)
398    }
399
400    fn block_quote(&mut self, content: &[Node]) -> HtmlWriteResult<()> {
401        self.write_blockquote(content)
402    }
403
404    fn unordered_list(&mut self, items: &[ListItem]) -> HtmlWriteResult<()> {
405        self.write_unordered_list(items)
406    }
407
408    fn ordered_list(&mut self, start: u32, items: &[ListItem]) -> HtmlWriteResult<()> {
409        self.write_ordered_list(start, items)
410    }
411
412    #[cfg(feature = "gfm")]
413    fn table(
414        &mut self,
415        headers: &[Node],
416        alignments: &[TableAlignment],
417        rows: &[Vec<Node>],
418    ) -> HtmlWriteResult<()> {
419        self.write_table(headers, alignments, rows)
420    }
421
422    #[cfg(not(feature = "gfm"))]
423    fn table(&mut self, headers: &[Node], rows: &[Vec<Node>]) -> HtmlWriteResult<()> {
424        self.write_table(headers, rows)
425    }
426
427    fn link(
428        &mut self,
429        url: &EcoString,
430        title: &Option<EcoString>,
431        content: &[Node],
432    ) -> HtmlWriteResult<()> {
433        self.write_link(url, title, content)
434    }
435
436    fn image(
437        &mut self,
438        url: &EcoString,
439        title: &Option<EcoString>,
440        alt: &[Node],
441    ) -> HtmlWriteResult<()> {
442        self.write_image(url, title, alt)
443    }
444
445    fn soft_break(&mut self) -> HtmlWriteResult<()> {
446        self.write_soft_break()
447    }
448
449    fn hard_break(&mut self) -> HtmlWriteResult<()> {
450        self.write_hard_break()
451    }
452
453    fn autolink(&mut self, url: &EcoString, is_email: bool) -> HtmlWriteResult<()> {
454        self.write_autolink(url, is_email)
455    }
456
457    #[cfg(feature = "gfm")]
458    fn extended_autolink(&mut self, url: &EcoString) -> HtmlWriteResult<()> {
459        self.write_extended_autolink(url)
460    }
461
462    fn link_reference_definition(
463        &mut self,
464        _label: &EcoString,
465        _destination: &EcoString,
466        _title: &Option<EcoString>,
467    ) -> HtmlWriteResult<()> {
468        Ok(())
469    }
470
471    fn reference_link(&mut self, label: &EcoString, content: &[Node]) -> HtmlWriteResult<()> {
472        self.write_reference_link(label, content)
473    }
474
475    #[cfg(feature = "gfm")]
476    fn strikethrough(&mut self, content: &[Node]) -> HtmlWriteResult<()> {
477        self.write_strikethrough(content)
478    }
479
480    fn custom(&mut self, node: &dyn CustomNode) -> HtmlWriteResult<()> {
481        node.html_write(self)
482    }
483
484    fn unsupported(&mut self, node: &Node) -> HtmlWriteResult<()> {
485        #[cfg(not(feature = "gfm"))]
486        if let Node::ExtendedAutolink(url) = node {
487            self.emit_warning(
488                format!(
489                    "ExtendedAutolink encountered but GFM feature is not enabled. Rendering as text: {url}"
490                ),
491            );
492            return self.text(url);
493        }
494
495        Err(HtmlWriteError::UnsupportedNodeType(format!("{node:?}")))
496    }
497}