// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! HTML renderer that takes an iterator of events as input.
22
23use alloc::{string::String, vec::Vec};
24#[cfg(feature = "std")]
25use pulldown_cmark_escape::IoWriter;
26use pulldown_cmark_escape::{escape_href, escape_html, escape_html_body_text, FmtWriter, StrWrite};
27use rustc_hash::FxHashMap;
28
29use crate::{
30    strings::CowStr,
31    Alignment, BlockQuoteKind, CodeBlockKind,
32    ContainerKind::*,
33    Event::{self, *},
34    LinkType, Tag, TagEnd,
35};
36
/// Which section of a table is currently being rendered.
///
/// The renderer uses this to choose between header (`<th>`) and body (`<td>`)
/// cell markup.
enum TableState {
    /// Inside the header row; cells are written as `<th>`.
    Head,
    /// Past the header row; cells are written as `<td>`.
    Body,
}
41
42struct HtmlWriter<'a, I, W> {
43    /// Iterator supplying events.
44    iter: I,
45
46    /// Writer to write to.
47    writer: W,
48
49    /// Whether or not the last write wrote a newline.
50    end_newline: bool,
51
52    /// Whether if inside a metadata block (text should not be written)
53    in_non_writing_block: bool,
54
55    table_state: TableState,
56    table_alignments: Vec<Alignment>,
57    table_cell_index: usize,
58    numbers: FxHashMap<CowStr<'a>, usize>,
59}
60
61impl<'a, I, W> HtmlWriter<'a, I, W>
62where
63    I: Iterator<Item = Event<'a>>,
64    W: StrWrite,
65{
66    fn new(iter: I, writer: W) -> Self {
67        Self {
68            iter,
69            writer,
70            end_newline: true,
71            in_non_writing_block: false,
72            table_state: TableState::Head,
73            table_alignments: vec![],
74            table_cell_index: 0,
75            numbers: FxHashMap::default(),
76        }
77    }
78
79    /// Writes a new line.
80    #[inline]
81    fn write_newline(&mut self) -> Result<(), W::Error> {
82        self.end_newline = true;
83        self.writer.write_str("\n")
84    }
85
86    /// Writes a buffer, and tracks whether or not a newline was written.
87    #[inline]
88    fn write(&mut self, s: &str) -> Result<(), W::Error> {
89        self.writer.write_str(s)?;
90
91        if !s.is_empty() {
92            self.end_newline = s.ends_with('\n');
93        }
94        Ok(())
95    }
96
97    fn run(mut self) -> Result<(), W::Error> {
98        while let Some(event) = self.iter.next() {
99            match event {
100                Start(tag) => {
101                    self.start_tag(tag)?;
102                }
103                End(tag) => {
104                    self.end_tag(tag)?;
105                }
106                Text(text) => {
107                    if !self.in_non_writing_block {
108                        escape_html_body_text(&mut self.writer, &text)?;
109                        self.end_newline = text.ends_with('\n');
110                    }
111                }
112                Code(text) => {
113                    self.write("<code>")?;
114                    escape_html_body_text(&mut self.writer, &text)?;
115                    self.write("</code>")?;
116                }
117                InlineMath(text) => {
118                    self.write(r#"<span class="math math-inline">"#)?;
119                    escape_html(&mut self.writer, &text)?;
120                    self.write("</span>")?;
121                }
122                DisplayMath(text) => {
123                    self.write(r#"<span class="math math-display">"#)?;
124                    escape_html(&mut self.writer, &text)?;
125                    self.write("</span>")?;
126                }
127                Html(html) | InlineHtml(html) => {
128                    self.write(&html)?;
129                }
130                SoftBreak => {
131                    self.write_newline()?;
132                }
133                HardBreak => {
134                    self.write("<br />\n")?;
135                }
136                Rule => {
137                    if self.end_newline {
138                        self.write("<hr />\n")?;
139                    } else {
140                        self.write("\n<hr />\n")?;
141                    }
142                }
143                FootnoteReference(name) => {
144                    let len = self.numbers.len() + 1;
145                    self.write("<sup class=\"footnote-reference\"><a href=\"#")?;
146                    escape_html(&mut self.writer, &name)?;
147                    self.write("\">")?;
148                    let number = *self.numbers.entry(name).or_insert(len);
149                    write!(&mut self.writer, "{}", number)?;
150                    self.write("</a></sup>")?;
151                }
152                TaskListMarker(true) => {
153                    self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?;
154                }
155                TaskListMarker(false) => {
156                    self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?;
157                }
158                MdxFlowExpression(s) | MdxTextExpression(s) => {
159                    self.write("{")?;
160                    self.write(&s)?;
161                    self.write("}")?;
162                }
163                MdxEsm(s) => {
164                    self.write(&s)?;
165                    self.write("\n")?;
166                }
167            }
168        }
169        Ok(())
170    }
171
172    /// Writes the start of an HTML tag.
173    fn start_tag(&mut self, tag: Tag<'a>) -> Result<(), W::Error> {
174        match tag {
175            Tag::HtmlBlock => Ok(()),
176            Tag::Paragraph => {
177                if self.end_newline {
178                    self.write("<p>")
179                } else {
180                    self.write("\n<p>")
181                }
182            }
183            Tag::Heading {
184                level,
185                id,
186                classes,
187                attrs,
188            } => {
189                if self.end_newline {
190                    self.write("<")?;
191                } else {
192                    self.write("\n<")?;
193                }
194                write!(&mut self.writer, "{}", level)?;
195                if let Some(id) = id {
196                    self.write(" id=\"")?;
197                    escape_html(&mut self.writer, &id)?;
198                    self.write("\"")?;
199                }
200                let mut classes = classes.iter();
201                if let Some(class) = classes.next() {
202                    self.write(" class=\"")?;
203                    escape_html(&mut self.writer, class)?;
204                    for class in classes {
205                        self.write(" ")?;
206                        escape_html(&mut self.writer, class)?;
207                    }
208                    self.write("\"")?;
209                }
210                for (attr, value) in attrs {
211                    self.write(" ")?;
212                    escape_html(&mut self.writer, &attr)?;
213                    if let Some(val) = value {
214                        self.write("=\"")?;
215                        escape_html(&mut self.writer, &val)?;
216                        self.write("\"")?;
217                    } else {
218                        self.write("=\"\"")?;
219                    }
220                }
221                self.write(">")
222            }
223            Tag::Table(alignments) => {
224                self.table_alignments = alignments;
225                self.write("<table>")
226            }
227            Tag::TableHead => {
228                self.table_state = TableState::Head;
229                self.table_cell_index = 0;
230                self.write("<thead><tr>")
231            }
232            Tag::TableRow => {
233                self.table_cell_index = 0;
234                self.write("<tr>")
235            }
236            Tag::TableCell => {
237                match self.table_state {
238                    TableState::Head => {
239                        self.write("<th")?;
240                    }
241                    TableState::Body => {
242                        self.write("<td")?;
243                    }
244                }
245                match self.table_alignments.get(self.table_cell_index) {
246                    Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"),
247                    Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"),
248                    Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"),
249                    _ => self.write(">"),
250                }
251            }
252            Tag::BlockQuote(kind) => {
253                let class_str = match kind {
254                    None => "",
255                    Some(kind) => match kind {
256                        BlockQuoteKind::Note => " class=\"markdown-alert-note\"",
257                        BlockQuoteKind::Tip => " class=\"markdown-alert-tip\"",
258                        BlockQuoteKind::Important => " class=\"markdown-alert-important\"",
259                        BlockQuoteKind::Warning => " class=\"markdown-alert-warning\"",
260                        BlockQuoteKind::Caution => " class=\"markdown-alert-caution\"",
261                    },
262                };
263                if self.end_newline {
264                    self.write(&format!("<blockquote{}>\n", class_str))
265                } else {
266                    self.write(&format!("\n<blockquote{}>\n", class_str))
267                }
268            }
269            Tag::CodeBlock(info) => {
270                if !self.end_newline {
271                    self.write_newline()?;
272                }
273                match info {
274                    CodeBlockKind::Fenced(info) => {
275                        let lang = info.split(' ').next().unwrap();
276                        if lang.is_empty() {
277                            self.write("<pre><code>")
278                        } else {
279                            self.write("<pre><code class=\"language-")?;
280                            escape_html(&mut self.writer, lang)?;
281                            self.write("\">")
282                        }
283                    }
284                    CodeBlockKind::Indented => self.write("<pre><code>"),
285                }
286            }
287            Tag::ContainerBlock(Default, kind) => {
288                if !self.end_newline {
289                    self.write_newline()?;
290                }
291                self.write("<div class=\"")?;
292                escape_html(&mut self.writer, &kind)?;
293                self.write("\">")
294            }
295            Tag::ContainerBlock(Spoiler, summary) => {
296                if !self.end_newline {
297                    self.write_newline()?;
298                }
299                if summary.is_empty() {
300                    self.write("<details>")
301                } else {
302                    self.write("<details><summary>")?;
303                    escape_html(&mut self.writer, summary.as_ref())?;
304                    self.write("</summary>")
305                }
306            }
307            Tag::List(Some(1), _) => {
308                if self.end_newline {
309                    self.write("<ol>\n")
310                } else {
311                    self.write("\n<ol>\n")
312                }
313            }
314            Tag::List(Some(start), _) => {
315                if self.end_newline {
316                    self.write("<ol start=\"")?;
317                } else {
318                    self.write("\n<ol start=\"")?;
319                }
320                write!(&mut self.writer, "{}", start)?;
321                self.write("\">\n")
322            }
323            Tag::List(None, _) => {
324                if self.end_newline {
325                    self.write("<ul>\n")
326                } else {
327                    self.write("\n<ul>\n")
328                }
329            }
330            Tag::Item => {
331                if self.end_newline {
332                    self.write("<li>")
333                } else {
334                    self.write("\n<li>")
335                }
336            }
337            Tag::DefinitionList => {
338                if self.end_newline {
339                    self.write("<dl>\n")
340                } else {
341                    self.write("\n<dl>\n")
342                }
343            }
344            Tag::DefinitionListTitle => {
345                if self.end_newline {
346                    self.write("<dt>")
347                } else {
348                    self.write("\n<dt>")
349                }
350            }
351            Tag::DefinitionListDefinition => {
352                if self.end_newline {
353                    self.write("<dd>")
354                } else {
355                    self.write("\n<dd>")
356                }
357            }
358            Tag::Subscript => self.write("<sub>"),
359            Tag::Superscript => self.write("<sup>"),
360            Tag::Emphasis => self.write("<em>"),
361            Tag::Strong => self.write("<strong>"),
362            Tag::Strikethrough => self.write("<del>"),
363            Tag::Link {
364                link_type: LinkType::Email,
365                dest_url,
366                title,
367                id: _,
368            } => {
369                self.write("<a href=\"mailto:")?;
370                escape_href(&mut self.writer, &dest_url)?;
371                if !title.is_empty() {
372                    self.write("\" title=\"")?;
373                    escape_html(&mut self.writer, &title)?;
374                }
375                self.write("\">")
376            }
377            Tag::Link {
378                link_type: _,
379                dest_url,
380                title,
381                id: _,
382            } => {
383                self.write("<a href=\"")?;
384                escape_href(&mut self.writer, &dest_url)?;
385                if !title.is_empty() {
386                    self.write("\" title=\"")?;
387                    escape_html(&mut self.writer, &title)?;
388                }
389                self.write("\">")
390            }
391            Tag::Image {
392                link_type: _,
393                dest_url,
394                title,
395                id: _,
396            } => {
397                self.write("<img src=\"")?;
398                escape_href(&mut self.writer, &dest_url)?;
399                self.write("\" alt=\"")?;
400                self.raw_text()?;
401                if !title.is_empty() {
402                    self.write("\" title=\"")?;
403                    escape_html(&mut self.writer, &title)?;
404                }
405                self.write("\" />")
406            }
407            Tag::FootnoteDefinition(name) => {
408                if self.end_newline {
409                    self.write("<div class=\"footnote-definition\" id=\"")?;
410                } else {
411                    self.write("\n<div class=\"footnote-definition\" id=\"")?;
412                }
413                escape_html(&mut self.writer, &name)?;
414                self.write("\"><sup class=\"footnote-definition-label\">")?;
415                let len = self.numbers.len() + 1;
416                let number = *self.numbers.entry(name).or_insert(len);
417                write!(&mut self.writer, "{}", number)?;
418                self.write("</sup>")
419            }
420            Tag::MetadataBlock(_) => {
421                self.in_non_writing_block = true;
422                Ok(())
423            }
424            Tag::MdxJsxFlowElement(_) | Tag::MdxJsxTextElement(_) => {
425                // MDX JSX elements pass through as-is (not rendered to HTML).
426                Ok(())
427            }
428        }
429    }
430
431    fn end_tag(&mut self, tag: TagEnd) -> Result<(), W::Error> {
432        match tag {
433            TagEnd::HtmlBlock => {}
434            TagEnd::Paragraph => {
435                self.write("</p>\n")?;
436            }
437            TagEnd::Heading(level) => {
438                self.write("</")?;
439                write!(&mut self.writer, "{}", level)?;
440                self.write(">\n")?;
441            }
442            TagEnd::Table => {
443                self.write("</tbody></table>\n")?;
444            }
445            TagEnd::TableHead => {
446                self.write("</tr></thead><tbody>\n")?;
447                self.table_state = TableState::Body;
448            }
449            TagEnd::TableRow => {
450                self.write("</tr>\n")?;
451            }
452            TagEnd::TableCell => {
453                match self.table_state {
454                    TableState::Head => {
455                        self.write("</th>")?;
456                    }
457                    TableState::Body => {
458                        self.write("</td>")?;
459                    }
460                }
461                self.table_cell_index += 1;
462            }
463            TagEnd::BlockQuote(_) => {
464                self.write("</blockquote>\n")?;
465            }
466            TagEnd::CodeBlock => {
467                self.write("</code></pre>\n")?;
468            }
469            TagEnd::ContainerBlock(Spoiler) => {
470                self.write("</details>\n")?;
471            }
472            TagEnd::ContainerBlock(Default) => {
473                self.write("</div>\n")?;
474            }
475            TagEnd::List(true) => {
476                self.write("</ol>\n")?;
477            }
478            TagEnd::List(false) => {
479                self.write("</ul>\n")?;
480            }
481            TagEnd::Item => {
482                self.write("</li>\n")?;
483            }
484            TagEnd::DefinitionList => {
485                self.write("</dl>\n")?;
486            }
487            TagEnd::DefinitionListTitle => {
488                self.write("</dt>\n")?;
489            }
490            TagEnd::DefinitionListDefinition => {
491                self.write("</dd>\n")?;
492            }
493            TagEnd::Emphasis => {
494                self.write("</em>")?;
495            }
496            TagEnd::Superscript => {
497                self.write("</sup>")?;
498            }
499            TagEnd::Subscript => {
500                self.write("</sub>")?;
501            }
502            TagEnd::Strong => {
503                self.write("</strong>")?;
504            }
505            TagEnd::Strikethrough => {
506                self.write("</del>")?;
507            }
508            TagEnd::Link => {
509                self.write("</a>")?;
510            }
511            TagEnd::Image => (), // shouldn't happen, handled in start
512            TagEnd::FootnoteDefinition => {
513                self.write("</div>\n")?;
514            }
515            TagEnd::MetadataBlock(_) => {
516                self.in_non_writing_block = false;
517            }
518            TagEnd::MdxJsxFlowElement | TagEnd::MdxJsxTextElement => {}
519        }
520        Ok(())
521    }
522
523    // run raw text, consuming end tag
524    fn raw_text(&mut self) -> Result<(), W::Error> {
525        let mut nest = 0;
526        while let Some(event) = self.iter.next() {
527            match event {
528                Start(_) => nest += 1,
529                End(_) => {
530                    if nest == 0 {
531                        break;
532                    }
533                    nest -= 1;
534                }
535                Html(_) => {}
536                InlineHtml(text) | Code(text) | Text(text) => {
537                    // Don't use escape_html_body_text here.
538                    // The output of this function is used in the `alt` attribute.
539                    escape_html(&mut self.writer, &text)?;
540                    self.end_newline = text.ends_with('\n');
541                }
542                InlineMath(text) => {
543                    self.write("$")?;
544                    escape_html(&mut self.writer, &text)?;
545                    self.write("$")?;
546                }
547                DisplayMath(text) => {
548                    self.write("$$")?;
549                    escape_html(&mut self.writer, &text)?;
550                    self.write("$$")?;
551                }
552                SoftBreak | HardBreak | Rule => {
553                    self.write(" ")?;
554                }
555                FootnoteReference(name) => {
556                    let len = self.numbers.len() + 1;
557                    let number = *self.numbers.entry(name).or_insert(len);
558                    write!(&mut self.writer, "[{}]", number)?;
559                }
560                TaskListMarker(true) => self.write("[x]")?,
561                TaskListMarker(false) => self.write("[ ]")?,
562                MdxFlowExpression(_) | MdxTextExpression(_) | MdxEsm(_) => {}
563            }
564        }
565        Ok(())
566    }
567}
568
569/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
570/// push it to a `String`.
571///
572/// # Examples
573///
574/// ```
575/// use satteri_pulldown_cmark::{html, Parser};
576///
577/// let markdown_str = r#"
578/// hello
579/// =====
580///
581/// * alpha
582/// * beta
583/// "#;
584/// let parser = Parser::new(markdown_str);
585///
586/// let mut html_buf = String::new();
587/// html::push_html(&mut html_buf, parser);
588///
589/// assert_eq!(html_buf, r#"<h1>hello</h1>
590/// <ul>
591/// <li>alpha</li>
592/// <li>beta</li>
593/// </ul>
594/// "#);
595/// ```
596pub fn push_html<'a, I>(s: &mut String, iter: I)
597where
598    I: Iterator<Item = Event<'a>>,
599{
600    write_html_fmt(s, iter).unwrap()
601}
602
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
/// write it out to an I/O stream.
///
/// **Note**: using this function with an unbuffered writer like a file or socket
/// will result in poor performance. Wrap these in a
/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to
/// prevent unnecessary slowdowns.
///
/// # Errors
///
/// Returns the first I/O error reported while writing to `writer`.
///
/// # Examples
///
/// ```
/// use satteri_pulldown_cmark::{html, Parser};
/// use std::io::Cursor;
///
/// let markdown_str = r#"
/// hello
/// =====
///
/// * alpha
/// * beta
/// "#;
/// let mut bytes = Vec::new();
/// let parser = Parser::new(markdown_str);
///
/// html::write_html_io(Cursor::new(&mut bytes), parser).unwrap();
///
/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1>
/// <ul>
/// <li>alpha</li>
/// <li>beta</li>
/// </ul>
/// "#);
/// ```
#[cfg(feature = "std")]
pub fn write_html_io<'a, I, W>(writer: W, iter: I) -> std::io::Result<()>
where
    I: Iterator<Item = Event<'a>>,
    W: std::io::Write,
{
    HtmlWriter::new(iter, IoWriter(writer)).run()
}
644
645/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
646/// write it into Unicode-accepting buffer or stream.
647///
648/// # Examples
649///
650/// ```
651/// use satteri_pulldown_cmark::{html, Parser};
652///
653/// let markdown_str = r#"
654/// hello
655/// =====
656///
657/// * alpha
658/// * beta
659/// "#;
660/// let mut buf = String::new();
661/// let parser = Parser::new(markdown_str);
662///
663/// html::write_html_fmt(&mut buf, parser);
664///
665/// assert_eq!(buf, r#"<h1>hello</h1>
666/// <ul>
667/// <li>alpha</li>
668/// <li>beta</li>
669/// </ul>
670/// "#);
671/// ```
672pub fn write_html_fmt<'a, I, W>(writer: W, iter: I) -> core::fmt::Result
673where
674    I: Iterator<Item = Event<'a>>,
675    W: core::fmt::Write,
676{
677    HtmlWriter::new(iter, FmtWriter(writer)).run()
678}