// pulldown_cmark_fork/html.rs

// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! HTML renderer that takes an iterator of events as input.

use std::collections::HashMap;
use std::fmt::{Arguments, Write as FmtWrite};
use std::io::{self, ErrorKind, Write};

use crate::escape::{escape_href, escape_html};
use crate::parse::Event::*;
use crate::parse::{Alignment, Event, LinkType, Tag};
use crate::strings::CowStr;

/// Section of a table that is currently being rendered.
///
/// The renderer consults this to decide whether a table cell is written as
/// `<th>` or as `<td>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TableState {
    /// Inside the header row; cells are written as `<th>`.
    Head,
    /// Inside the body rows; cells are written as `<td>`.
    Body,
}

/// Newtype around a `std::io::Write` sink.
///
/// This wrapper exists because we can't have both a blanket implementation
/// for all types implementing `Write` and one for types of the form `&mut W`
/// where `W: StrWrite`. Since we need the latter a lot, we choose to wrap
/// `Write` types.
struct WriteWrapper<W>(W);

/// Trait that allows writing string slices. This is basically an extension
/// of `std::io::Write` in order to include `String`.
pub(crate) trait StrWrite {
    /// Writes the string slice `s` to the sink.
    fn write_str(&mut self, s: &str) -> io::Result<()>;

    /// Writes pre-compiled format arguments to the sink.
    ///
    /// Having a method with this name is what lets `write!` be used on
    /// implementors of this trait.
    fn write_fmt(&mut self, args: Arguments<'_>) -> io::Result<()>;
}

51impl<W> StrWrite for WriteWrapper<W>
52    where W: Write
53{
54    #[inline]
55    fn write_str(&mut self, s: &str) -> io::Result<()> {
56        self.0.write_all(s.as_bytes())
57    }
58
59    #[inline]
60    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
61        self.0.write_fmt(args)
62    }
63}
64
65impl<'w> StrWrite for String {
66    #[inline]
67    fn write_str(&mut self, s: &str) -> io::Result<()> {
68        self.push_str(s);
69        Ok(())
70    }
71
72    #[inline]
73    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
74        // FIXME: translate fmt error to io error?
75        FmtWrite::write_fmt(self, args).map_err(|_| ErrorKind::Other.into())
76    }
77}
78
79impl<W> StrWrite for &'_ mut W
80    where W: StrWrite
81{
82    #[inline]
83    fn write_str(&mut self, s: &str) -> io::Result<()> {
84        (**self).write_str(s)
85    }
86
87    #[inline]
88    fn write_fmt(&mut self, args: Arguments) -> io::Result<()> {
89        (**self).write_fmt(args)
90    }
91}
92
93struct HtmlWriter<'a, I, W> {
94    /// Iterator supplying events.
95    iter: I,
96
97    /// Writer to write to.
98    writer: W,
99
100    /// Whether or not the last write wrote a newline.
101    end_newline: bool,
102
103    table_state: TableState,
104    table_alignments: Vec<Alignment>,
105    table_cell_index: usize,
106    numbers: HashMap<CowStr<'a>, usize>,
107}
108
109impl<'a, I, W> HtmlWriter<'a, I, W>
110where
111    I: Iterator<Item = Event<'a>>,
112    W: StrWrite,
113{
114    fn new(iter: I, writer: W) -> Self {
115        Self {
116            iter,
117            writer,
118            end_newline: true,
119            table_state: TableState::Head,
120            table_alignments: vec![],
121            table_cell_index: 0,
122            numbers: HashMap::new(),
123        }
124    }
125
126    /// Writes a new line.
127    fn write_newline(&mut self) -> io::Result<()> {
128        self.end_newline = true;
129        self.writer.write_str("\n")
130    }
131
132    /// Writes a buffer, and tracks whether or not a newline was written.
133    #[inline]
134    fn write(&mut self, s: &str) -> io::Result<()> {
135        self.writer.write_str(s)?;
136
137        if !s.is_empty() {
138            self.end_newline = s.ends_with('\n');
139        }
140        Ok(())
141    }
142
143    pub fn run(mut self) -> io::Result<()> {
144        while let Some(event) = self.iter.next() {
145            match event {
146                Start(tag) => {
147                    self.start_tag(tag)?;
148                }
149                End(tag) => {
150                    self.end_tag(tag)?;
151                }
152                Text(text) => {
153                    escape_html(&mut self.writer, &text)?;
154                    self.end_newline = text.ends_with('\n');
155                }
156                Code(text) => {
157                    self.write("<code>")?;
158                    escape_html(&mut self.writer, &text)?;
159                    self.write("</code>")?;
160                }
161                Html(html) | InlineHtml(html) => {
162                    self.write(&html)?;
163                }
164                SoftBreak => {
165                    self.write_newline()?;
166                }
167                HardBreak => {
168                    self.write("<br />\n")?;
169                }
170                FootnoteReference(name) => {
171                    let len = self.numbers.len() + 1;
172                    self.write("<sup class=\"footnote-reference\"><a href=\"#")?;
173                    escape_html(&mut self.writer, &name)?;
174                    self.write("\">")?;
175                    let number = *self.numbers.entry(name).or_insert(len);
176                    write!(&mut self.writer, "{}", number)?;
177                    self.write("</a></sup>")?;
178                }
179                TaskListMarker(true) => {
180                    self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?;
181                }
182                TaskListMarker(false) => {
183                    self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?;
184                }
185            }
186        }
187        Ok(())
188    }
189
190    /// Writes the start of an HTML tag.
191    fn start_tag(&mut self, tag: Tag<'a>) -> io::Result<()> {
192        match tag {
193            Tag::Paragraph => {
194                if self.end_newline {
195                    self.write("<p>")
196                } else {
197                    self.write("\n<p>")
198                }
199            }
200            Tag::Rule => {
201                if self.end_newline {
202                    self.write("<hr />\n")
203                } else {
204                    self.write("\n<hr />\n")
205                }
206            }
207            Tag::Header(level) => {
208                if self.end_newline {
209                    self.end_newline = false;
210                    write!(&mut self.writer, "<h{}>", level)
211                } else {
212                    write!(&mut self.writer, "\n<h{}>", level)
213                }
214            }
215            Tag::Table(alignments) => {
216                self.table_alignments = alignments;
217                self.write("<table>")
218            }
219            Tag::TableHead => {
220                self.table_state = TableState::Head;
221                self.table_cell_index = 0;
222                self.write("<thead><tr>")
223            }
224            Tag::TableRow => {
225                self.table_cell_index = 0;
226                self.write("<tr>")
227            }
228            Tag::TableCell => {
229                match self.table_state {
230                    TableState::Head => {
231                        self.write("<th")?;
232                    }
233                    TableState::Body => {
234                        self.write("<td")?;
235                    }
236                }
237                match self.table_alignments.get(self.table_cell_index) {
238                    Some(&Alignment::Left) => {
239                        self.write(" align=\"left\">")
240                    }
241                    Some(&Alignment::Center) => {
242                        self.write(" align=\"center\">")
243                    }
244                    Some(&Alignment::Right) => {
245                        self.write(" align=\"right\">")
246                    }
247                    _ => self.write(">"),
248                }
249            }
250            Tag::BlockQuote => {
251                if self.end_newline {
252                    self.write("<blockquote>\n")
253                } else {
254                    self.write("\n<blockquote>\n")
255                }
256            }
257            Tag::CodeBlock(info) => {
258                if !self.end_newline {
259                    self.write_newline()?;
260                }
261                let lang = info.split(' ').next().unwrap();
262                if lang.is_empty() {
263                    self.write("<pre><code>")
264                } else {
265                    self.write("<pre><code class=\"language-")?;
266                    escape_html(&mut self.writer, lang)?;
267                    self.write("\">")
268                }
269            }
270            Tag::List(Some(1)) => {
271                if self.end_newline {
272                    self.write("<ol>\n")
273                } else {
274                    self.write("\n<ol>\n")
275                }
276            }
277            Tag::List(Some(start)) => {
278                if self.end_newline {
279                    self.write("<ol start=\"")?;
280                } else {
281                    self.write("\n<ol start=\"")?;
282                }
283                write!(&mut self.writer, "{}", start)?;
284                self.write("\">\n")
285            }
286            Tag::List(None) => {
287                if self.end_newline {
288                    self.write("<ul>\n")
289                } else {
290                    self.write("\n<ul>\n")
291                }
292            }
293            Tag::Item => {
294                if self.end_newline {
295                    self.write("<li>")
296                } else {
297                    self.write("\n<li>")
298                }
299            }
300            Tag::Emphasis => self.write("<em>"),
301            Tag::Strong => self.write("<strong>"),
302            Tag::Strikethrough => self.write("<del>"),
303            Tag::Link(LinkType::Email, dest, title) => {
304                self.write("<a href=\"mailto:")?;
305                escape_href(&mut self.writer, &dest)?;
306                if !title.is_empty() {
307                    self.write("\" title=\"")?;
308                    escape_html(&mut self.writer, &title)?;
309                }
310                self.write("\">")
311            }
312            Tag::Link(_link_type, dest, title) => {
313                self.write("<a href=\"")?;
314                escape_href(&mut self.writer, &dest)?;
315                if !title.is_empty() {
316                    self.write("\" title=\"")?;
317                    escape_html(&mut self.writer, &title)?;
318                }
319                self.write("\">")
320            }
321            Tag::Image(_link_type, dest, title) => {
322                self.write("<img src=\"")?;
323                self.write("\" data-src=\"")?;
324                escape_href(&mut self.writer, &dest)?;
325                self.write("\" alt=\"")?;
326                self.raw_text()?;
327                if !title.is_empty() {
328                    self.write("\" title=\"")?;
329                    escape_html(&mut self.writer, &title)?;
330                }
331                self.write("\" />")
332            }
333            Tag::FootnoteDefinition(name) => {
334                if self.end_newline {
335                    self.write("<div class=\"footnote-definition\" id=\"")?;
336                } else {
337                    self.write("\n<div class=\"footnote-definition\" id=\"")?;
338                }
339                escape_html(&mut self.writer, &*name)?;
340                self.write("\"><sup class=\"footnote-definition-label\">")?;
341                let len = self.numbers.len() + 1;
342                let number = *self.numbers.entry(name).or_insert(len);
343                write!(&mut self.writer, "{}", number)?;
344                self.write("</sup>")
345            }
346            Tag::HtmlBlock => Ok(())
347        }
348    }
349
350    fn end_tag(&mut self, tag: Tag) -> io::Result<()> {
351        match tag {
352            Tag::Paragraph => {
353                self.write("</p>\n")?;
354            }
355            Tag::Rule => (),
356            Tag::Header(level) => {
357                self.write("</h")?;
358                write!(&mut self.writer, "{}", level)?;
359                self.write(">\n")?;
360            }
361            Tag::Table(_) => {
362                self.write("</tbody></table>\n")?;
363            }
364            Tag::TableHead => {
365                self.write("</tr></thead><tbody>\n")?;
366                self.table_state = TableState::Body;
367            }
368            Tag::TableRow => {
369                self.write("</tr>\n")?;
370            }
371            Tag::TableCell => {
372                match self.table_state {
373                    TableState::Head => {
374                        self.write("</th>")?;
375                    }
376                    TableState::Body => {
377                        self.write("</td>")?;
378                    }
379                }
380                self.table_cell_index += 1;
381            }
382            Tag::BlockQuote => {
383                self.write("</blockquote>\n")?;
384            }
385            Tag::CodeBlock(_) => {
386                self.write("</code></pre>\n")?;
387            }
388            Tag::List(Some(_)) => {
389                self.write("</ol>\n")?;
390            }
391            Tag::List(None) => {
392                self.write("</ul>\n")?;
393            }
394            Tag::Item => {
395                self.write("</li>\n")?;
396            }
397            Tag::Emphasis => {
398                self.write("</em>")?;
399            }
400            Tag::Strong => {
401                self.write("</strong>")?;
402            }
403            Tag::Strikethrough => {
404                self.write("</del>")?;
405            }
406            Tag::Link(_, _, _) => {
407                self.write("</a>")?;
408            }
409            Tag::Image(_, _, _) => (), // shouldn't happen, handled in start
410            Tag::FootnoteDefinition(_) => {
411                self.write("</div>\n")?;
412            }
413            Tag::HtmlBlock => {}
414        }
415        Ok(())
416    }
417
418    // run raw text, consuming end tag
419    fn raw_text(&mut self) -> io::Result<()> {
420        let mut nest = 0;
421        while let Some(event) = self.iter.next() {
422            match event {
423                Start(_) => nest += 1,
424                End(_) => {
425                    if nest == 0 {
426                        break;
427                    }
428                    nest -= 1;
429                }
430                Html(_) => (),
431                InlineHtml(text) | Code(text) | Text(text) => {
432                    escape_html(&mut self.writer, &text)?;
433                    self.end_newline = text.ends_with('\n');
434                }
435                SoftBreak | HardBreak => {
436                    self.write(" ")?;
437                }
438                FootnoteReference(name) => {
439                    let len = self.numbers.len() + 1;
440                    let number = *self.numbers.entry(name).or_insert(len);
441                    write!(&mut self.writer, "[{}]", number)?;
442                }
443                TaskListMarker(true) => self.write("[x]")?,
444                TaskListMarker(false) => self.write("[ ]")?,
445            }
446        }
447        Ok(())
448    }
449}
450
451/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
452/// push it to a `String`.
453///
454/// # Examples
455///
456/// ```
457/// use pulldown_cmark_fork::{html, Parser};
458///
459/// let markdown_str = r#"
460/// hello
461/// =====
462///
463/// * alpha
464/// * beta
465/// "#;
466/// let parser = Parser::new(markdown_str);
467///
468/// let mut html_buf = String::new();
469/// html::push_html(&mut html_buf, parser);
470///
471/// assert_eq!(html_buf, r#"<h1>hello</h1>
472/// <ul>
473/// <li>alpha</li>
474/// <li>beta</li>
475/// </ul>
476/// "#);
477/// ```
478pub fn push_html<'a, I>(s: &mut String, iter: I)
479where
480    I: Iterator<Item = Event<'a>>,
481{
482    HtmlWriter::new(iter, s).run().unwrap();
483}
484
485/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
486/// write it out to a writable stream.
487///
488/// **Note**: using this function with an unbuffered writer like a file or socket
489/// will result in poor performance. Wrap these in a
490/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to
491/// prevent unnecessary slowdowns.
492///
493/// # Examples
494///
495/// ```
496/// use pulldown_cmark_fork::{html, Parser};
497/// use std::io::Cursor;
498///
499/// let markdown_str = r#"
500/// hello
501/// =====
502///
503/// * alpha
504/// * beta
505/// "#;
506/// let mut bytes = Vec::new();
507/// let parser = Parser::new(markdown_str);
508///
509/// html::write_html(Cursor::new(&mut bytes), parser);
510///
511/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1>
512/// <ul>
513/// <li>alpha</li>
514/// <li>beta</li>
515/// </ul>
516/// "#);
517/// ```
518pub fn write_html<'a, I, W>(writer: W, iter: I) -> io::Result<()>
519where
520    I: Iterator<Item = Event<'a>>,
521    W: Write,
522{
523    HtmlWriter::new(iter, WriteWrapper(writer)).run()
524}