gazetta_render_ext/
markdown.rs

1//  Copyright (C) 2015 Steven Allen
2//
3//  This file is part of gazetta.
4//
5//  This program is free software: you can redistribute it and/or modify it under the terms of the
6//  GNU General Public License as published by the Free Software Foundation version 3 of the
7//  License.
8//
9//  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
10//  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
11//  the GNU General Public License for more details.
12//
13//  You should have received a copy of the GNU General Public License along with this program.  If
14//  not, see <http://www.gnu.org/licenses/>.
15//
16
17use std::collections::HashMap;
18use std::fmt;
19
20use horrorshow::Join;
21use horrorshow::html;
22use horrorshow::prelude::*;
23use pulldown_cmark::HeadingLevel;
24use pulldown_cmark::{CowStr, Event, Options, Parser};
25
26#[cfg(feature = "syntax-highlighting")]
27use crate::highlight::SyntaxHighlight;
28
/// Markdown renderer.
///
/// A cheap, copyable description of one markdown document and the URL context it is rendered
/// in. Parsing only happens when the value is rendered into a template.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Markdown<'a> {
    /// Markdown source text.
    data: &'a str,
    /// Prefix written in front of non-absolute link/image URLs (empty when none was given).
    root: &'a str,
    /// URL prefix of the page being rendered, stored without trailing slashes.
    path: &'a str,
    /// Whether fenced code blocks with a language tag should be syntax highlighted.
    highlight: bool,
}

impl<'a> Markdown<'a> {
    /// Create a new markdown renderer.
    ///
    /// * `data` is the markdown to be rendered.
    /// * `root` is an optional prefix for non-absolute links and images; `None` behaves like
    ///   an empty prefix.
    /// * `path` is a relative url prefix (for relative links and images). Trailing slashes are
    ///   dropped because the renderer always joins with a slash itself.
    /// * `highlight` requests syntax highlighting of fenced code blocks; it only has an effect
    ///   when the `syntax-highlighting` feature is compiled in.
    ///
    /// Note: `path` will only affect markdown links and images, not inline html ones.
    pub fn new(
        data: &'a str,
        root: Option<&'a str>,
        path: &'a str,
        highlight: bool,
    ) -> Markdown<'a> {
        Self {
            data,
            root: root.unwrap_or(""),
            path: path.trim_end_matches('/'),
            highlight,
        }
    }
}
61
62impl<'a> RenderOnce for Markdown<'a> {
63    #[inline]
64    fn render_once(self, tmpl: &mut TemplateBuffer) {
65        self.render(tmpl)
66    }
67}
68
69impl<'a> RenderMut for Markdown<'a> {
70    #[inline]
71    fn render_mut(&mut self, tmpl: &mut TemplateBuffer) {
72        self.render(tmpl)
73    }
74}
75
76impl<'a> Render for Markdown<'a> {
77    #[inline]
78    fn render(&self, tmpl: &mut TemplateBuffer) {
79        tmpl << RenderMarkdown {
80            footnotes: HashMap::new(),
81            iter: Parser::new_ext(
82                self.data,
83                Options::ENABLE_TABLES
84                    | Options::ENABLE_FOOTNOTES
85                    | Options::ENABLE_STRIKETHROUGH
86                    | Options::ENABLE_SMART_PUNCTUATION
87                    | Options::ENABLE_DEFINITION_LIST
88                    | Options::ENABLE_TASKLISTS
89                    | Options::ENABLE_GFM,
90            ),
91            path: self.path,
92            root: self.root,
93            syntax_highlight: self.highlight,
94        }
95    }
96}
97
// State for one streaming render of a markdown event iterator into HTML.
98struct RenderMarkdown<'a, I> {
    // Source of markdown events; consumed as rendering proceeds.
99    iter: I,
    // Footnote name -> displayed number, assigned in the order names are first seen.
100    footnotes: HashMap<CowStr<'a>, u32>,
    // URL prefix of the current page; used for `./` links and footnote reference anchors.
101    path: &'a str,
    // Prefix written in front of non-absolute link/image URLs (may be empty).
102    root: &'a str,
    // Whether fenced code blocks with a language should be highlighted; only read when the
    // `syntax-highlighting` feature is enabled, hence the conditional `dead_code` allowance.
103    #[cfg_attr(not(feature = "syntax-highlighting"), allow(dead_code))]
104    syntax_highlight: bool,
105}
106
// A link or image destination together with the context needed to rewrite it when it is
// formatted/rendered.
107struct RelativeUrl<'a> {
    // Prefix written first for non-absolute hrefs; skipped when empty.
108    root: &'a str,
    // Page path inserted before hrefs that start with `./`; skipped when empty.
109    path: &'a str,
    // Destination exactly as written in the markdown source.
110    href: &'a str,
111}
112
/// Returns `true` when `href` begins with a URL scheme followed by `:`.
///
/// A scheme is an ASCII letter followed by any number of ASCII letters, digits, `+`, `-` or
/// `.`. Only the text before the first `:` is inspected; an href without a `:` or with an
/// empty or malformed scheme is not absolute.
fn is_absolute_url(href: &str) -> bool {
    // Everything before the first colon is the scheme candidate.
    let Some((scheme, _)) = href.split_once(':') else {
        return false;
    };

    let mut scheme_bytes = scheme.bytes();
    let starts_with_letter = scheme_bytes
        .next()
        .is_some_and(|first| first.is_ascii_alphabetic());

    starts_with_letter
        && scheme_bytes.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
}
127
#[test]
fn test_is_absolute_url() {
    // hrefs that carry a scheme and must be recognised as absolute.
    let absolute = [
        "http://example.com",
        "https://example.com/path",
        "ftp://example.com",
        "file:///path/to/file",
        "mailto:user@example.com",
        // Edge case: scheme containing a `+`.
        "git+ssh://example.com",
    ];
    // hrefs without a valid scheme.
    let relative = [
        "/path/to/resource",
        "./relative/path",
        "../parent/path",
        "path/to/resource",
        "",
        // Edge case: colon present but the scheme is empty.
        "://missing-scheme.com",
    ];

    for href in absolute {
        assert!(is_absolute_url(href), "expected absolute: {href:?}");
    }
    for href in relative {
        assert!(!is_absolute_url(href), "expected relative: {href:?}");
    }
}
148
149impl<'a> fmt::Display for RelativeUrl<'a> {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        if is_absolute_url(self.href) {
152            return f.write_str(self.href);
153        }
154        if !self.root.is_empty() {
155            f.write_str(self.root)?;
156            if !self.root.ends_with("/") {
157                f.write_str("/")?;
158            }
159        }
160        if let Some(href) = self.href.strip_prefix("./") {
161            if !self.path.is_empty() {
162                f.write_str(self.path)?;
163                f.write_str("/")?;
164            }
165            f.write_str(href)?;
166        } else {
167            f.write_str(self.href)?;
168        }
169        Ok(())
170    }
171}
172
173impl Render for RelativeUrl<'_> {
174    fn render(&self, tmpl: &mut horrorshow::TemplateBuffer<'_>) {
175        tmpl.write_fmt(format_args!("{}", self))
176    }
177}
178
179impl RenderMut for RelativeUrl<'_> {
180    fn render_mut(&mut self, tmpl: &mut horrorshow::TemplateBuffer<'_>) {
181        self.render(tmpl)
182    }
183}
184
185impl RenderOnce for RelativeUrl<'_> {
186    fn render_once(self, tmpl: &mut horrorshow::TemplateBuffer<'_>)
187    where
188        Self: Sized,
189    {
190        self.render(tmpl)
191    }
192
193    fn size_hint(&self) -> usize {
194        self.root.len() + self.path.len() + self.href.len() + 2
195    }
196}
197
198impl<'a, I> RenderMarkdown<'a, I> {
199    fn footnote(&mut self, name: CowStr<'a>) -> u32 {
200        let next_idx = (self.footnotes.len() as u32) + 1;
201        *self.footnotes.entry(name).or_insert(next_idx)
202    }
203
204    fn make_relative<'b>(&self, href: &'b str) -> RelativeUrl<'b>
205    where
206        'a: 'b,
207    {
208        RelativeUrl {
209            root: self.root,
210            path: self.path.trim_matches('/'),
211            href,
212        }
213    }
214}
215
216impl<'a, I: Iterator<Item = Event<'a>>> RenderOnce for RenderMarkdown<'a, I> {
217    fn render_once(mut self, tmpl: &mut TemplateBuffer) {
218        self.render_mut(tmpl)
219    }
220}
221
222fn class_list<'a>(classes: &'a [CowStr<'a>]) -> Option<impl RenderOnce + 'a> {
223    if classes.is_empty() {
224        None
225    } else {
226        Some(Join(" ", classes.iter().map(AsRef::as_ref)))
227    }
228}
229
230#[inline(always)]
231fn inner_text<'a>(iter: &mut impl Iterator<Item = Event<'a>>, escape: bool) -> impl RenderOnce {
232    use pulldown_cmark::Event::*;
233    FnRenderer::new(move |tmpl| {
234        let mut nest = 0;
235        for event in iter {
236            match event {
237                Start(_) => nest += 1,
238                End(_) if nest == 0 => break,
239                End(_) => nest -= 1,
240                Text(txt) | Code(txt) => {
241                    if escape {
242                        tmpl.write_str(&txt)
243                    } else {
244                        tmpl.write_raw(&txt)
245                    }
246                }
247                SoftBreak | HardBreak => tmpl.write_raw(" "),
248                Rule => tmpl.write_raw("\n"),
249                // Ignored
250                TaskListMarker(_) | FootnoteReference(_) | Html(_) | InlineHtml(_)
251                | InlineMath(_) | DisplayMath(_) => (),
252            }
253        }
254    })
255}
256
257impl<'a, I: Iterator<Item = Event<'a>>> RenderMut for RenderMarkdown<'a, I> {
258    fn render_mut(&mut self, tmpl: &mut TemplateBuffer) {
259        use pulldown_cmark::BlockQuoteKind::*;
260        use pulldown_cmark::Event::*;
261        use pulldown_cmark::{CodeBlockKind, Tag};
262
263        #[cfg(feature = "syntax-highlighting")]
264        let syntax_highlight = self.syntax_highlight;
265
266        while let Some(event) = self.iter.next() {
267            // manually reborrow
268            let tmpl = &mut *tmpl;
269            match event {
270                Start(tag) => {
271                    // Because rust doesn't reborrow? (WTF?)
272                    let s: &mut Self = &mut *self;
273                    match tag {
274                        Tag::FootnoteDefinition(name) => {
275                            tmpl << html! {
276                                div(class="footnote", id=format_args!("footnote-{}", name)) {
277                                    sup(class="footnote-label") : s.footnote(name);
278                                    : s;
279                                }
280                            }
281                        }
282                        Tag::Paragraph => tmpl << html! { p : s },
283                        Tag::BlockQuote(kind) => {
284                            tmpl << html! {
285                                blockquote(class ?= kind.map(|k| match k {
286                                    Note => "note",
287                                    Tip => "tip",
288                                    Important => "important",
289                                    Warning => "warning",
290                                    Caution => "caution",
291                                })) : s;
292                            }
293                        }
294                        Tag::Table(_) => tmpl << html! { table : s },
295                        Tag::TableHead => tmpl << html! { thead { tr : s } },
296                        Tag::TableRow => tmpl << html! { tr : s },
297                        Tag::TableCell => tmpl << html! { td : s },
298                        Tag::List(Some(0)) => tmpl << html! { ol : s },
299                        Tag::List(Some(start)) => tmpl << html! { ol(start = start) : s },
300                        Tag::List(None) => tmpl << html! { ul : s },
301                        Tag::Item => tmpl << html! { li : s },
302                        Tag::Emphasis => tmpl << html! { em: s },
303                        Tag::Strikethrough => tmpl << html! { s: s },
304                        Tag::Strong => tmpl << html! { strong: s },
305                        Tag::Heading {
306                            level,
307                            id,
308                            classes,
309                            attrs: _, // TODO
310                        } => match level {
311                            HeadingLevel::H1 => {
312                                tmpl << html! { h1 (id? = id.as_deref(), class ?= class_list(&classes)): s }
313                            }
314                            HeadingLevel::H2 => {
315                                tmpl << html! { h2 (id? = id.as_deref(), class ?= class_list(&classes)): s }
316                            }
317                            HeadingLevel::H3 => {
318                                tmpl << html! { h3 (id? = id.as_deref(), class ?= class_list(&classes)): s }
319                            }
320                            HeadingLevel::H4 => {
321                                tmpl << html! { h4 (id? = id.as_deref(), class ?= class_list(&classes)): s }
322                            }
323                            HeadingLevel::H5 => {
324                                tmpl << html! { h5 (id? = id.as_deref(), class ?= class_list(&classes)): s }
325                            }
326                            HeadingLevel::H6 => {
327                                tmpl << html! { h6 (id? = id.as_deref(), class ?= class_list(&classes)): s }
328                            }
329                        },
330                        Tag::Link {
331                            link_type: _,
332                            dest_url,
333                            title,
334                            id,
335                            ..
336                        } => {
337                            tmpl << html! {
338                                // TODO: Escape href?
339                                a(href = s.make_relative(&dest_url),
340                                  title? = if !title.is_empty() { Some(&*title) } else { None },
341                                  id ?= if !id.is_empty() { Some(&*id) } else { None }) : s
342                            }
343                        }
344                        Tag::Image {
345                            link_type: _,
346                            dest_url,
347                            title,
348                            id,
349                        } => {
350                            tmpl << html! {
351                                img(src = s.make_relative(&dest_url),
352                                    title? = if !title.is_empty() { Some(&*title) } else { None },
353                                    id ?= if !id.is_empty() { Some(&*id) } else { None },
354                                    alt = inner_text(&mut s.iter, true))
355                            }
356                        }
357                        Tag::CodeBlock(ref kind) => {
358                            let lang = match kind {
359                                CodeBlockKind::Fenced(info) => {
360                                    let lang = info.split(' ').next().unwrap();
361                                    (!lang.is_empty()).then_some(lang)
362                                }
363                                CodeBlockKind::Indented => None,
364                            };
365
366                            match lang {
367                                #[cfg(feature = "syntax-highlighting")]
368                                Some(lang) if syntax_highlight => {
369                                    tmpl << html! {
370                                        pre {
371                                            code(class = format_args!("lang-{lang}")) : SyntaxHighlight {
372                                                code: &inner_text(&mut s.iter, false).into_string().unwrap(),
373                                                lang,
374                                            }
375                                        }
376                                    }
377                                }
378                                Some(lang) => {
379                                    tmpl << html! { pre { code(class = format_args!("lang-{lang}")) : s } }
380                                }
381                                None => tmpl << html! { pre { code : s } },
382                            }
383                        }
384
385                        Tag::DefinitionList => tmpl << html! { dl : s },
386                        Tag::DefinitionListTitle => tmpl << html! { dt : s },
387                        Tag::DefinitionListDefinition => tmpl << html! { dd : s },
388
389                        Tag::HtmlBlock => tmpl << html! { : s },
390                        Tag::Superscript => tmpl << html! { sup : s },
391                        Tag::Subscript => tmpl << html! { sub : s },
392                        Tag::MetadataBlock(_) => {
393                            panic!("metadata blocks should not have been enabled")
394                        }
395                    }
396                }
397                End(_) => break,
398                Code(s) => tmpl << html! { code: s.as_ref() },
399                Rule => tmpl << html! { hr; },
400                TaskListMarker(checked) => {
401                    tmpl << html! {
402                        input(type="checkbox", checked?=checked, disabled?=true);
403                    }
404                }
405                FootnoteReference(name) => {
406                    tmpl << html! {
407                        sup(class="footnote-reference") {
408                            a(href=format_args!("{}/#footnote-{}", self.path, name)) : self.footnote(name);
409                        }
410                    }
411                }
412                Text(text) => tmpl << &*text,
413                InlineHtml(html) | Html(html) => tmpl << Raw(html),
414                SoftBreak => tmpl << "\n",
415                HardBreak => tmpl << html! { br },
416                InlineMath(_) | DisplayMath(_) => {
417                    panic!("math blocks should not have been enabled")
418                }
419            };
420        }
421    }
422}