yar_markdown/
lib.rs

1#![allow(clippy::missing_errors_doc)]
2#![allow(clippy::missing_panics_doc)]
3
4mod shortcodes;
5
6use std::path::Path;
7
8use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
9use color_eyre::Result;
10use minijinja::Environment;
11use pulldown_cmark::{
12    CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd, html::push_html,
13};
14use serde::{Deserialize, Serialize};
15use smol_str::SmolStr;
16use syntect::{
17    highlighting::{Theme, ThemeSet},
18    html::highlighted_html_for_string,
19    parsing::SyntaxSet,
20};
21
22use crate::shortcodes::evaluate_all_shortcodes;
23
24/// The frontmatter metadata for a parsed markdown document.
25#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
26pub struct Frontmatter {
27    pub title: String,
28    pub tags: Vec<SmolStr>,
29    pub template: Option<String>,
30    pub date: Option<String>,
31    pub updated: Option<String>,
32    pub slug: Option<String>,
33    #[serde(default)]
34    pub draft: bool,
35    #[serde(default)]
36    pub requires: Vec<String>,
37}
38
39#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
40pub struct TOCHeading {
41    pub id: Option<String>,
42    pub text: String,
43}
44
45impl TOCHeading {
46    const fn new(id: Option<String>, text: String) -> Self {
47        Self { id, text }
48    }
49
50    fn to_html(&self) -> String {
51        let id = self.id.as_ref().unwrap_or(&self.text);
52        let html = format!("<h2><a id=\"{id}\" href=\"{id}\">{}</a></h2>", self.text);
53
54        html
55    }
56}
57
58/// A parsed markdown document.
59#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
60pub struct Document {
61    pub date: DateTime<Utc>,
62    pub updated: DateTime<Utc>,
63    pub content: String,
64    pub toc: Vec<TOCHeading>,
65    pub summary: String,
66    pub frontmatter: Frontmatter,
67}
68
/// A fenced code block that is currently being collected by the parser.
#[derive(Debug)]
struct CodeBlock {
    // Trimmed info string of the fence (e.g. `py`); used to look up the syntax.
    lang: String,
    // Source text accumulated from the text events inside the fence.
    text: String,
}
74
75impl CodeBlock {
76    pub const fn new(lang: String) -> Self {
77        Self {
78            lang,
79            text: String::new(),
80        }
81    }
82}
83
/// Progress of collecting the events that make up a document's summary.
enum Summary {
    // The summary is finished; no further events are collected.
    Complete,
    // Still below the character threshold; every event is collected.
    Incomplete,
    // The threshold has been reached; events are still collected until the
    // next `Event::End`, after which the state becomes `Complete`.
    FinalElement,
}
89
/// Used to parse and format a markdown document.
///
/// Stores all the required context: the syntax definitions and theme used for
/// highlighting code blocks, and the markdown parser options.
#[derive(Debug)]
pub struct MarkdownRenderer {
    // Syntax definitions used to highlight fenced code blocks.
    syntax_set: SyntaxSet,
    // Highlighting theme applied to fenced code blocks.
    theme: Theme,
    // `pulldown_cmark` extensions enabled for every parse.
    options: Options,
}
99
100impl MarkdownRenderer {
101    pub fn new<P: AsRef<Path>>(theme_path: Option<P>, theme: Option<&str>) -> Result<Self> {
102        let syntax_set = SyntaxSet::load_defaults_newlines();
103        let theme_set = theme_path.map_or_else(
104            || Ok(ThemeSet::load_defaults()),
105            |p| ThemeSet::load_from_folder(p),
106        )?;
107        let theme = theme_set.themes[theme.unwrap_or("base16-ocean.dark")].clone();
108
109        let mut options = Options::empty();
110        options.insert(Options::ENABLE_TABLES);
111        options.insert(Options::ENABLE_FOOTNOTES);
112        options.insert(Options::ENABLE_STRIKETHROUGH);
113        options.insert(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
114        options.insert(Options::ENABLE_MATH);
115        options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
116
117        Ok(Self {
118            syntax_set,
119            theme,
120            options,
121        })
122    }
123
124    #[allow(clippy::too_many_lines)]
125    /// Parse markdown and create a `Document` form a given string.
126    pub fn parse_from_string(&self, content: &str, env: &Environment) -> Result<Document> {
127        let frontmatter = parse_frontmatter(content)?;
128
129        let mut html_output = String::new();
130        let parser = Parser::new_ext(content, self.options);
131
132        let mut codeblock = None;
133
134        let mut current_heading = None;
135        let mut headings = Vec::new();
136
137        let mut character_count = 0;
138        let mut summary_status = Summary::Incomplete;
139        let mut summary_events = Vec::new();
140
141        let mut in_frontmatter = false;
142
143        let mut in_shortcode = false;
144        let mut current_shortcode = String::new();
145
146        let parser = parser.filter_map(|event| -> Option<Event<'_>> {
147            // If there are currently less than 150 characters of text that have been parsed, add the
148            // node to the summary. Additionally, make sure that the summary doesn't include unclosed tags and the like.
149            if character_count >= 150 && !matches!(summary_status, Summary::Complete) {
150                summary_status = Summary::FinalElement;
151            }
152
153            let e = match event {
154                // TODO: Highlight line by line.
155                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => {
156                    let lang = lang.trim();
157                    let begin_html =
158                        format!("<pre lang=\"{lang}\"><code class=\"language-{lang}\">");
159                    codeblock = Some(CodeBlock::new(lang.into()));
160                    Some(Event::Html(begin_html.into()))
161                }
162                Event::End(TagEnd::CodeBlock) => {
163                    if let Some(cb) = &codeblock {
164                        let syntax = self
165                            .syntax_set
166                            .find_syntax_by_extension(&cb.lang)
167                            .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text());
168                        let mut html = highlighted_html_for_string(
169                            &cb.text,
170                            &self.syntax_set,
171                            syntax,
172                            &self.theme,
173                        )
174                        .ok()?;
175
176                        codeblock = None;
177
178                        html.push_str("</code></pre>\n");
179                        Some(Event::Html(html.into()))
180                    } else {
181                        None
182                    }
183                }
184                Event::Start(Tag::Heading {
185                    level: HeadingLevel::H2,
186                    ref id,
187                    ..
188                }) => {
189                    current_heading = Some(TOCHeading::new(
190                        id.as_ref().map(std::string::ToString::to_string),
191                        String::new(),
192                    ));
193                    None
194                }
195                Event::End(TagEnd::Heading(HeadingLevel::H2)) => {
196                    let heading = current_heading.take().expect("Heading end before start?");
197                    let html = heading.to_html();
198                    headings.push(heading);
199
200                    Some(Event::Html(html.into()))
201                }
202                Event::Start(Tag::MetadataBlock(_)) => {
203                    in_frontmatter = true;
204                    Some(event)
205                }
206                Event::End(TagEnd::MetadataBlock(_)) => {
207                    in_frontmatter = false;
208                    Some(event)
209                }
210                Event::Text(ref t) => {
211                    if t.contains("{{!") && !t.contains("{{! end !}}") {
212                        in_shortcode = true;
213                    }
214
215                    let shortcode_event = if t.contains("{{! end !}}") {
216                        assert!(in_shortcode, "Stray shortcode closing tag.");
217
218                        current_shortcode.push_str(t);
219                        in_shortcode = false;
220                        let evaluated = evaluate_all_shortcodes(&current_shortcode, env, self)
221                            .expect("Error while parsing shortcodes.");
222                        current_shortcode.clear();
223
224                        Some(Event::Html(evaluated.into()))
225                    } else {
226                        None
227                    };
228
229                    let text = if let Some(Event::Html(ref html)) = shortcode_event {
230                        html
231                    } else {
232                        t
233                    };
234
235                    if in_shortcode {
236                        current_shortcode.push_str(text);
237                        None
238                    } else if let Some(cb) = &mut codeblock {
239                        cb.text.push_str(text);
240                        None
241                    } else if let Some(h) = &mut current_heading {
242                        h.text.push_str(text);
243                        None
244                    } else {
245                        if !in_frontmatter {
246                            character_count += text.len();
247                        }
248
249                        Some(shortcode_event.unwrap_or(event))
250                    }
251                }
252                Event::Code(ref s)
253                | Event::InlineMath(ref s)
254                | Event::DisplayMath(ref s)
255                | Event::InlineHtml(ref s) => {
256                    if let Some(h) = &mut current_heading {
257                        h.text.push_str(s);
258                        return None;
259                    }
260                    Some(event)
261                }
262                _ => Some(event),
263            };
264
265            match summary_status {
266                Summary::Incomplete => summary_events.push(e.clone()),
267                Summary::FinalElement => {
268                    summary_events.push(e.clone());
269                    if matches!(e, Some(Event::End(_))) {
270                        summary_status = Summary::Complete;
271                    }
272                }
273                Summary::Complete => (),
274            }
275
276            e
277        });
278
279        push_html(&mut html_output, parser);
280
281        let mut summary = String::new();
282        push_html(&mut summary, summary_events.into_iter().flatten());
283
284        // Extract dates from frontmatter
285        let date = frontmatter.date.as_ref().map_or(
286            Ok::<DateTime<Utc>, color_eyre::Report>(Utc::now()),
287            |d| {
288                let parsed = d.parse::<NaiveDateTime>()?;
289                Ok(Utc.from_utc_datetime(&parsed))
290            },
291        )?;
292
293        let updated = frontmatter.updated.as_ref().map_or(
294            Ok::<DateTime<Utc>, color_eyre::Report>(date),
295            |d| {
296                let parsed = d.parse::<NaiveDateTime>()?;
297                Ok(Utc.from_utc_datetime(&parsed))
298            },
299        )?;
300
301        Ok(Document {
302            date,
303            updated,
304            content: html_output,
305            toc: headings,
306            summary,
307            frontmatter,
308        })
309    }
310
311    /// Render a one-off string to markdown. Doesn't create a `Document`.
312    pub fn render_one_off(&self, content: &str) -> String {
313        let mut html_output = String::new();
314        let parser = Parser::new_ext(content, self.options);
315        push_html(&mut html_output, parser);
316        html_output
317    }
318}
319
320fn parse_frontmatter(content: &str) -> Result<Frontmatter> {
321    let mut opening_delim = false;
322    let mut frontmatter_content = String::new();
323
324    for line in content.lines() {
325        if line.trim() == "---" {
326            if opening_delim {
327                break;
328            }
329
330            opening_delim = true;
331            continue;
332        }
333
334        frontmatter_content.push_str(line);
335        frontmatter_content.push('\n');
336    }
337
338    let frontmatter = toml::from_str(&frontmatter_content)?;
339    Ok(frontmatter)
340}
341
// Snapshot tests for `MarkdownRenderer::parse_from_string`.
//
// Documents without a `date` in their frontmatter are dated at the time of
// parsing, so the tests below redact `.date` and `.updated` to the fixed value
// from `get_date` to keep the snapshots stable.
#[cfg(test)]
mod tests {
    use super::*;

    // Fixed timestamp used as the redaction value for `.date` / `.updated`.
    fn get_date() -> Result<DateTime<Utc>> {
        let date = NaiveDateTime::parse_from_str("2025-01-01T6:00:00", "%Y-%m-%dT%H:%M:%S")?;
        Ok(Utc.from_utc_datetime(&date))
    }

    // Minimal document: frontmatter followed by a single paragraph.
    #[test]
    fn test_render_markdown() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
---

Hello World
        "#;

        let document = MarkdownRenderer::new::<&str>(None, None)?
            .parse_from_string(content, &Environment::empty())?;
        insta::assert_yaml_snapshot!(document, {
            ".date" => get_date().unwrap().to_string(),
            ".updated" => get_date().unwrap().to_string()
        });

        Ok(())
    }

    // Document with more than 150 characters of text, so the summary is cut
    // off before the end of the document.
    #[test]
    fn test_summary() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
---
Day 2 was pretty straightforward, and there isn't all that much I want to say about it, so I'll get straight to the problem.

# Part 1

The puzzle gives us an input that consists of rows of reports, each of which is made up of a list of levels, which are just numbers.

# Part 2

hello world
        "#;

        let document = MarkdownRenderer::new::<&str>(None, None)?
            .parse_from_string(content, &Environment::empty())?;
        insta::assert_yaml_snapshot!(document, {
            ".date" => get_date().unwrap().to_string(),
            ".updated" => get_date().unwrap().to_string()
        });
        Ok(())
    }

    // Second-level headings, with and without an explicit `{#id}` attribute.
    #[test]
    fn test_toc() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
---

Hello World

## Part 1

Some Content

## Part 2

Some More Content

## Part 3 {#part3}

Even More Content

        "#;

        let document = MarkdownRenderer::new::<&str>(None, None)?
            .parse_from_string(content, &Environment::empty())?;
        insta::assert_yaml_snapshot!(document, {
            ".date" => get_date().unwrap().to_string(),
            ".updated" => get_date().unwrap().to_string()
        });
        Ok(())
    }

    // Frontmatter with every optional key set, plus a `[series]` table that is
    // not a field of `Frontmatter`. `date` and `updated` are given explicitly,
    // so no redaction is needed here.
    #[test]
    fn test_frontmatter() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
template = "foo.html"
date = "2025-01-01T6:00:00"
updated = "2025-03-12T8:00:00"
slug = "some-slug"
draft = true

[series]
part = 3
---

Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Suspendisse ut mattis felis. Mauris sed ex vitae est pharetra 
scelerisque. Ut ut sem arcu. Morbi molestie dictum venenatis. 
Quisque sit amet consequat libero. Cras id tellus diam. 

Cras pulvinar tristique nisl vel porttitor. Fusce enim magna, porta 
sed nisl non, dignissim ultrices massa. Sed ultrices tempus dolor sit 
amet fringilla. Proin at mauris porta, efficitur magna sit amet, 
rutrum elit. In efficitur vitae erat id scelerisque. Cras laoreet 
elit eu neque condimentum auctor. Lorem ipsum dolor sit amet, 
consectetur adipiscing elit. Vivamus nec auctor neque, at 
consectetur velit. Maecenas at massa ante.

        "#;

        let document = MarkdownRenderer::new::<&str>(None, None)?
            .parse_from_string(content, &Environment::empty())?;
        insta::assert_yaml_snapshot!(document);
        Ok(())
    }

    // Fenced code block with a language, which goes through syntax highlighting.
    #[test]
    fn test_codeblock() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
---

```py
print("Hello World")
if __name__ == "__main__":
    print("yay")
```        "#;

        let document = MarkdownRenderer::new::<&str>(None, None)?
            .parse_from_string(content, &Environment::empty())?;
        insta::assert_yaml_snapshot!(document, {
            ".date" => get_date().unwrap().to_string(),
            ".updated" => get_date().unwrap().to_string()
        });

        Ok(())
    }

    // Two shortcodes, one without and one with arguments, rendered through the
    // `note.html` and `fancy.html` templates registered below.
    #[test]
    fn test_with_shortcode() -> Result<()> {
        let content = r#"
---
title = "Test"
tags = ["a", "b", "c"]
---

# Hello World

{{! note !}}
this is a note!
{{! end !}}

This is some more text.

{{! fancy(title="testing") !}}
this is a note!
{{! end !}}
       "#;

        let note_str = r#"
<div class="note">
{{ body }}
</div>
        "#;
        let fancy_str = r#"
<div class="fancy">
<h1> {{ arguments.title }} </h1>
{{ body }}
</div>
        "#;

        let mut env = Environment::new();
        env.add_template("note.html", note_str)?;
        env.add_template("fancy.html", fancy_str)?;

        let document =
            MarkdownRenderer::new::<&str>(None, None)?.parse_from_string(content, &env)?;
        insta::assert_yaml_snapshot!(document, {
            ".date" => get_date().unwrap().to_string(),
            ".updated" => get_date().unwrap().to_string()
        });

        Ok(())
    }
}