Skip to main content

scrybe_render/
html.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Shawn Hartsock and contributors
3
4//! Markdown-to-HTML rendering with syntax highlighting.
5
6use std::sync::OnceLock;
7
8use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
9use syntect::easy::HighlightLines;
10use syntect::highlighting::ThemeSet;
11use syntect::html::{styled_line_to_highlighted_html, IncludeBackground};
12use syntect::parsing::SyntaxSet;
13
14use scrybe_core::Document;
15
16use crate::math::{extract_math, inject_math};
17use crate::mermaid::inject_mermaid_wrappers;
18use crate::theme::Theme;
19use crate::RenderOutput;
20
21static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
22static THEME_SET: OnceLock<ThemeSet> = OnceLock::new();
23
24fn syntax_set() -> &'static SyntaxSet {
25    SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
26}
27
28fn theme_set() -> &'static ThemeSet {
29    THEME_SET.get_or_init(ThemeSet::load_defaults)
30}
31
32/// Renders a [`Document`] to HTML using the given [`Theme`].
33///
34/// The returned [`RenderOutput`] contains:
35/// - `html`: the full fragment with `<style>` prepended.
36/// - `body_html`: just the body content without CSS.
37pub fn render_html(doc: &Document, theme: Theme) -> RenderOutput {
38    // 1. Extract math placeholders before pulldown-cmark sees the source.
39    let (processed_source, math_placeholders) = extract_math(&doc.source);
40
41    // 2. Parse + render with syntax highlighting.
42    let body_html = render_with_highlighting(&processed_source);
43
44    // 3. Re-inject math elements.
45    let body_html = inject_math(&body_html, &math_placeholders);
46
47    // 4. Post-process Mermaid blocks.
48    let body_html = inject_mermaid_wrappers(&body_html);
49
50    // 5. Prepend theme CSS.
51    let html = format!("<style>{}</style>\n{}", theme.css(), body_html);
52
53    RenderOutput { html, body_html }
54}
55
56/// Runs pulldown-cmark with custom syntax-highlighted code blocks.
57fn render_with_highlighting(source: &str) -> String {
58    let opts = Options::all();
59    let parser = Parser::new_ext(source, opts);
60
61    let mut output = String::new();
62    let mut in_code_block = false;
63    let mut current_lang: Option<String> = None;
64    let mut code_buf = String::new();
65
66    for event in parser {
67        match event {
68            Event::Start(Tag::CodeBlock(kind)) => {
69                in_code_block = true;
70                current_lang = match kind {
71                    CodeBlockKind::Fenced(lang) => {
72                        let s = lang.to_string();
73                        if s.is_empty() {
74                            None
75                        } else {
76                            Some(s)
77                        }
78                    }
79                    CodeBlockKind::Indented => None,
80                };
81                code_buf.clear();
82            }
83            Event::End(TagEnd::CodeBlock) => {
84                in_code_block = false;
85                let highlighted = highlight_code(&code_buf, current_lang.as_deref());
86                output.push_str(&highlighted);
87                current_lang = None;
88                code_buf.clear();
89            }
90            Event::Text(text) if in_code_block => {
91                code_buf.push_str(&text);
92            }
93            other => {
94                // For all non-code-block events, let pulldown-cmark render them.
95                let mut fragment = String::new();
96                pulldown_cmark::html::push_html(&mut fragment, std::iter::once(other));
97                output.push_str(&fragment);
98            }
99        }
100    }
101
102    output
103}
104
105/// Produces a highlighted `<pre><code>` block for the given `code` and optional `lang`.
106fn highlight_code(code: &str, lang: Option<&str>) -> String {
107    // Mermaid blocks must not be syntax-highlighted — Mermaid.js needs raw source,
108    // not syntect's span-wrapped output.
109    if lang == Some("mermaid") {
110        let escaped = code
111            .replace('&', "&amp;")
112            .replace('<', "&lt;")
113            .replace('>', "&gt;");
114        return format!(
115            r#"<pre class="code-block"><code class="language-mermaid">{escaped}</code></pre>"#
116        );
117    }
118
119    let ss = syntax_set();
120    let ts = theme_set();
121
122    let syntax = lang
123        .and_then(|l| ss.find_syntax_by_token(l))
124        .unwrap_or_else(|| ss.find_syntax_plain_text());
125
126    let syntect_theme = ts
127        .themes
128        .get("InspiredGitHub")
129        .or_else(|| ts.themes.values().next())
130        .expect("syntect ships at least one theme");
131
132    let mut h = HighlightLines::new(syntax, syntect_theme);
133
134    let lang_class = lang
135        .map(|l| format!(r#" class="language-{l}""#))
136        .unwrap_or_default();
137
138    let mut html = format!(r#"<pre class="code-block"><code{lang_class}>"#);
139
140    for line in syntect::util::LinesWithEndings::from(code) {
141        let ranges = h.highlight_line(line, ss).unwrap_or_default();
142        let highlighted = styled_line_to_highlighted_html(&ranges, IncludeBackground::No)
143            .unwrap_or_else(|_| {
144                // Fallback: HTML-escape the raw line.
145                line.replace('&', "&amp;")
146                    .replace('<', "&lt;")
147                    .replace('>', "&gt;")
148            });
149        html.push_str(&highlighted);
150    }
151
152    html.push_str("</code></pre>\n");
153    html
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `markdown` with the default theme.
    fn render(markdown: &str) -> RenderOutput {
        let document = Document::new(markdown);
        render_html(&document, Theme::Default)
    }

    // --- basic rendering ---

    #[test]
    fn test_render_heading() {
        let rendered = render("# Hello Scrybe");
        for needle in ["<h1>", "Hello Scrybe"] {
            assert!(rendered.html.contains(needle));
        }
    }

    #[test]
    fn test_render_empty() {
        // An empty document must render without producing an error marker.
        let rendered = render("");
        assert!(!rendered.body_html.contains("Error"));
    }

    // --- code blocks ---

    #[test]
    fn test_syntax_highlighting_rust() {
        let rendered = render("```rust\nfn main() {}\n```\n");
        let body = &rendered.body_html;
        // syntect wraps highlighted tokens in <span> elements.
        assert!(
            body.contains("<span"),
            "expected <span elements from syntect, got: {}",
            &body[..body.len().min(400)]
        );
    }

    #[test]
    fn test_syntax_highlighting_unknown_lang() {
        // An unrecognised language must not panic and must keep the code text.
        let rendered = render("```xyzzy-nonexistent\nsome code\n```\n");
        assert!(rendered.body_html.contains("some code"));
    }

    // --- math ---

    #[test]
    fn test_math_inline_extracted() {
        let rendered = render("Here is $x^2$ inline.");
        let body = &rendered.body_html;
        assert!(body.contains(r#"class="math-inline""#), "body_html: {}", body);
    }

    #[test]
    fn test_math_block_extracted() {
        let rendered = render("$$\\int f$$");
        let body = &rendered.body_html;
        assert!(body.contains(r#"class="math-block""#), "body_html: {}", body);
    }

    // --- mermaid ---

    #[test]
    fn test_mermaid_wrapper() {
        let rendered = render("```mermaid\ngraph TD; A-->B;\n```\n");
        let body = &rendered.body_html;
        assert!(body.contains(r#"class="mermaid""#), "body_html: {}", body);
        // NOTE(review): `highlight_code` emits `<pre class="code-block">`, never a
        // bare `<pre>`, so this check may be unable to fail — confirm the intent.
        assert!(!body.contains("<pre>"));
    }

    // --- theme ---

    #[test]
    fn test_theme_css_injected() {
        let rendered = render("# hi");
        assert!(
            rendered.html.contains("<style>"),
            "html should contain <style> tag"
        );
        // The CSS belongs to `html` only; `body_html` must not lead with it.
        assert!(!rendered.body_html.starts_with("<style>"));
    }
}