Skip to main content

dioxus_mdx/parser/
syntax.rs

1//! Syntax highlighting for code blocks using syntect.
2//!
3//! Generates HTML with CSS classes for code syntax highlighting.
4//! Token colors are defined via CSS custom properties so they adapt
5//! to both light and dark DaisyUI themes.
6
7use std::sync::LazyLock;
8use syntect::html::{ClassStyle, ClassedHTMLGenerator};
9use syntect::parsing::SyntaxSet;
10use syntect::util::LinesWithEndings;
11
/// Lazily loaded syntax set with default syntaxes.
///
/// The set is built by `SyntaxSet::load_defaults_newlines` the first time it
/// is accessed and is then reused by every later `highlight_code` call.
static SYNTAX_SET: LazyLock<SyntaxSet> = LazyLock::new(SyntaxSet::load_defaults_newlines);
14
15/// Apply syntax highlighting to code.
16///
17/// Returns HTML string with CSS classes for syntax highlighting.
18/// Token spans use classes like `sy-keyword`, `sy-string`, etc.
19/// that are styled via CSS custom properties (see `syntax_highlight_css()`).
20/// Falls back to plain escaped code if highlighting fails.
21pub fn highlight_code(code: &str, language: Option<&str>) -> String {
22    let lang = language.unwrap_or("txt");
23
24    // Map common language aliases
25    let syntax_name = map_language(lang);
26
27    // Find syntax definition
28    let syntax = SYNTAX_SET
29        .find_syntax_by_extension(syntax_name)
30        .or_else(|| SYNTAX_SET.find_syntax_by_name(syntax_name))
31        .or_else(|| SYNTAX_SET.find_syntax_by_extension(lang))
32        .or_else(|| SYNTAX_SET.find_syntax_by_name(lang))
33        .unwrap_or_else(|| SYNTAX_SET.find_syntax_plain_text());
34
35    // Use ClassedHTMLGenerator to emit CSS classes instead of inline styles.
36    // This lets us control colors via CSS custom properties that adapt to the
37    // active DaisyUI theme.
38    let mut generator = ClassedHTMLGenerator::new_with_class_style(
39        syntax,
40        &SYNTAX_SET,
41        ClassStyle::SpacedPrefixed { prefix: "sy-" },
42    );
43
44    for line in LinesWithEndings::from(code) {
45        if generator
46            .parse_html_for_line_which_includes_newline(line)
47            .is_err()
48        {
49            return escape_html(code);
50        }
51    }
52
53    generator.finalize()
54}
55
56/// Returns CSS content (without `<style>` tags) for syntax highlighting.
57///
58/// Defines CSS custom properties for token colors under two selectors:
59/// - `[data-theme="dark"]` — colors for dark backgrounds
60/// - `[data-theme="light"]` — colors for light backgrounds
61///
62/// Inject this once via `document::Style` in your layout component.
63pub fn syntax_highlight_css() -> &'static str {
64    include_str!("syntax_highlight.css")
65}
66
/// Map common language aliases to syntect syntax names.
///
/// Matching is ASCII case-insensitive. When `lang` is a known alias the
/// corresponding static syntax name is returned; otherwise `lang` itself is
/// returned unchanged so the caller can still try it as an extension or name.
///
/// Shell aliases (`sh`, `bash`, `shell`, `zsh`) resolve to
/// `"Bourne Again Shell (bash)"`, the same syntax name used for `env`, so
/// that a name lookup succeeds even for tags such as `shell` that are not a
/// file extension.
fn map_language(lang: &str) -> &str {
    // Name of the shell grammar; shared by the shell aliases and `env`.
    const SHELL: &str = "Bourne Again Shell (bash)";

    // Each entry pairs the accepted spellings with the syntax name to use.
    const ALIASES: &[(&[&str], &str)] = &[
        // JavaScript variants
        (&["js", "javascript"], "JavaScript"),
        (&["jsx"], "JavaScript (JSX)"),
        (&["ts", "typescript"], "TypeScript"),
        (&["tsx"], "TypeScript (TSX)"),
        // Shell variants
        (&["sh", "bash", "shell", "zsh"], SHELL),
        // General-purpose languages
        (&["rs", "rust"], "Rust"),
        (&["py", "python"], "Python"),
        (&["rb", "ruby"], "Ruby"),
        (&["go", "golang"], "Go"),
        // Data formats
        (&["json", "jsonc"], "JSON"),
        (&["yml", "yaml"], "YAML"),
        // HTML/CSS
        (&["html", "htm"], "HTML"),
        (&["css"], "CSS"),
        (&["scss"], "SCSS"),
        (&["sass"], "Sass"),
        // Config files
        (&["toml"], "TOML"),
        (&["ini"], "INI"),
        (&["env"], SHELL),
        // Markdown
        (&["md", "markdown"], "Markdown"),
        // SQL
        (&["sql"], "SQL"),
        // C/C++
        (&["c", "h"], "C"),
        (&["cpp", "cc", "cxx", "hpp"], "C++"),
        // JVM / .NET / others
        (&["java"], "Java"),
        (&["cs", "csharp"], "C#"),
        (&["php"], "PHP"),
        (&["swift"], "Swift"),
        (&["kt", "kotlin"], "Kotlin"),
        // Dockerfile
        (&["dockerfile", "docker"], "Dockerfile"),
        // Plain text
        (&["txt", "text"], "Plain Text"),
    ];

    ALIASES
        .iter()
        .find(|(aliases, _)| aliases.iter().any(|alias| lang.eq_ignore_ascii_case(alias)))
        // Unknown tags are passed through untouched.
        .map_or(lang, |&(_, syntax_name)| syntax_name)
}
209
/// Escape the HTML-significant characters `&`, `<`, `>`, `"` and `'`.
///
/// Every other character is copied through unchanged. Already-escaped input
/// is escaped again (`&amp;` becomes `&amp;amp;`).
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Rust input must come back as spans carrying `sy-`-prefixed classes.
    #[test]
    fn test_highlight_rust() {
        let source = r#"fn main() {
    println!("Hello, world!");
}"#;
        let rendered = highlight_code(source, Some("rust"));
        for needle in ["<span", "sy-", "fn"] {
            assert!(rendered.contains(needle));
        }
    }

    /// The `js` alias must resolve to a syntax that produces spans.
    #[test]
    fn test_highlight_javascript() {
        let rendered = highlight_code("const x = 42;", Some("js"));
        assert!(rendered.contains("<span"));
    }

    /// Output is class-based: no inline `style=` attributes may appear.
    #[test]
    fn test_highlight_no_inline_styles() {
        let rendered = highlight_code("let x = 42;", Some("rust"));
        assert!(!rendered.contains("style="));
    }

    /// An unrecognized language tag still yields some output.
    #[test]
    fn test_highlight_unknown_language() {
        let rendered = highlight_code("some text", Some("unknown_lang_xyz"));
        assert!(!rendered.is_empty());
    }

    /// A missing language tag still yields some output.
    #[test]
    fn test_highlight_no_language() {
        let rendered = highlight_code("plain text", None);
        assert!(!rendered.is_empty());
    }

    /// Angle brackets and ampersands are replaced by their entities.
    #[test]
    fn test_escape_html() {
        let cases = [("<div>", "&lt;div&gt;"), ("a & b", "a &amp; b")];
        for (input, expected) in cases {
            assert_eq!(escape_html(input), expected);
        }
    }
}