Skip to main content

markdown_it/plugins/extra/
syntect.rs

//! Syntax highlighting for code blocks using `syntect`.
//!
//! This plugin highlights indented code blocks and fenced code blocks.
//! A fenced block reads the first token of its info string as the language, for example `` ```rust ``.
//! Unknown languages and indented code blocks are rendered as plain text.
6//!
//! This plugin uses the `InspiredGitHub` theme and renders inline styles by default.
//! Use [`set_theme`] to select another built-in syntect theme.
//! Parsing (and [`theme_css`]) will panic if the selected theme is unknown.
//! Use [`available_themes`] to list all available themes.
11//!
//! Use [`set_to_classed`] or [`set_to_classed_with_prefix`] to switch to classed mode.
//! In this mode, you need to provide the styles yourself.
//! You can also use [`theme_css`] to get the CSS for the selected built-in theme.
//! In inline mode, [`theme_css`] returns `None`.
16//!
17//! Fenced code blocks can mark highlighted lines with a `{...}` line spec in the info string,
18//! such as ` ```rust {1, 3-5} `.
//! Line numbers start at 1.
20//!
21//! ```rust
22//! let mut md = markdown_it::MarkdownIt::new();
23//! markdown_it::plugins::cmark::add(&mut md);
24//! markdown_it::plugins::extra::syntect::add(&mut md);
25//! markdown_it::plugins::extra::syntect::set_theme(&mut md, "base16-ocean.dark");
26//!
27//! let html = md.parse("```rust\nfn main() {}\n```").render();
28//! assert!(html.contains(r#"class="language-rust""#));
29//! ```
30
31use std::collections::HashSet;
32
33use syntect::easy::HighlightLines;
34use syntect::highlighting::{Theme, ThemeSet};
35use syntect::html::{
36    append_highlighted_html_for_styled_line,
37    css_for_theme_with_class_style,
38    line_tokens_to_classed_spans,
39    ClassStyle,
40    IncludeBackground,
41};
42use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxReference, SyntaxSet};
43use syntect::util::LinesWithEndings;
44
45use crate::common::utils::{escape_html, unescape_all};
46use crate::parser::core::CoreRule;
47use crate::parser::extset::MarkdownItExt;
48use crate::plugins::cmark::block::code::CodeBlock;
49use crate::plugins::cmark::block::fence::CodeFence;
50use crate::{MarkdownIt, Node, NodeValue, Renderer};
51
52// --- render ---
53
54/// Rendered HTML produced by the syntect plugin.
55///
56/// This node will replace parsed code block nodes.
57/// Its `html` field is emitted as raw HTML during rendering.
58#[derive(Debug)]
59pub struct SyntectSnippet {
60    /// Highlighted HTML
61    pub html: String,
62}
63
64impl NodeValue for SyntectSnippet {
65    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
66        fmt.text_raw(&self.html);
67    }
68}
69
70// --- setting ---
71
72#[derive(Debug, Clone, Copy)]
73enum SyntectMode {
74    Inline,
75    Classed,
76}
77
78#[derive(Debug, Clone)]
79struct SyntectSettings {
80    theme: String,
81    mode: SyntectMode,
82    prefix: &'static str,
83}
84
85impl MarkdownItExt for SyntectSettings {}
86
87impl Default for SyntectSettings {
88    fn default() -> Self {
89        Self {
90            theme: "InspiredGitHub".to_owned(),
91            mode: SyntectMode::Inline,
92            prefix: "syntect-",
93        }
94    }
95}
96
97struct FenceMeta {
98    language: Option<String>,
99    // highlight some lines
100    // it looks like:
101    //
102    // ```rust {1, 3-4}
103    // fn main() {
104    //     print!("Hello world!");
105    //     Ok(())
106    // }
107    // ```
108    highlighted_lines: HashSet<usize>,
109}
110
111impl FenceMeta {
112    // parse "{1, 4-7}" -> Set[1, 4, 5, 6, 7]
113    fn parse_line_spec(spec: &str) -> HashSet<usize> {
114        let mut lines = HashSet::new();
115        for item in spec.split(',').map(str::trim).filter(|s| !s.is_empty()) {
116            if let Some((start, end)) = item.split_once('-') {
117                // parse "4-7"
118                if let (Ok(start), Ok(end)) = (start.parse::<usize>(), end.parse::<usize>()) {
119                    if start <= end {
120                        lines.extend(start..=end);
121                    }
122                }
123            } else if let Ok(line) = item.parse::<usize>() {
124                // parse "1"
125                lines.insert(line);
126            }
127        }
128
129        lines
130    }
131
132    // parse "rust{1, 3}rs" -> "{1, 3}"
133    fn extract_highlight_spec(info: &str) -> Option<&str> {
134        let start = info.find('{')?;
135        let rest = &info[start + 1..];
136        let end = rest.find('}')?;
137        Some(&rest[..end])
138    }
139
140    fn parse_fence_meta(data: &CodeFence) -> FenceMeta {
141        // ```rust {1,3-5}   <-- CodeFence.info
142        let info = unescape_all(&data.info);
143        let trimmed = info.trim();
144
145        let mut parts = trimmed.splitn(2, |c: char| c.is_whitespace());
146        let first_part = parts.next().unwrap_or("");
147        let rest_part = parts.next().unwrap_or("");
148        let (language, meta_part) = if first_part.starts_with('{') || first_part.is_empty() {
149            // not any language provide
150            (None, trimmed)
151        } else if let Some(highlight_start) = first_part.find('{') {
152            // support attached line specs such as ```rust{1,3}
153            (
154                Some(first_part[..highlight_start].to_string()),
155                &first_part[highlight_start..],
156            )
157        } else {
158            // language + other mark
159            (Some(first_part.to_string()), rest_part)
160        };
161
162        let highlighted_lines = Self::extract_highlight_spec(meta_part)
163            .map(Self::parse_line_spec)
164            .unwrap_or_default();
165
166        FenceMeta {
167            language,
168            highlighted_lines,
169        }
170    }
171}
172
173// --- behavior ---
174
175/// Replaces code blocks with syntect highlighted HTML.
176pub struct SyntectRule;
177
178impl CoreRule for SyntectRule {
179    fn run(root: &mut Node, md: &MarkdownIt) {
180        let ss = SyntaxSet::load_defaults_newlines();
181        let ts = ThemeSet::load_defaults();
182        let settings = load_syntect_settings(md);
183        // why panic here? avoid change original behavior
184        // `let theme = &ts.themes[md.ext.get::<SyntectSettings>().copied().unwrap_or_default().0];`
185        let theme = resolve_theme(&ts, &settings)
186            .unwrap_or_else(|| panic!("unknown syntect theme: {}", settings.theme));
187
188        root.walk_mut(|node, _| {
189            let mut content = None;
190            let mut language = None::<String>;
191            let mut lang_prefix = None::<&'static str>;
192            let mut highlighted_lines = HashSet::new();
193
194            if let Some(data) = node.cast::<CodeBlock>() {
195                content = Some(data.content.as_str());
196            } else if let Some(data) = node.cast::<CodeFence>() {
197                let meta = FenceMeta::parse_fence_meta(data);
198                language = meta.language;
199                highlighted_lines = meta.highlighted_lines;
200                content = Some(data.content.as_str());
201                lang_prefix = Some(data.lang_prefix);
202            }
203
204            if let Some(content) = content {
205                let syntax = language
206                    .as_deref()
207                    .and_then(|lang| ss.find_syntax_by_token(lang))
208                    .unwrap_or_else(|| ss.find_syntax_plain_text());
209
210                let html = match settings.mode {
211                    SyntectMode::Inline => render_inline_html(
212                        content,
213                        &ss,
214                        syntax,
215                        theme,
216                        language.as_deref(),
217                        lang_prefix.unwrap_or("language-"),
218                        settings.prefix,
219                        &highlighted_lines,
220                    ),
221                    SyntectMode::Classed => render_classed_html(
222                        content,
223                        &ss,
224                        syntax,
225                        language.as_deref(),
226                        lang_prefix.unwrap_or("language-"),
227                        settings.prefix,
228                        &highlighted_lines,
229                    ),
230                };
231
232                if let Some(html) = html {
233                    node.replace(SyntectSnippet { html });
234                }
235            }
236        });
237    }
238}
239
240// --- public method ---
241
242/// Add the syntect highlighting rule.
243///
244/// The rule will replace [`CodeBlock`] and [`CodeFence`] nodes with syntect rendered HTML snippets.
245pub fn add(md: &mut MarkdownIt) {
246    md.add_rule::<SyntectRule>();
247}
248
249/// Return the names of all built-in syntect themes available to this plugin.
250pub fn available_themes() -> Vec<String> {
251    let ts = ThemeSet::load_defaults();
252    let mut themes: Vec<_> = ts.themes.keys().cloned().collect();
253    themes.sort();
254    themes
255}
256
257/// Set the theme used for syntax highlighting.
258///
259/// The names should match one of returned by [`available_themes`].
260/// If not, it will panic.
261pub fn set_theme(md: &mut MarkdownIt, theme: impl Into<String>) {
262    update_syntect_settings(md, |settings| settings.theme = theme.into());
263}
264
265/// switch to stylesheet-based highlighting mode with default `syntect-` prefix.
266///
267/// In this mode, rendered code will use CSS class instead of inline styles.
268/// Use [`theme_css`] to generate CSS for the selected theme.
269///
270/// ```rust
271/// let mut md = markdown_it::MarkdownIt::new();
272/// markdown_it::plugins::cmark::add(&mut md);
273/// markdown_it::plugins::extra::syntect::add(&mut md);
274/// markdown_it::plugins::extra::syntect::set_to_classed(&mut md);
275///
276/// let css = markdown_it::plugins::extra::syntect::theme_css(&mut md);
277/// assert!(css.is_some())
278/// ```
279pub fn set_to_classed(md: &mut MarkdownIt) {
280    set_to_classed_with_prefix(md, "syntect-");
281}
282
283/// Switch to stylesheet-based highlighting with a custom class prefix.
284pub fn set_to_classed_with_prefix(md: &mut MarkdownIt, prefix: &'static str) {
285    update_syntect_settings(md, |settings| {
286        settings.mode = SyntectMode::Classed;
287        settings.prefix = prefix;
288    });
289}
290
291/// Set the class prefix used for line highlighting and classed mode.
292pub fn set_prefix(md: &mut MarkdownIt, prefix: &'static str) {
293    update_syntect_settings(md, |settings| settings.prefix = prefix);
294}
295
296/// Generate CSS for selected built-in theme
297///
298/// # Panics
299///
300/// Panics if the configured theme not found in built-in themes
301pub fn theme_css(md: &MarkdownIt) -> Option<String> {
302    let ts = ThemeSet::load_defaults();
303    let settings = load_syntect_settings(md);
304    let theme = resolve_theme(&ts, &settings)
305        .unwrap_or_else(|| panic!("unknown syntect theme: {}", settings.theme));
306
307    match settings.mode {
308        SyntectMode::Inline => None,
309        SyntectMode::Classed => css_for_theme_with_class_style(
310            theme,
311            ClassStyle::SpacedPrefixed {
312                prefix: settings.prefix,
313            },
314        )
315        .ok(),
316    }
317}
318
319// --- helper method ---
320
321fn load_syntect_settings(md: &MarkdownIt) -> SyntectSettings {
322    md.ext.get::<SyntectSettings>().cloned().unwrap_or_default()
323}
324
325fn update_syntect_settings(md: &mut MarkdownIt, f: impl FnOnce(&mut SyntectSettings)) {
326    let mut settings = md.ext.remove::<SyntectSettings>().unwrap_or_default();
327    f(&mut settings);
328    md.ext.insert(settings);
329}
330
331fn resolve_theme<'a>(themes: &'a ThemeSet, settings: &SyntectSettings) -> Option<&'a Theme> {
332    themes.themes.get(settings.theme.as_str())
333}
334
335fn render_inline_html(
336    content: &str,
337    ss: &SyntaxSet,
338    syntax: &SyntaxReference,
339    theme: &Theme,
340    language: Option<&str>,
341    lang_prefix: &'static str,
342    prefix: &'static str,
343    highlight_lines: &HashSet<usize>,
344) -> Option<String> {
345    let mut highlighter = HighlightLines::new(syntax, theme);
346    let bg = theme
347        .settings
348        .background
349        .unwrap_or(syntect::highlighting::Color::WHITE);
350    let mut class_attr = String::new();
351    if let Some(lang) = language {
352        if !lang.is_empty() {
353            class_attr.push_str(lang_prefix);
354            class_attr.push_str(lang);
355        }
356    }
357
358    // it look like `<pre><code>` or `<pre><code class="language-{lang}">`
359    let mut html = String::from("<pre><code");
360    if !class_attr.is_empty() {
361        html.push_str(" class=\"");
362        html.push_str(&escape_html(&class_attr));
363        html.push('"');
364    }
365    html.push('>');
366
367    // it looks like `<span class="syntect-line [syntect-line-highlighted]" style="...">{code}</span>`
368    for (idx, line) in LinesWithEndings::from(content).enumerate() {
369        let line_no = idx + 1;
370        let regions = highlighter.highlight_line(line, ss).ok()?;
371
372        // use syntect process code
373        let mut line_html = String::new();
374        append_highlighted_html_for_styled_line(
375            &regions[..],
376            IncludeBackground::IfDifferent(bg),
377            &mut line_html,
378        )
379        .ok()?;
380
381        // splicing HTML
382        html.push_str("<span class=\"");
383        html.push_str(prefix);
384        html.push_str("line");
385        if highlight_lines.contains(&line_no) {
386            // mark as highlighted line. you may need to add styles to this class yourself
387            html.push_str(" ");
388            html.push_str(prefix);
389            html.push_str("line-highlighted");
390        }
391        html.push_str("\">");
392        html.push_str(&line_html);
393        html.push_str("</span>");
394    }
395
396    // close html
397    html.push_str("</code></pre>");
398
399    Some(html)
400}
401
402fn render_classed_html(
403    content: &str,
404    ss: &SyntaxSet,
405    syntax: &SyntaxReference,
406    language: Option<&str>,
407    lang_prefix: &'static str,
408    prefix: &'static str,
409    highlighted_lines: &HashSet<usize>,
410) -> Option<String> {
411    let mut parse_state = ParseState::new(syntax);
412    let mut scope_stack = ScopeStack::new();
413
414    let mut class_attr = format!("{prefix}code");
415    if let Some(lang) = language {
416        if !lang.is_empty() {
417            class_attr.push(' ');
418            class_attr.push_str(lang_prefix);
419            class_attr.push_str(lang);
420        }
421    }
422
423    // splicing HTML
424    // head, it looks like `<pre><code class="syntect-code language-rust">`
425    let mut html = String::from("<pre><code class=\"");
426    html.push_str(&escape_html(&class_attr));
427    html.push_str("\">");
428
429    for (idx, line) in LinesWithEndings::from(content).enumerate() {
430        let line_no = idx + 1;
431        let active_scopes = scope_stack.scopes.clone();
432
433        // it looks like `<span class="syntect-line [syntect-line-highlighted]">`
434        html.push_str("<span class=\"");
435        html.push_str(prefix);
436        html.push_str("line");
437        if highlighted_lines.contains(&line_no) {
438            html.push(' ');
439            html.push_str(prefix);
440            html.push_str("line-highlighted");
441        }
442        html.push_str("\">");
443
444        // too complex here
445
446        // reopen the scope
447        reopen_scopes(&mut html, &active_scopes, prefix);
448
449        // use syntect process the line
450        let ops = parse_state.parse_line(line, ss).ok()?;
451        let (line_html, _) = line_tokens_to_classed_spans(
452            line,
453            ops.as_slice(),
454            ClassStyle::SpacedPrefixed { prefix },
455            &mut scope_stack,
456        )
457        .ok()?;
458        html.push_str(&line_html);
459
460        // close all scope <span>
461        close_n_spans(&mut html, scope_stack.scopes.len());
462
463        // close the <span> we added
464        html.push_str("</span>");
465    }
466
467    // close
468    html.push_str("</code></pre>");
469
470    Some(html)
471}
472
473fn reopen_scopes(html: &mut String, scopes: &[Scope], prefix: &'static str) {
474    for &scope in scopes {
475        html.push_str("<span class=\"");
476        push_scope_classes(html, scope, prefix);
477        html.push_str("\">");
478    }
479}
480
// Append `count` closing `</span>` tags to `html`.
fn close_n_spans(html: &mut String, count: usize) {
    html.extend(std::iter::repeat("</span>").take(count));
}
486
487fn push_scope_classes(html: &mut String, scope: Scope, prefix: &'static str) {
488    let scope_text = scope.to_string();
489    for (idx, atom) in scope_text.split('.').enumerate() {
490        if idx != 0 {
491            html.push(' ');
492        }
493        html.push_str(prefix);
494        html.push_str(atom);
495    }
496}