mdpdf-core 0.0.0-alpha.0

Markdown parsing, frontmatter extraction, and syntax highlighting for mdpdf
Documentation
use std::collections::HashMap;
use std::io::{self, Write};

use comrak::adapters::SyntaxHighlighterAdapter;
use syntect::highlighting::ThemeSet;
use syntect::html::{styled_line_to_highlighted_html, IncludeBackground};
use syntect::parsing::SyntaxSet;
use syntect::easy::HighlightLines;
use syntect::util::LinesWithEndings;

/// A [`SyntaxHighlighterAdapter`] implementation that delegates to syntect.
///
/// Holds the loaded syntax definitions and themes together with the name of
/// the theme that should be used when rendering code blocks.
pub struct SyntectHighlighter {
    /// Name used as the lookup key into `theme_set.themes` at render time.
    theme_name: String,
    /// Collection of themes that `theme_name` is resolved against.
    theme_set: ThemeSet,
    /// Syntax definitions used both to resolve language tags and to parse lines.
    syntax_set: SyntaxSet,
}

impl SyntectHighlighter {
    /// Creates a highlighter that will render with the theme called `theme_name`.
    ///
    /// The syntax set and theme set are populated from syntect's bundled
    /// defaults. The theme name is stored as-is and is not validated here; it
    /// is only resolved against the theme set when code is highlighted.
    pub fn new(theme_name: &str) -> Self {
        Self {
            syntax_set: SyntaxSet::load_defaults_newlines(),
            theme_name: theme_name.to_owned(),
            theme_set: ThemeSet::load_defaults(),
        }
    }

    /// Resolves a fenced-code language tag to a syntax definition.
    ///
    /// The tag is first looked up verbatim via `find_syntax_by_token`. If that
    /// fails, a small table of common aliases is consulted and the alias
    /// target is looked up instead. Returns `None` when neither lookup finds a
    /// syntax (including when the alias target itself is not in the set).
    fn find_syntax(&self, lang: &str) -> Option<&syntect::parsing::SyntaxReference> {
        self.syntax_set.find_syntax_by_token(lang).or_else(|| {
            // The token lookup above is documented as case-insensitive on
            // syntax names, so the alias table is matched on a lowercased tag
            // as well; otherwise `JSX` would miss while `jsx` hits.
            let target = match lang.to_ascii_lowercase().as_str() {
                "tsx" | "typescriptreact" => "typescript",
                "jsx" => "javascript",
                "sh" | "shell" | "zsh" => "bash",
                "yml" => "yaml",
                "wit" => "rust",
                _ => return None,
            };
            self.syntax_set.find_syntax_by_token(target)
        })
    }
}

/// Writes `text` to `output` with `&`, `<`, `>` and `"` replaced by HTML entities.
///
/// Runs of bytes that need no escaping are written as single slices. All
/// escaped bytes are ASCII, so multi-byte UTF-8 sequences pass through intact.
fn write_html_escaped(output: &mut dyn Write, text: &str) -> io::Result<()> {
    let bytes = text.as_bytes();
    let mut pending_from = 0;
    for (idx, &byte) in bytes.iter().enumerate() {
        let entity: &[u8] = match byte {
            b'&' => b"&amp;",
            b'<' => b"&lt;",
            b'>' => b"&gt;",
            b'"' => b"&quot;",
            _ => continue,
        };
        output.write_all(&bytes[pending_from..idx])?;
        output.write_all(entity)?;
        pending_from = idx + 1;
    }
    output.write_all(&bytes[pending_from..])
}

impl SyntaxHighlighterAdapter for SyntectHighlighter {
    /// Writes `code` to `output` as highlighted HTML spans.
    ///
    /// `lang` is resolved through `find_syntax`; an absent or unrecognised
    /// language is highlighted as plain text. If the configured theme name is
    /// not present in the theme set, the code is written without highlighting
    /// but still HTML-escaped, so it cannot corrupt the surrounding markup.
    ///
    /// # Errors
    ///
    /// Returns any error from writing to `output`; highlighting or HTML
    /// generation failures are wrapped as `io::ErrorKind::Other`.
    fn write_highlighted(
        &self,
        output: &mut dyn Write,
        lang: Option<&str>,
        code: &str,
    ) -> io::Result<()> {
        let theme = match self.theme_set.themes.get(&self.theme_name) {
            Some(theme) => theme,
            // Unknown theme: degrade to unstyled text. The output is HTML, so
            // the raw source must be escaped rather than copied verbatim.
            None => return write_html_escaped(output, code),
        };

        let syntax = lang
            .and_then(|l| self.find_syntax(l))
            .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text());

        let mut highlighter = HighlightLines::new(syntax, theme);

        // Lines are fed with their endings because the syntax set is loaded
        // with the "newlines" variant in `new`.
        for line in LinesWithEndings::from(code) {
            let ranges = highlighter
                .highlight_line(line, &self.syntax_set)
                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            let html = styled_line_to_highlighted_html(&ranges[..], IncludeBackground::No)
                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            output.write_all(html.as_bytes())?;
        }

        Ok(())
    }

    /// Writes the opening `<pre class="code-block" ...>` tag.
    ///
    /// Every entry in `attributes` is appended as `key="value"`. Values are
    /// HTML-escaped so a `"` or `<` in a value cannot break out of the
    /// attribute, and entries are emitted in key order so the output does not
    /// depend on `HashMap` iteration order.
    fn write_pre_tag(
        &self,
        output: &mut dyn Write,
        attributes: HashMap<String, String>,
    ) -> io::Result<()> {
        let mut sorted: Vec<(String, String)> = attributes.into_iter().collect();
        // Keys are unique (they came from a map), so this orders by key.
        sorted.sort_unstable();

        output.write_all(b"<pre class=\"code-block\"")?;
        for (key, value) in &sorted {
            write!(output, " {key}=\"")?;
            write_html_escaped(output, value)?;
            output.write_all(b"\"")?;
        }
        output.write_all(b">")
    }

    /// Writes a bare `<code>` opening tag; the supplied attributes are ignored.
    fn write_code_tag(
        &self,
        output: &mut dyn Write,
        _attributes: HashMap<String, String>,
    ) -> io::Result<()> {
        write!(output, "<code>")
    }
}