docgen-core 0.1.0

Core Markdown processing and page model for docgen, the Cargo-only static documentation-site generator
Documentation
use std::sync::OnceLock;

use comrak::nodes::AstNode;
use comrak::options::Plugins;
use comrak::plugins::syntect::{SyntectAdapter, SyntectAdapterBuilder};
use comrak::{format_html_with_plugins, markdown_to_html_with_plugins, Options};

/// CSS class placed on the `<pre>` wrapper of highlighted code blocks.
///
/// Syntect runs in **class-based** mode (`ClassStyle::Spaced`): spans carry
/// space-separated, lowercased TextMate scope atoms (e.g. `keyword`, `string`,
/// `constant numeric`) and the colors live in the shipped `code.css`, which is
/// theme-aware for both light and dark. There is no embedded syntect theme and
/// no inline `style="color:…"`.
///
/// `rewrite_code_pre_class` substitutes this value for comrak's default
/// `syntax-highlighting` wrapper class.
pub const CODE_PRE_CLASS: &str = "docgen-code";

/// Returns the process-wide syntect highlighter, building it on first use.
///
/// Constructing the adapter loads/builds syntect's syntax set, the single most
/// expensive object in the pipeline. The adapter is immutable and reusable, so
/// it is created once behind a `OnceLock` and the same `&'static` reference is
/// handed to every document render. It is configured in class-based mode
/// (`.css()`), so it emits scope-class spans rather than inline-styled ones.
fn syntect_adapter() -> &'static SyntectAdapter {
    static SHARED_ADAPTER: OnceLock<SyntectAdapter> = OnceLock::new();
    SHARED_ADAPTER.get_or_init(|| {
        let class_based = SyntectAdapterBuilder::new().css();
        class_based.build()
    })
}

/// Swaps comrak's highlighter wrapper class for our canonical one.
///
/// Comrak's class-based adapter wraps highlighted code in
/// `<pre class="syntax-highlighting">`. Every occurrence of that exact opening
/// tag is replaced with `<pre class="docgen-code">` (see [`CODE_PRE_CLASS`]) so
/// `code.css` can scope token colors under `.docgen-doc-content pre.docgen-code`.
/// The literal only appears as the highlighter's own wrapper, so a plain
/// textual replace is safe. Input without the tag comes back unchanged.
fn rewrite_code_pre_class(html: String) -> String {
    // Opening tag exactly as emitted by comrak's class-based syntect adapter.
    const COMRAK_WRAPPER: &str = r#"<pre class="syntax-highlighting">"#;

    let canonical_wrapper = format!(r#"<pre class="{CODE_PRE_CLASS}">"#);
    html.replace(COMRAK_WRAPPER, &canonical_wrapper)
}

/// Builds the comrak options shared by the whole pipeline (GFM + P0 extensions).
///
/// This is the single source of truth, so the AST pass (Cluster B) and the
/// one-shot render agree on which syntax is recognized. Starting from
/// `Options::default()`, it enables strikethrough, tables, task lists,
/// autolinks, footnotes, both math forms, and raw-HTML passthrough.
pub fn comrak_options() -> Options<'static> {
    let mut opts = Options::default();

    // Allow raw inline HTML through: the wikilink AST pass injects `HtmlInline`
    // nodes (resolved anchors / broken spans) that must render, not be omitted.
    opts.render.r#unsafe = true;

    let ext = &mut opts.extension;

    // GFM feature set.
    ext.footnotes = true;
    ext.autolink = true;
    ext.tasklist = true;
    ext.table = true;
    ext.strikethrough = true;

    // Math: `$inline$` / `$$display$$` and the `` $`inline`$ `` code-math form.
    // The AST math pass (Cluster B) renders these to KaTeX HTML at build time.
    ext.math_code = true;
    ext.math_dollars = true;

    opts
}

/// Renders a markdown body to HTML in one shot.
///
/// `body` is expected to have its frontmatter already stripped. Rendering uses
/// the shared [`comrak_options`] (GFM extensions) and the shared syntect adapter
/// for server-side highlighting of fenced code; the highlighter's `<pre>`
/// wrapper class is then rewritten to the canonical `docgen-code`.
pub fn render_markdown(body: &str) -> String {
    let highlighter = syntect_adapter();

    let mut plugins = Plugins::default();
    plugins.render.codefence_syntax_highlighter = Some(highlighter);

    let raw_html = markdown_to_html_with_plugins(body, &comrak_options(), &plugins);
    rewrite_code_pre_class(raw_html)
}

/// Formats an already-parsed (and possibly transformed) AST to HTML.
///
/// Fenced code is highlighted with the shared syntect adapter, and the
/// highlighter's `<pre>` wrapper class is rewritten to the canonical
/// `docgen-code`, matching what [`render_markdown`] produces. `options` is
/// passed straight through to comrak's formatter.
///
/// # Panics
///
/// Panics with `"format AST to HTML"` if comrak's formatter returns an error
/// while writing into the output `String`.
pub fn format_ast<'a>(root: &'a AstNode<'a>, options: &Options) -> String {
    let plugins = {
        let mut with_highlighter = Plugins::default();
        with_highlighter.render.codefence_syntax_highlighter = Some(syntect_adapter());
        with_highlighter
    };

    let mut rendered = String::new();
    format_html_with_plugins(root, options, &mut rendered, &plugins)
        .expect("format AST to HTML");

    rewrite_code_pre_class(rendered)
}

#[cfg(test)]
mod tests {
    //! Unit tests for the one-shot render path and the shared comrak options.

    use super::*;

    /// ATX headings render as `<h1>` with their text preserved.
    #[test]
    fn renders_heading_to_html() {
        let html = render_markdown("# Title");
        assert!(html.contains("<h1>"));
        assert!(html.contains("Title"));
    }

    /// The GFM table extension is active in the render path.
    #[test]
    fn renders_gfm_table() {
        let md = "| a | b |\n| - | - |\n| 1 | 2 |\n";
        let html = render_markdown(md);
        assert!(html.contains("<table>"));
    }

    /// `~~text~~` renders as `<del>`.
    #[test]
    fn renders_strikethrough() {
        let html = render_markdown("~~gone~~");
        assert!(html.contains("<del>"));
    }

    /// Task-list items render as checkboxes, and a `[x]` item is checked.
    #[test]
    fn renders_task_list() {
        let html = render_markdown("- [x] done\n- [ ] todo\n");
        assert!(html.contains("type=\"checkbox\""));
        assert!(html.contains("checked"));
    }

    /// Bare URLs are turned into links by the autolink extension.
    #[test]
    fn renders_autolink() {
        let html = render_markdown("see https://example.com here\n");
        assert!(html.contains(r#"href="https://example.com""#));
    }

    /// Footnote references render as superscripts with footnote markup.
    #[test]
    fn renders_footnote() {
        let html = render_markdown("text[^1]\n\n[^1]: a note\n");
        assert!(html.contains("<sup"));
        assert!(html.contains("footnote"));
    }

    /// Fenced code in a known language is highlighted with scope classes.
    #[test]
    fn highlights_fenced_rust_code() {
        let md = "```rust\nfn main() { let x = 1; }\n```\n";
        let html = render_markdown(md);
        // Class-based syntect: spans carry scope classes (no inline styles), inside
        // the canonical `pre.docgen-code` wrapper. Token colors live in code.css.
        assert!(html.contains(r#"<pre class="docgen-code">"#));
        assert!(!html.contains("style=\"color:"));
        // The keyword `fn` is highlighted as its own classed span.
        assert!(html.contains(r#"<span class="keyword"#));
    }

    /// An unrecognized fence language must not panic and still yields a `<pre>`.
    #[test]
    fn unknown_language_does_not_crash_and_still_wraps_pre() {
        let md = "```not-a-real-lang\nplain text\n```\n";
        let html = render_markdown(md);
        assert!(html.contains("<pre"));
        assert!(html.contains("plain text"));
    }

    /// Both math syntaxes are switched on in the shared options.
    #[test]
    fn math_extension_is_enabled_in_shared_options() {
        let opts = comrak_options();
        assert!(opts.extension.math_dollars);
        assert!(opts.extension.math_code);
    }

    /// The shared options keep the P0 GFM extensions on, and the one-shot
    /// render path honours them.
    #[test]
    fn comrak_options_is_shared_source_of_truth() {
        // Inspect the shared options directly, so a flag dropped from
        // `comrak_options` fails here rather than only in a render test.
        let opts = comrak_options();
        assert!(opts.extension.strikethrough);
        assert!(opts.extension.table);
        assert!(opts.extension.tasklist);
        assert!(opts.extension.autolink);
        assert!(opts.extension.footnotes);
        // Raw HTML passthrough is required by the wikilink AST pass.
        assert!(opts.render.r#unsafe);

        // The render path is built on those same options.
        let html = render_markdown("~~gone~~\n");
        assert!(html.contains("<del>"));
    }
}