mdpdf-core 0.0.0-alpha.0

Markdown parsing, frontmatter extraction, and syntax highlighting for mdpdf
Documentation
//! Directive preprocessor that transforms `<!-- directive -->` HTML comments
//! into layout HTML, operating on comrak's AST rather than raw strings.
//!
//! This guarantees:
//!   - Comments inside code blocks are never processed (they're `CodeBlock` nodes)
//!   - Whitespace/spacing is handled by comrak's parser
//!   - Only actual HTML block nodes are inspected
//!
//! Supported directives (fenced so rustdoc shows the comments verbatim instead
//! of passing them through as invisible raw HTML):
//!
//! ```text
//! <!-- grid -->           Start a 2-column panel grid
//! <!-- grid 3 -->         Start a 3-column grid
//! <!-- cell: Title -->    Grid cell with label
//! <!-- label: Title -->   Sub-label within a cell
//! <!-- /grid -->          End grid
//! <!-- stats -->          Start stat row
//! <!-- stat COLOR: VAL | LABEL -->  Stat pill
//! <!-- /stats -->         End stat row
//! <!-- callout TYPE -->   Start callout (warn, info); a bare `callout` means warn
//! <!-- /callout -->       End callout
//! <!-- muted -->          Start muted note
//! <!-- /muted -->         End muted note
//! ```

use comrak::nodes::{AstNode, NodeValue};

/// Rewrite every directive comment in the document into its layout HTML.
///
/// The tree below `root` is visited in `descendants()` order. Only
/// `NodeValue::HtmlBlock` nodes are examined; every other node kind (code
/// blocks, inline code, paragraphs, …) is skipped without being modified.
/// An HTML block whose literal is not a directive keeps its literal as is.
pub fn transform<'a>(root: &'a AstNode<'a>) {
    // Whether a grid cell `<div>` is currently open; threaded through
    // `expand` so consecutive cells and `/grid` close it correctly.
    let mut cell_open = false;

    for node in root.descendants() {
        let mut data = node.data.borrow_mut();

        let block = match data.value {
            NodeValue::HtmlBlock(ref mut block) => block,
            _ => continue,
        };

        let replacement =
            extract_directive(&block.literal).map(|directive| expand(directive, &mut cell_open));
        if let Some(html) = replacement {
            block.literal = html;
        }
    }
}

// ── Directive enum ──────────────────────────────────────────────────────────

/// A layout directive recognised inside an HTML comment.
///
/// The derives make directives printable in diagnostics and directly
/// comparable with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Directive {
    /// `grid` / `grid N`: open a panel grid with the given column count.
    GridStart(usize),
    /// `cell: Title`: open a grid cell with a label.
    Cell(String),
    /// `label: Title`: sub-label within a cell.
    Label(String),
    /// `/grid`: close the grid (and any cell still open).
    GridEnd,
    /// `stats`: open a stat row.
    StatsStart,
    /// `stat COLOR: VAL | LABEL`: a single stat pill.
    Stat { color: String, value: String, label: String },
    /// `/stats`: close the stat row.
    StatsEnd,
    /// `callout TYPE`: open a callout of the given type.
    CalloutStart(String),
    /// `/callout`: close the callout.
    CalloutEnd,
    /// `muted`: open a muted note.
    MutedStart,
    /// `/muted`: close the muted note.
    MutedEnd,
}

// ── Extraction ──────────────────────────────────────────────────────────────

/// Extract a directive from an HTML block's literal, if it is one.
///
/// The literal must consist of exactly one `<!-- … -->` comment line,
/// optionally surrounded by whitespace and blank lines; those are stripped
/// before matching.
///
/// Blocks that carry any other non-blank line (for example a `<div>` block
/// that merely contains a comment line) yield `None`. The caller overwrites
/// the *entire* literal with the expansion, so accepting such a block would
/// silently discard its remaining lines.
///
/// Also returns `None` when the comment body is not a recognised directive.
fn extract_directive(literal: &str) -> Option<Directive> {
    let mut lines = literal
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty());

    // Exactly one non-blank line is allowed.
    let comment = lines.next()?;
    if lines.next().is_some() {
        return None;
    }

    let inner = comment
        .strip_prefix("<!--")?
        .strip_suffix("-->")?
        .trim();

    parse_inner(inner)
}

/// Parse the trimmed text between `<!--` and `-->` into a [`Directive`].
///
/// Returns `None` for anything that is not a recognised directive, including
/// a `stat` line that lacks its `|` or `:` separator, so ordinary comments are
/// left alone by the caller.
///
/// A `grid N` argument that is not a positive integer (unparsable, or `0`)
/// falls back to the default column count: zero columns would produce
/// `repeat(0,1fr)`, which is not a valid CSS track list.
fn parse_inner(inner: &str) -> Option<Directive> {
    // Column count used when `grid` carries no usable argument.
    const DEFAULT_COLUMNS: usize = 2;

    // Grid
    if inner == "/grid" {
        return Some(Directive::GridEnd);
    }
    if inner == "grid" {
        return Some(Directive::GridStart(DEFAULT_COLUMNS));
    }
    if let Some(rest) = inner.strip_prefix("grid ") {
        let cols = rest
            .trim()
            .parse::<usize>()
            .ok()
            .filter(|&n| n > 0)
            .unwrap_or(DEFAULT_COLUMNS);
        return Some(Directive::GridStart(cols));
    }

    // Cell
    if let Some(title) = inner.strip_prefix("cell:") {
        return Some(Directive::Cell(title.trim().to_owned()));
    }

    // Label
    if let Some(title) = inner.strip_prefix("label:") {
        return Some(Directive::Label(title.trim().to_owned()));
    }

    // Stats
    if inner == "stats" {
        return Some(Directive::StatsStart);
    }
    if inner == "/stats" {
        return Some(Directive::StatsEnd);
    }
    if let Some(rest) = inner.strip_prefix("stat ") {
        // Shape: `COLOR: VALUE | LABEL`. Split on the first `|`, then on the
        // first `:` of the left half, so the value itself may contain `:`.
        let (color_val, label) = rest.split_once('|')?;
        let (color, value) = color_val.split_once(':')?;
        return Some(Directive::Stat {
            color: color.trim().to_owned(),
            value: value.trim().to_owned(),
            label: label.trim().to_owned(),
        });
    }

    // Callout
    if inner == "/callout" {
        return Some(Directive::CalloutEnd);
    }
    if let Some(typ) = inner.strip_prefix("callout ") {
        return Some(Directive::CalloutStart(typ.trim().to_owned()));
    }
    if inner == "callout" {
        // A bare `callout` defaults to the warning style.
        return Some(Directive::CalloutStart("warn".to_owned()));
    }

    // Muted
    if inner == "muted" {
        return Some(Directive::MutedStart);
    }
    if inner == "/muted" {
        return Some(Directive::MutedEnd);
    }

    None
}

// ── Expansion ───────────────────────────────────────────────────────────────

/// Render a directive as the HTML fragment that replaces its comment.
///
/// `in_cell` records whether a grid cell `<div>` is currently open, so that
/// the next `cell` or the closing `/grid` can emit the matching `</div>`
/// first. Directive text is interpolated into the markup verbatim.
fn expand(directive: Directive, in_cell: &mut bool) -> String {
    const CLOSE_DIV: &str = "</div>\n";

    match directive {
        Directive::GridStart(cols) => {
            // A new grid begins with no cell open.
            *in_cell = false;
            [
                "<div class=\"panel-grid\" style=\"grid-template-columns:repeat(",
                cols.to_string().as_str(),
                ",1fr)\">\n",
            ]
            .concat()
        }
        Directive::Cell(title) => {
            // Mark a cell as open and learn whether a previous one needs closing.
            let had_open_cell = std::mem::replace(in_cell, true);
            let prefix = if had_open_cell { CLOSE_DIV } else { "" };
            [
                prefix,
                "<div class=\"cell\">\n<div class=\"lbl\">",
                title.as_str(),
                "</div>\n",
            ]
            .concat()
        }
        Directive::Label(title) => {
            ["<div class=\"lbl-sub\">", title.as_str(), "</div>\n"].concat()
        }
        Directive::GridEnd => {
            // Close the trailing cell (if any) before closing the grid itself.
            let had_open_cell = std::mem::replace(in_cell, false);
            if had_open_cell {
                [CLOSE_DIV, CLOSE_DIV].concat()
            } else {
                CLOSE_DIV.to_owned()
            }
        }
        Directive::StatsStart => String::from("<div class=\"stat-row\">\n"),
        Directive::Stat { color, value, label } => [
            "<div class=\"stat-pill stat-",
            color.as_str(),
            "\">",
            "<div class=\"num\">",
            value.as_str(),
            "</div>",
            "<div class=\"txt\">",
            label.as_str(),
            "</div></div>\n",
        ]
        .concat(),
        Directive::CalloutStart(typ) => {
            ["<div class=\"callout-", typ.as_str(), "\">\n"].concat()
        }
        Directive::MutedStart => String::from("<div class=\"muted-note\">\n"),
        // Every remaining closing directive is a plain `</div>`.
        Directive::StatsEnd | Directive::CalloutEnd | Directive::MutedEnd => CLOSE_DIV.to_owned(),
    }
}

#[cfg(test)]
mod tests {
    use comrak::{parse_document, format_html, Arena, Options};

    use super::*;

    /// Parse `src`, run `transform` over the AST and render the result to HTML.
    fn process(src: &str) -> String {
        let arena = Arena::new();
        // Raw HTML rendering is switched on so the expanded markup reaches the output.
        let mut opts = Options::default();
        opts.render.unsafe_ = true;

        let root = parse_document(&arena, src, &opts);
        transform(root);

        let mut buf = Vec::new();
        format_html(root, &opts, &mut buf).unwrap();
        String::from_utf8(buf).unwrap()
    }

    /// Assert that every needle occurs somewhere in `rendered`.
    fn assert_all_present(rendered: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                rendered.contains(*needle),
                "expected {:?} in output:\n{}",
                needle,
                rendered
            );
        }
    }

    /// Assert that none of the needles occurs in `rendered`.
    fn assert_all_absent(rendered: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                !rendered.contains(*needle),
                "did not expect {:?} in output:\n{}",
                needle,
                rendered
            );
        }
    }

    #[test]
    fn grid_with_cells() {
        let markdown = r#"## Title

<!-- grid -->
<!-- cell: Resolver -->

```rust
fn main() {}
```

<!-- cell: Source -->

hello

<!-- /grid -->
"#;
        let rendered = process(markdown);
        assert_all_present(
            &rendered,
            &[
                "panel-grid",
                "class=\"cell\"",
                "class=\"lbl\">Resolver</div>",
                "class=\"lbl\">Source</div>",
                "fn main()",
            ],
        );
        // No raw <!-- directive comments remain
        assert_all_absent(&rendered, &["<!-- grid", "<!-- cell"]);
    }

    #[test]
    fn code_block_comments_untouched() {
        let markdown = r#"
```html
<!-- grid -->
<!-- cell: This should NOT be processed -->
```
"#;
        let rendered = process(markdown);
        // These are inside a code block — must NOT become panel-grid
        assert_all_absent(&rendered, &["panel-grid"]);
        assert_all_present(&rendered, &["&lt;!-- grid --&gt;"]);
    }

    #[test]
    fn stat_pills() {
        let markdown = r#"<!-- stats -->
<!-- stat red: 60-80% | Agent failures -->
<!-- stat blue: 4.4× | Token compression -->
<!-- /stats -->"#;
        let rendered = process(markdown);
        assert_all_present(&rendered, &["stat-row", "stat-red", "stat-blue", "60-80%"]);
    }

    #[test]
    fn three_column_grid() {
        let markdown = r#"<!-- grid 3 -->
<!-- cell: A -->

a

<!-- cell: B -->

b

<!-- cell: C -->

c

<!-- /grid -->"#;
        let rendered = process(markdown);
        assert_all_present(&rendered, &["repeat(3,1fr)"]);
    }

    #[test]
    fn regular_comments_unchanged() {
        let markdown = "<!-- this is a normal comment -->\n\nSome text\n";
        let rendered = process(markdown);
        assert_all_present(&rendered, &["<!-- this is a normal comment -->"]);
    }

    #[test]
    fn callout_and_muted() {
        let markdown = r#"<!-- callout warn -->

**Important:** something

<!-- /callout -->

<!-- muted -->

Small note.

<!-- /muted -->
"#;
        let rendered = process(markdown);
        assert_all_present(&rendered, &["callout-warn", "muted-note"]);
    }
}