markdown2pdf 1.4.0

Create PDF with Markdown files (a md to pdf transpiler)
Documentation
//! PDF renderer for markdown2pdf.
//!
//! Owns the full path from a [`Token`] stream to PDF bytes. Built
//! directly on `printpdf 0.9` — no layout-engine abstraction between
//! us and the PDF backend.
//!
//! # Architecture
//!
//! ```text
//! Token (lexer output)
//!     │  lower::lower
//!     ▼
//! Vec<Block>            ← block IR (heading, paragraph, code, hr)
//!     │  layout::lay_out_pages
//!     │      ↑ font::FontMetricsCache (glyph widths via ttf-parser)
//!     ▼
//! Vec<PdfPage>          ← printpdf 0.9 page operation streams
//!     │  PdfDocument::with_pages + save
//!     ▼
//! Vec<u8>               ← serialized PDF bytes
//! ```
//!
//! What the renderer covers today:
//!
//! - Headings (levels 1–6), paragraphs with glyph-advance wrapping,
//!   fenced code blocks, horizontal rules with per-style dash + width
//! - Inline emphasis (bold / italic / inline code / strike / underline)
//!   with per-variant external Unicode font selection
//! - Unordered, ordered, and task lists with configurable bullets and
//!   loose-vs-tight spacing
//! - Block backgrounds, per-side borders, padding, line-height —
//!   all reading from the resolved style schema
//! - Blockquotes with configurable border (replacing the old hardcoded
//!   left rule)
//! - GFM tables with per-column alignment, header repeat across pages
//! - Local-file images (PNG / JPEG); URL fetch is gated under the
//!   `fetch` feature
//! - Hyperlinks as PDF link annotations
//! - Block-level HTML treated as monospace; comment-only blocks are
//!   invisible per CommonMark §4.6
//! - Page splits with header repeat for tables
//!
//! Known gaps:
//!
//! - Real justified text (Knuth-Plass) — `text_align = justify`
//!   silently degrades to left
//! - Cross-page block background fragments — a paragraph that spans
//!   pages paints its background only on the starting page
//! - URL image fetching, footnotes, headers / footers, page numbers,
//!   TOC, bookmarks — all roadmap items

mod font;
mod hyphenate;
mod ir;
mod layout;
mod lower;
mod math;
mod postprocess;
mod preprocess;

use crate::markdown::Token;
use crate::styling::ResolvedStyle;
use crate::{MdpError, fonts::FontConfig};

use printpdf::{PdfDocument, PdfSaveOptions};

/// Render a token stream and write the resulting PDF to `path`.
///
/// The document is first rendered in memory via [`render_to_bytes`] and
/// then written to disk in a single `std::fs::write` call.
///
/// # Errors
///
/// Propagates any error returned by [`render_to_bytes`]. If the write
/// itself fails, returns [`MdpError::PdfError`] carrying the I/O error
/// text, the target path, and a hint about the output directory.
pub fn render_to_file(
    tokens: Vec<Token>,
    style: ResolvedStyle,
    font_config: Option<&FontConfig>,
    path: impl AsRef<std::path::Path>,
) -> Result<(), MdpError> {
    let target = path.as_ref();
    let pdf = render_to_bytes(tokens, style, font_config)?;

    if let Err(io_err) = std::fs::write(target, pdf) {
        return Err(MdpError::PdfError {
            message: io_err.to_string(),
            path: Some(target.display().to_string()),
            suggestion: Some(String::from(
                "Check that the output directory exists and you have write permissions",
            )),
        });
    }
    Ok(())
}

/// Render a token stream to PDF bytes.
///
/// Pipeline, in order:
///
/// 1. Rewrite inline HTML anchors into regular link tokens.
/// 2. Create the `PdfDocument` and copy metadata from `style.metadata`.
/// 3. Collect the set of characters used by the document.
/// 4. Lower the tokens to block IR and work out which font variants
///    are needed.
/// 5. Load fonts, lay out pages, and serialize with printpdf.
/// 6. Post-process the serialized bytes: link tooltips, catalog
///    `/Lang`, stream compression.
///
/// An input that lays out to zero pages still yields a document with a
/// single blank page sized from `style.page`.
pub fn render_to_bytes(
    mut tokens: Vec<Token>,
    style: ResolvedStyle,
    font_config: Option<&FontConfig>,
) -> Result<Vec<u8>, MdpError> {
    // Inline `<a href="…">…</a>` HTML is recognised before anything
    // else, so the ordinary link path (and the tooltip post-pass at
    // the end of this function) treats it like a markdown link.
    preprocess::rewrite_html_anchors(&mut tokens);

    let doc_title = match &style.metadata.title {
        Some(title) => title.clone(),
        None => "markdown2pdf".to_string(),
    };
    let mut doc = PdfDocument::new(&doc_title);

    // Copy over whichever metadata fields the style provides; fields
    // that are absent (or an empty keyword list) are left untouched.
    let meta = &style.metadata;
    let info = &mut doc.metadata.info;
    info.document_title = doc_title;
    if let Some(author) = &meta.author {
        info.author = author.clone();
    }
    if let Some(subject) = &meta.subject {
        info.subject = subject.clone();
    }
    if let Some(creator) = &meta.creator {
        info.creator = creator.clone();
    }
    if !meta.keywords.is_empty() {
        info.keywords = meta.keywords.clone();
    }

    // Sorted, de-duplicated list of every character in the document text.
    let mut used_codepoints: Vec<char> = Token::collect_all_text(&tokens).chars().collect();
    used_codepoints.sort_unstable();
    used_codepoints.dedup();

    let blocks = lower::lower(&tokens);
    let mut usage = ir::VariantUsage::analyze(&blocks);

    // Heading and blockquote weight / slant come from the theme rather
    // than from per-run flags, so the IR analysis above cannot see
    // them. Record them here; otherwise an external font would skip
    // loading its bold (or italic) face and these blocks would render
    // in the regular face.
    for themed in style.headings.iter().chain([&style.blockquote]) {
        match (themed.is_bold(), themed.is_italic()) {
            (true, true) => usage.body_bold_italic = true,
            (true, false) => usage.body_bold = true,
            (false, true) => usage.body_italic = true,
            (false, false) => {}
        }
    }
    // Same treatment for the monospace variants used by code blocks.
    let code_block_style = &style.code_block;
    match (code_block_style.is_bold(), code_block_style.is_italic()) {
        (true, true) => usage.mono_bold_italic = true,
        (true, false) => usage.mono_bold = true,
        (false, true) => usage.mono_italic = true,
        (false, false) => {}
    }

    // A separate inline-code family is loaded only when `[code_inline]
    // font_family` is set AND is not (case-insensitively) the same as
    // `[code_block] font_family`. In every other case inline and block
    // code share one path, which keeps the default theme (both spelled
    // `"Courier"`) byte-identical to the pre-feature output.
    let inline_family = style.code_inline.font_family.as_deref();
    let block_family = style.code_block.font_family.as_deref();
    let code_inline_font = inline_family
        .filter(|ci| block_family.map_or(true, |cb| !ci.eq_ignore_ascii_case(cb)));

    let font_set = font::FontSet::load_with_style_fallbacks(
        font_config,
        &style.fallback_fonts,
        code_inline_font,
        &used_codepoints,
        usage,
        &mut doc,
    );

    let mut pages = layout::lay_out_pages(&blocks, &style, &font_set, &mut doc);

    // Guarantee at least one page: if layout produced none, add a
    // blank page using the configured page size.
    let (fallback_w, fallback_h) = layout::page_dimensions_mm(&style.page);
    if pages.is_empty() {
        pages.push(printpdf::PdfPage::new(
            printpdf::Mm(fallback_w),
            printpdf::Mm(fallback_h),
            Vec::new(),
        ));
    }

    let mut warnings = Vec::new();
    let serialized = doc
        .with_pages(pages)
        .save(&PdfSaveOptions::default(), &mut warnings);

    // Serialization warnings are surfaced through the logger only.
    for warning in &warnings {
        log::warn!("printpdf: {:?}", warning);
    }

    // Add `/Contents` (tooltip) entries to link annotations, taken
    // from the titles in `[text](url "title")`. printpdf 0.9 has no
    // `/Contents` on its `LinkAnnotation` struct, so the serialized
    // bytes are parsed back with lopdf and patched.
    let tooltips = postprocess::collect_link_tooltips(&tokens);
    let with_tooltips = postprocess::inject_link_tooltips(serialized, &tooltips);

    // Catalog `/Lang` for accessibility — also not exposed by
    // printpdf 0.9. Skipped when no language is configured.
    let with_lang = if let Some(lang) = &style.metadata.language {
        postprocess::inject_lang(with_tooltips, lang)
    } else {
        with_tooltips
    };

    // printpdf 0.9 never compresses streams, so deflate them here
    // (math vector outlines make raw page streams very large).
    Ok(postprocess::compress(with_lang))
}

#[cfg(test)]
mod tests {
    //! Smoke tests for the public render entry points. Each test only
    //! checks that the output begins with the `%PDF-` magic header
    //! (plus a minimum size for the long-document case).

    use super::*;
    use crate::markdown::Token;

    /// Style used by every test: the crate's default resolved style.
    fn default_style() -> ResolvedStyle {
        ResolvedStyle::default()
    }

    /// An empty token stream must still serialize to a PDF.
    #[test]
    fn empty_token_stream_produces_valid_pdf() {
        let bytes = render_to_bytes(vec![], default_style(), None).unwrap();
        assert!(bytes.starts_with(b"%PDF-"));
    }

    /// A single text token renders.
    #[test]
    fn paragraph_produces_valid_pdf() {
        let tokens = vec![Token::Text("hello world".to_string())];
        let bytes = render_to_bytes(tokens, default_style(), None).unwrap();
        assert!(bytes.starts_with(b"%PDF-"));
    }

    /// A level-1 heading renders.
    #[test]
    fn heading_produces_valid_pdf() {
        let tokens = vec![Token::Heading(vec![Token::Text("Hi".into())], 1)];
        let bytes = render_to_bytes(tokens, default_style(), None).unwrap();
        assert!(bytes.starts_with(b"%PDF-"));
    }

    /// 150 short paragraphs render to a non-trivial amount of output.
    #[test]
    fn long_document_splits_pages() {
        let mut tokens = Vec::new();
        for i in 0..150 {
            tokens.push(Token::Text(format!("paragraph {}", i)));
            tokens.push(Token::Newline);
            tokens.push(Token::Newline);
        }
        let bytes = render_to_bytes(tokens, default_style(), None).unwrap();
        assert!(bytes.starts_with(b"%PDF-"));
        assert!(bytes.len() > 1000);
    }

    /// `render_to_file` writes a readable PDF to the requested path.
    #[test]
    fn render_to_file_creates_file() {
        // Include the process id in the file name so concurrent test
        // processes sharing one temp directory don't clobber each other.
        let path = std::env::temp_dir().join(format!("m2p_phase1_{}.pdf", std::process::id()));
        let tokens = vec![
            Token::Heading(vec![Token::Text("Hello".into())], 1),
            Token::Text("World".into()),
        ];
        // `render_to_file` accepts `impl AsRef<Path>`; passing the path
        // directly avoids a `to_str().unwrap()` panic on non-UTF-8 temp dirs.
        render_to_file(tokens, default_style(), None, &path).unwrap();
        assert!(path.exists());
        let read_back = std::fs::read(&path);
        // Remove the file before checking its contents so a failing
        // assertion below cannot leave it behind.
        let _ = std::fs::remove_file(&path);
        assert!(read_back.unwrap().starts_with(b"%PDF-"));
    }
}