// zim-studio 1.5.0
//
// A Terminal-Based Audio Project Scaffold and Metadata System
// Documentation
//! Markdown → LaTeX conversion for PDF export.
//!
//! Strips a YAML frontmatter block, peels the first `# H1` to use as a
//! file-level title, demotes all remaining headings by `heading_shift`, and
//! emits a body string of LaTeX commands. The visitor handles paragraphs,
//! lists, code blocks, inline emphasis/strong/code/links, blockquotes, and
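//! horizontal rules. Tables emit a `[table omitted]` placeholder, raw HTML and
//! footnote references are dropped, and the contents of footnote definitions
//! are rendered in place as ordinary blocks, so v1 never crashes on real-world
//! markdown.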

use crate::constants::YAML_DELIMITER;
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use std::collections::HashMap;
use std::error::Error;
use std::fs;
use std::path::Path;

/// One markdown file, prepared for LaTeX assembly.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy
/// and compare prepared files (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PreparedFile {
    /// Value of the `title` key in the YAML frontmatter, when the file has a
    /// frontmatter block and that key holds a string.
    pub frontmatter_title: Option<String>,
    /// Text of the first level-1 heading, trimmed and *not* LaTeX-escaped.
    /// That heading is removed from `body_latex`.
    pub first_h1: Option<String>,
    /// LaTeX rendering of the remaining markdown body.
    pub body_latex: String,
}

/// Load the markdown file at `path` and convert it with [`prepare_md`].
///
/// # Errors
///
/// Returns the underlying I/O error (boxed) when the file cannot be read as
/// UTF-8 text.
pub fn prepare_file(path: &Path, heading_shift: i32) -> Result<PreparedFile, Box<dyn Error>> {
    fs::read_to_string(path)
        .map(|markdown| prepare_md(&markdown, heading_shift))
        .map_err(Into::into)
}

/// Turn raw markdown text into a [`PreparedFile`].
///
/// The optional YAML frontmatter is split off first; its `title` key (if any)
/// becomes `frontmatter_title`. The remaining markdown is rendered to LaTeX
/// with every heading level offset by `heading_shift`, and the first `# H1`
/// is returned separately as `first_h1`.
pub fn prepare_md(raw: &str, heading_shift: i32) -> PreparedFile {
    let (yaml_block, markdown) = split_frontmatter(raw);
    let (first_h1, body_latex) = render_body(markdown, heading_shift);
    PreparedFile {
        frontmatter_title: yaml_block.and_then(yaml_title),
        first_h1,
        body_latex,
    }
}

/// Make arbitrary text safe to place in a LaTeX argument or paragraph.
///
/// Each LaTeX special character (`\ & % $ # _ { } ~ ^ < >`) is replaced by
/// its escaped command form; every other character is copied through
/// unchanged.
pub fn escape(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\textbackslash{}"),
                '&' => Some("\\&"),
                '%' => Some("\\%"),
                '$' => Some("\\$"),
                '#' => Some("\\#"),
                '_' => Some("\\_"),
                '{' => Some("\\{"),
                '}' => Some("\\}"),
                '~' => Some("\\textasciitilde{}"),
                '^' => Some("\\textasciicircum{}"),
                '<' => Some("\\textless{}"),
                '>' => Some("\\textgreater{}"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}

/// Separate a leading YAML frontmatter block from the markdown body.
///
/// Returns `(Some(yaml), body)` when `raw` begins with `YAML_DELIMITER` and a
/// later line begins with the delimiter again; `yaml` is the text between the
/// two and `body` is everything after the closing delimiter. In every other
/// case the input is returned untouched as `(None, raw)`.
fn split_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let after_open = match raw.strip_prefix(YAML_DELIMITER) {
        Some(rest) => rest,
        None => return (None, raw),
    };
    // The closing delimiter must sit at the start of a line.
    let closing = format!("\n{YAML_DELIMITER}");
    match after_open.split_once(closing.as_str()) {
        Some((yaml, body)) => (Some(yaml), body),
        None => (None, raw),
    }
}

/// Extract the string value of the top-level `title` key from a YAML block.
///
/// Returns `None` when the YAML does not parse as a string-keyed mapping,
/// when there is no `title` key, or when its value is not a string.
fn yaml_title(yaml: &str) -> Option<String> {
    let fields = serde_yaml::from_str::<HashMap<String, serde_yaml::Value>>(yaml).ok()?;
    let title = fields.get("title")?.as_str()?;
    Some(title.to_owned())
}

fn heading_int(level: HeadingLevel) -> i32 {
    match level {
        HeadingLevel::H1 => 1,
        HeadingLevel::H2 => 2,
        HeadingLevel::H3 => 3,
        HeadingLevel::H4 => 4,
        HeadingLevel::H5 => 5,
        HeadingLevel::H6 => 6,
    }
}

/// Pick the LaTeX command used for a heading of the given (shifted) level.
///
/// Levels are clamped to `1..=6`, so anything below 1 becomes `\section` and
/// anything above 5 falls back to `\textbf`.
fn heading_command(level: i32) -> &'static str {
    const COMMANDS: [&str; 6] = [
        "\\section",
        "\\subsection",
        "\\subsubsection",
        "\\paragraph",
        "\\subparagraph",
        "\\textbf",
    ];
    // After clamping, the level is in 1..=6, so the index is in 0..=5.
    let index = (level.clamp(1, 6) - 1) as usize;
    COMMANDS[index]
}

/// Render a markdown body to LaTeX.
///
/// Returns `(first_h1, latex)`:
///
/// * `first_h1` is the plain, trimmed, *unescaped* text of the first level-1
///   heading, which is removed from the output.
/// * `latex` is the rendered body. Every other heading is emitted with the
///   command chosen by [`heading_command`] for `level + heading_shift`.
///
/// Inline content (emphasis, strong, strikethrough, links, images, inline
/// code, breaks, math) that occurs inside a heading is collected into that
/// heading instead of being written to the body ahead of it. Inside the
/// peeled H1 only the plain text is kept. A table is replaced by a single
/// `[table omitted]` placeholder and all of its events are discarded. Raw
/// HTML and footnote references are dropped.
fn render_body(md: &str, heading_shift: i32) -> (Option<String>, String) {
    /// Append inline content to whichever buffer is currently collecting it:
    /// `plain` to the raw H1 title, otherwise `latex` to the open heading or,
    /// when no heading is open, to the body.
    fn emit_inline(
        title: &mut Option<String>,
        heading: &mut Option<String>,
        body: &mut String,
        plain: &str,
        latex: &str,
    ) {
        if let Some(buf) = title.as_mut() {
            buf.push_str(plain);
        } else if let Some(buf) = heading.as_mut() {
            buf.push_str(latex);
        } else {
            body.push_str(latex);
        }
    }

    let parser = Parser::new_ext(md, Options::all());
    let mut out = String::new();
    let mut first_h1: Option<String> = None;

    let mut h1_buf: Option<String> = None;
    let mut heading_buf: Option<String> = None;
    let mut current_heading_level: i32 = 0;
    let mut in_code_block = false;
    let mut list_stack: Vec<bool> = Vec::new(); // true = ordered
    // Nesting depth inside an omitted table; 0 means "not skipping".
    let mut skip_depth: usize = 0;

    for ev in parser {
        // While inside an omitted table, only track nesting so we know when
        // the table ends; none of its rows, cells or text reach the output.
        if skip_depth > 0 {
            match ev {
                Event::Start(_) => skip_depth += 1,
                Event::End(_) => skip_depth -= 1,
                _ => {}
            }
            continue;
        }

        match ev {
            Event::Start(tag) => match tag {
                Tag::Paragraph => {}
                Tag::Heading { level, .. } => {
                    let lvl = heading_int(level);
                    if lvl == 1 && first_h1.is_none() && h1_buf.is_none() {
                        // First H1: peel it off as the file title.
                        h1_buf = Some(String::new());
                    } else {
                        current_heading_level = lvl + heading_shift;
                        heading_buf = Some(String::new());
                    }
                }
                Tag::BlockQuote(_) => out.push_str("\\begin{quote}\n"),
                Tag::CodeBlock(_) => {
                    in_code_block = true;
                    out.push_str("\\begin{verbatim}\n");
                }
                Tag::List(start) => {
                    let ordered = start.is_some();
                    list_stack.push(ordered);
                    out.push_str(if ordered {
                        "\\begin{enumerate}\n"
                    } else {
                        "\\begin{itemize}\n"
                    });
                }
                Tag::Item => out.push_str("\\item "),
                Tag::Emphasis => {
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", "\\emph{");
                }
                Tag::Strong => {
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", "\\textbf{");
                }
                Tag::Strikethrough => {
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", "\\sout{");
                }
                Tag::Link { dest_url, .. } => {
                    let open = format!("\\href{{{}}}{{", escape_url(&dest_url));
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", &open);
                }
                Tag::Image { title, .. } => {
                    // The alt text arrives as following Text events.
                    let open = format!("\\emph{{[image: {}", escape(&title));
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", &open);
                }
                Tag::Table(_) => {
                    out.push_str("\\par\\emph{[table omitted]}\\par\n");
                    skip_depth = 1;
                }
                Tag::HtmlBlock | Tag::FootnoteDefinition(_) => {}
                _ => {}
            },
            Event::End(end) => match end {
                TagEnd::Paragraph => out.push_str("\n\n"),
                TagEnd::Heading(_) => {
                    if let Some(buf) = h1_buf.take() {
                        first_h1 = Some(buf.trim().to_string());
                    } else if let Some(buf) = heading_buf.take() {
                        let cmd = heading_command(current_heading_level);
                        if current_heading_level >= 6 {
                            // No sectioning command left: bold run-in line.
                            out.push_str(&format!("\\par{cmd}{{{buf}}}\\par\n"));
                        } else {
                            out.push_str(&format!("{cmd}*{{{buf}}}\n"));
                        }
                    }
                }
                TagEnd::BlockQuote(_) => out.push_str("\\end{quote}\n"),
                TagEnd::CodeBlock => {
                    in_code_block = false;
                    out.push_str("\\end{verbatim}\n");
                }
                TagEnd::List(_) => {
                    let ordered = list_stack.pop().unwrap_or(false);
                    out.push_str(if ordered {
                        "\\end{enumerate}\n"
                    } else {
                        "\\end{itemize}\n"
                    });
                }
                TagEnd::Item => out.push('\n'),
                TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough | TagEnd::Link => {
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", "}");
                }
                TagEnd::Image => {
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, "", "]}");
                }
                _ => {}
            },
            Event::Text(t) => {
                if let Some(buf) = h1_buf.as_mut() {
                    buf.push_str(&t);
                } else if let Some(buf) = heading_buf.as_mut() {
                    buf.push_str(&escape(&t));
                } else if in_code_block {
                    // verbatim content must not be escaped
                    out.push_str(&t);
                } else {
                    out.push_str(&escape(&t));
                }
            }
            Event::Code(c) => {
                let latex = format!("\\texttt{{{}}}", escape(&c));
                emit_inline(&mut h1_buf, &mut heading_buf, &mut out, &c, &latex);
            }
            Event::SoftBreak => {
                emit_inline(&mut h1_buf, &mut heading_buf, &mut out, " ", " ");
            }
            Event::HardBreak => {
                if h1_buf.is_some() || heading_buf.is_some() {
                    // A forced line break inside a heading degrades to a space.
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, " ", " ");
                } else {
                    out.push_str("\\\\\n");
                }
            }
            Event::Rule => out.push_str("\n\\par\\noindent\\hrulefill\\par\n"),
            Event::TaskListMarker(done) => {
                // Braced so LaTeX does not read "[x]" as \item's optional
                // label argument.
                out.push_str(if done { "{[x]} " } else { "{[ ]} " });
            }
            Event::Html(_) | Event::InlineHtml(_) | Event::FootnoteReference(_) => {}
            Event::InlineMath(s) => {
                let math = format!("${s}$");
                emit_inline(&mut h1_buf, &mut heading_buf, &mut out, &math, &math);
            }
            Event::DisplayMath(s) => {
                if h1_buf.is_some() || heading_buf.is_some() {
                    // Display math inside a heading degrades to inline math.
                    let math = format!("${s}$");
                    emit_inline(&mut h1_buf, &mut heading_buf, &mut out, &math, &math);
                } else {
                    out.push_str(&format!("\\[{s}\\]"));
                }
            }
        }
    }

    (first_h1, out)
}

/// Escape a URL for use as the first argument of `\href`.
///
/// Only `\`, `%` and `#` are touched: each is prefixed with a backslash. All
/// other characters are copied through unchanged.
fn escape_url(url: &str) -> String {
    let mut escaped = String::with_capacity(url.len());
    for ch in url.chars() {
        if matches!(ch, '\\' | '%' | '#') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

// Unit tests for the frontmatter splitter, the escaper and the
// markdown-to-LaTeX renderer.
#[cfg(test)]
mod tests {
    use super::*;

    // Input without a leading delimiter is returned unchanged, with no
    // frontmatter.
    #[test]
    fn frontmatter_split_handles_missing_block() {
        let (fm, body) = split_frontmatter("# Hi\n\nbody");
        assert!(fm.is_none());
        assert_eq!(body, "# Hi\n\nbody");
    }

    // A delimited block is separated from the body.
    // NOTE(review): the expected slices carry no leading newline, which
    // presumably relies on `YAML_DELIMITER` including its trailing line
    // break — confirm against `crate::constants`.
    #[test]
    fn frontmatter_split_extracts_block() {
        let raw = "---\ntitle: Foo\n---\n# Hi";
        let (fm, body) = split_frontmatter(raw);
        assert_eq!(fm, Some("title: Foo"));
        assert_eq!(body, "# Hi");
    }

    // `title` is read when present; a mapping without it yields `None`.
    #[test]
    fn yaml_title_round_trip() {
        assert_eq!(yaml_title("title: Foo"), Some("Foo".to_string()));
        assert_eq!(yaml_title("other: bar"), None);
    }

    // `&`, `%` and `_` are backslash-escaped; ordinary text is untouched.
    #[test]
    fn escape_handles_special_chars() {
        assert_eq!(escape("a & b"), "a \\& b");
        assert_eq!(escape("100% _underscore_"), "100\\% \\_underscore\\_");
    }

    // The first H1 is returned separately and does not appear in the body.
    #[test]
    fn first_h1_is_peeled() {
        let pf = prepare_md("# Title\n\nbody text\n", 1);
        assert_eq!(pf.first_h1.as_deref(), Some("Title"));
        assert!(pf.body_latex.contains("body text"));
        assert!(!pf.body_latex.contains("Title"));
    }

    // An H2 shifted by 2 becomes level 4, which maps to `\paragraph`.
    #[test]
    fn heading_demotion_shifts_levels() {
        let pf = prepare_md("## Sub\n", 2);
        assert!(pf.body_latex.contains("\\paragraph*{Sub}"));
    }

    // The frontmatter title is surfaced through `prepare_md`.
    #[test]
    fn frontmatter_title_round_trip() {
        let pf = prepare_md("---\ntitle: Set\n---\n\nbody\n", 0);
        assert_eq!(pf.frontmatter_title.as_deref(), Some("Set"));
    }

    // A bullet list renders as an `itemize` environment with `\item` lines.
    #[test]
    fn list_emits_itemize() {
        let pf = prepare_md("- one\n- two\n", 0);
        assert!(pf.body_latex.contains("\\begin{itemize}"));
        assert!(pf.body_latex.contains("\\item one"));
    }

    // Fenced code goes into `verbatim` and its text is not escaped.
    #[test]
    fn code_block_uses_verbatim_unescaped() {
        let pf = prepare_md("```\nlet x = 1 & 2;\n```\n", 0);
        assert!(pf.body_latex.contains("\\begin{verbatim}"));
        assert!(pf.body_latex.contains("let x = 1 & 2;"));
    }

    // An inline link renders as `\href{url}{text}`.
    #[test]
    fn link_emits_href() {
        let pf = prepare_md("[click](https://x.test)\n", 0);
        assert!(pf.body_latex.contains("\\href{https://x.test}{click}"));
    }
}