mdpdf-core 0.0.0-alpha.0

Markdown parsing, frontmatter extraction, and syntax highlighting for mdpdf
Documentation
use std::collections::HashMap;

/// Parsed frontmatter metadata from a markdown document.
///
/// Every value is kept as the string found in the source; nothing is coerced
/// to a boolean, date, or number. Keys without a dedicated field are collected
/// in [`Frontmatter::extra`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Frontmatter {
    /// Value of the `title` key, if present.
    pub title: Option<String>,
    /// Value of the `subtitle` key, if present.
    pub subtitle: Option<String>,
    /// Value of the `date` key, stored as written (not parsed or validated).
    pub date: Option<String>,
    /// Value of the `mode` key; see [`Frontmatter::is_dense`].
    pub mode: Option<String>,
    /// Value of the `toc` key; see [`Frontmatter::skip_toc`].
    pub toc: Option<String>,
    /// Every other `key: value` pair, keyed by the key text.
    pub extra: HashMap<String, String>,
}

impl Frontmatter {
    /// Returns `true` when `mode` is exactly the string `"dense"`.
    ///
    /// The comparison is case-sensitive; a missing `mode` yields `false`.
    #[must_use]
    pub fn is_dense(&self) -> bool {
        matches!(self.mode.as_deref(), Some("dense"))
    }

    /// Returns `true` when `toc` is exactly the string `"false"`.
    ///
    /// The comparison is case-sensitive; a missing `toc` yields `false`.
    #[must_use]
    pub fn skip_toc(&self) -> bool {
        matches!(self.toc.as_deref(), Some("false"))
    }
}

/// Parse YAML-ish frontmatter from markdown source.
///
/// Frontmatter is recognised only when the very first line (after any leading
/// byte-order marks) is exactly `---`, and a later line is also exactly `---`.
/// Both `\n` and `\r\n` line endings are accepted on either fence, and the
/// closing fence may be the last line of the input without a trailing newline.
/// An empty block (`---` immediately followed by `---`) is valid.
///
/// Inside the block, blank lines and lines starting with `#` are skipped.
/// Remaining lines are read as simple `key: value` or `key: "value"` pairs;
/// lines that do not parse as a pair are ignored.
///
/// # Returns
///
/// The parsed frontmatter and the body (everything after the closing fence
/// line). If there is no opening fence or no closing fence, returns
/// `Frontmatter::default()` together with `src` unchanged (BOM included).
pub fn parse_frontmatter(src: &str) -> (Frontmatter, &str) {
    // A leading BOM must not hide the opening fence.
    let trimmed = src.trim_start_matches('\u{feff}');

    // Strip the opening fence by its real length (4 bytes for LF, 5 for CRLF).
    let after_fence = match trimmed
        .strip_prefix("---\r\n")
        .or_else(|| trimmed.strip_prefix("---\n"))
    {
        Some(rest) => rest,
        None => return (Frontmatter::default(), src),
    };

    // Walk the remainder line by line, tracking byte offsets, until a line that
    // is exactly `---` is found. `yaml_end` is the offset where that line
    // starts; the body begins right after the line's own terminator, whatever
    // its length.
    let mut yaml_end = 0;
    let mut body_start = None;
    for line in after_fence.split_inclusive('\n') {
        let content = line.strip_suffix('\n').unwrap_or(line);
        let content = content.strip_suffix('\r').unwrap_or(content);
        if content == "---" {
            body_start = Some(yaml_end + line.len());
            break;
        }
        yaml_end += line.len();
    }

    let body_start = match body_start {
        Some(start) => start,
        // Unterminated block: treat the whole input as body.
        None => return (Frontmatter::default(), src),
    };

    let yaml_block = &after_fence[..yaml_end];
    let body = &after_fence[body_start..];

    let mut fm = Frontmatter::default();
    for line in yaml_block.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if let Some((key, val)) = parse_kv(line) {
            match key {
                "title" => fm.title = Some(val),
                "subtitle" => fm.subtitle = Some(val),
                "date" => fm.date = Some(val),
                "mode" => fm.mode = Some(val),
                "toc" => fm.toc = Some(val),
                _ => {
                    fm.extra.insert(key.to_owned(), val);
                }
            }
        }
    }

    (fm, body)
}

/// Split a single `key: value` line at its first colon.
///
/// Both sides are trimmed. If the value is wrapped in a matching pair of
/// double or single quotes, that one outer pair is removed; a lone quote
/// character or mismatched quotes are left untouched. Colons after the first
/// one belong to the value.
///
/// Returns `None` when the line has no colon or the key is empty.
fn parse_kv(line: &str) -> Option<(&str, String)> {
    let (key, val) = line.split_once(':')?;
    let key = key.trim();
    if key.is_empty() {
        return None;
    }
    let val = val.trim();

    // Strip prefix and suffix separately so a value consisting of a single
    // quote character is not treated as both the opening and closing quote.
    let unquoted = ['"', '\'']
        .iter()
        .find_map(|&quote| val.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(val);

    Some((key, unquoted.to_owned()))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced header is parsed into fields and removed from the returned body.
    #[test]
    fn parse_basic_frontmatter() {
        let src = concat!(
            "---\n",
            "title: \"Hello World\"\n",
            "mode: dense\n",
            "toc: false\n",
            "---\n",
            "# Body",
        );

        let (meta, rest) = parse_frontmatter(src);

        assert_eq!(meta.title, Some("Hello World".to_owned()));
        assert!(meta.is_dense());
        assert!(meta.skip_toc());
        assert_eq!(rest, "# Body");
    }

    /// Input without an opening fence comes back untouched with default metadata.
    #[test]
    fn no_frontmatter() {
        let src = "# Just a heading\nSome text";

        let (meta, rest) = parse_frontmatter(src);

        assert_eq!(meta.title, None);
        assert_eq!(rest, src);
    }
}