use crate::constants::YAML_DELIMITER;
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use std::collections::HashMap;
use std::error::Error;
use std::fs;
use std::path::Path;
/// Result of preparing one Markdown document for LaTeX output.
pub struct PreparedFile {
/// String value of the `title` key in the YAML front matter, when a
/// front-matter block was found and it parses with a string title.
pub frontmatter_title: Option<String>,
/// Text of the first level-1 heading, trimmed and NOT LaTeX-escaped.
/// That heading is left out of `body_latex`.
pub first_h1: Option<String>,
/// LaTeX rendering of the Markdown body (everything after the front matter).
pub body_latex: String,
}
/// Reads the Markdown file at `path` and prepares it with [`prepare_md`].
///
/// `heading_shift` is forwarded unchanged to [`prepare_md`].
///
/// # Errors
///
/// Returns the underlying I/O error (boxed) when the file cannot be read as
/// UTF-8 text.
pub fn prepare_file(path: &Path, heading_shift: i32) -> Result<PreparedFile, Box<dyn Error>> {
    match fs::read_to_string(path) {
        Ok(markdown) => Ok(prepare_md(&markdown, heading_shift)),
        Err(io_err) => Err(io_err.into()),
    }
}
pub fn prepare_md(raw: &str, heading_shift: i32) -> PreparedFile {
let (frontmatter, body) = split_frontmatter(raw);
let frontmatter_title = frontmatter.and_then(yaml_title);
let (first_h1, body_latex) = render_body(body, heading_shift);
PreparedFile {
frontmatter_title,
first_h1,
body_latex,
}
}
/// Escapes `text` so it can be placed in running LaTeX text.
///
/// The characters `\ & % $ # _ { } ~ ^ < >` are replaced by their LaTeX
/// equivalents; every other character is copied through unchanged.
pub fn escape(text: &str) -> String {
    /// LaTeX spelling for a special character, or `None` for ordinary ones.
    fn replacement(ch: char) -> Option<&'static str> {
        let latex = match ch {
            '\\' => "\\textbackslash{}",
            '&' => "\\&",
            '%' => "\\%",
            '$' => "\\$",
            '#' => "\\#",
            '_' => "\\_",
            '{' => "\\{",
            '}' => "\\}",
            '~' => "\\textasciitilde{}",
            '^' => "\\textasciicircum{}",
            '<' => "\\textless{}",
            '>' => "\\textgreater{}",
            _ => return None,
        };
        Some(latex)
    }

    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match replacement(ch) {
            Some(latex) => escaped.push_str(latex),
            None => escaped.push(ch),
        }
    }
    escaped
}
/// Splits `raw` into an optional front-matter block and the remaining body.
///
/// A block is recognised only when `raw` begins with `YAML_DELIMITER` and a
/// later closing marker (a newline followed by `YAML_DELIMITER`) exists. The
/// returned block is the text between the two markers and the body is
/// whatever follows the closing marker. In every other case the result is
/// `(None, raw)`.
fn split_frontmatter(raw: &str) -> (Option<&str>, &str) {
    let after_opener = match raw.strip_prefix(YAML_DELIMITER) {
        Some(rest) => rest,
        None => return (None, raw),
    };
    let closing_marker = format!("\n{YAML_DELIMITER}");
    // `split_once` cuts at the first occurrence of the closing marker.
    if let Some((yaml, body)) = after_opener.split_once(closing_marker.as_str()) {
        (Some(yaml), body)
    } else {
        (None, raw)
    }
}
/// Extracts the `title` entry from a YAML mapping.
///
/// Returns `None` when the text does not deserialize into a string-keyed
/// mapping, when there is no `title` key, or when its value is not a string.
fn yaml_title(yaml: &str) -> Option<String> {
    let fields = match serde_yaml::from_str::<HashMap<String, serde_yaml::Value>>(yaml) {
        Ok(fields) => fields,
        Err(_) => return None,
    };
    let title = fields.get("title")?.as_str()?;
    Some(title.to_owned())
}
fn heading_int(level: HeadingLevel) -> i32 {
match level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
}
}
/// Maps a (possibly shifted) heading depth to the LaTeX command used for it.
///
/// Depths below 1 are treated as 1 and depths above 6 as 6; depth 6 has no
/// sectioning command and falls back to `\textbf`.
fn heading_command(level: i32) -> &'static str {
    const COMMANDS: [&str; 6] = [
        "\\section",
        "\\subsection",
        "\\subsubsection",
        "\\paragraph",
        "\\subparagraph",
        "\\textbf",
    ];
    // After clamping the value is in 1..=6, so the cast cannot truncate and
    // the index is always in bounds.
    let slot = (level.clamp(1, 6) - 1) as usize;
    COMMANDS[slot]
}
fn render_body(md: &str, heading_shift: i32) -> (Option<String>, String) {
let parser = Parser::new_ext(md, Options::all());
let mut out = String::new();
let mut first_h1: Option<String> = None;
let mut h1_buf: Option<String> = None;
let mut heading_buf: Option<String> = None;
let mut current_heading_level: i32 = 0;
let mut in_code_block = false;
let mut list_stack: Vec<bool> = Vec::new();
for ev in parser {
match ev {
Event::Start(tag) => match tag {
Tag::Paragraph => {}
Tag::Heading { level, .. } => {
let lvl = heading_int(level);
if lvl == 1 && first_h1.is_none() && h1_buf.is_none() {
h1_buf = Some(String::new());
} else {
current_heading_level = lvl + heading_shift;
heading_buf = Some(String::new());
}
}
Tag::BlockQuote(_) => out.push_str("\\begin{quote}\n"),
Tag::CodeBlock(_) => {
in_code_block = true;
out.push_str("\\begin{verbatim}\n");
}
Tag::List(start) => {
let ordered = start.is_some();
list_stack.push(ordered);
out.push_str(if ordered {
"\\begin{enumerate}\n"
} else {
"\\begin{itemize}\n"
});
}
Tag::Item => out.push_str("\\item "),
Tag::Emphasis => out.push_str("\\emph{"),
Tag::Strong => out.push_str("\\textbf{"),
Tag::Strikethrough => out.push_str("\\sout{"),
Tag::Link { dest_url, .. } => {
out.push_str("\\href{");
out.push_str(&escape_url(&dest_url));
out.push_str("}{");
}
Tag::Image { title, .. } => {
out.push_str("\\emph{[image: ");
out.push_str(&escape(&title));
}
Tag::Table(_) => out.push_str("\\par\\emph{[table omitted]}\\par\n"),
Tag::HtmlBlock | Tag::FootnoteDefinition(_) => {}
_ => {}
},
Event::End(end) => match end {
TagEnd::Paragraph => out.push_str("\n\n"),
TagEnd::Heading(_) => {
if let Some(buf) = h1_buf.take() {
first_h1 = Some(buf.trim().to_string());
} else if let Some(buf) = heading_buf.take() {
let cmd = heading_command(current_heading_level);
if current_heading_level >= 6 {
out.push_str(&format!("\\par{cmd}{{{buf}}}\\par\n"));
} else {
out.push_str(&format!("{cmd}*{{{buf}}}\n"));
}
}
}
TagEnd::BlockQuote(_) => out.push_str("\\end{quote}\n"),
TagEnd::CodeBlock => {
in_code_block = false;
out.push_str("\\end{verbatim}\n");
}
TagEnd::List(_) => {
let ordered = list_stack.pop().unwrap_or(false);
out.push_str(if ordered {
"\\end{enumerate}\n"
} else {
"\\end{itemize}\n"
});
}
TagEnd::Item => out.push('\n'),
TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough => out.push('}'),
TagEnd::Link => out.push('}'),
TagEnd::Image => out.push_str("]}"),
_ => {}
},
Event::Text(t) => {
if let Some(buf) = h1_buf.as_mut() {
buf.push_str(&t);
} else if let Some(buf) = heading_buf.as_mut() {
buf.push_str(&escape(&t));
} else if in_code_block {
out.push_str(&t);
} else {
out.push_str(&escape(&t));
}
}
Event::Code(c) => {
if let Some(buf) = h1_buf.as_mut() {
buf.push_str(&c);
} else if let Some(buf) = heading_buf.as_mut() {
buf.push_str(&format!("\\texttt{{{}}}", escape(&c)));
} else {
out.push_str(&format!("\\texttt{{{}}}", escape(&c)));
}
}
Event::SoftBreak => out.push(' '),
Event::HardBreak => out.push_str("\\\\\n"),
Event::Rule => out.push_str("\n\\par\\noindent\\hrulefill\\par\n"),
Event::TaskListMarker(done) => {
out.push_str(if done { "[x] " } else { "[ ] " });
}
Event::Html(_) | Event::InlineHtml(_) | Event::FootnoteReference(_) => {}
Event::InlineMath(s) => out.push_str(&format!("${s}$")),
Event::DisplayMath(s) => out.push_str(&format!("\\[{s}\\]")),
}
}
(first_h1, out)
}
/// Escapes a URL for use as the first argument of `\href`.
///
/// Each backslash, `%` and `#` is prefixed with a backslash; all other
/// characters are copied through unchanged.
fn escape_url(url: &str) -> String {
    let mut escaped = String::with_capacity(url.len());
    for ch in url.chars() {
        if matches!(ch, '\\' | '%' | '#') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
// Unit tests for front-matter splitting, title extraction, LaTeX escaping and
// body rendering.
#[cfg(test)]
mod tests {
use super::*;
// Input that does not open with a front-matter block is returned unchanged
// as the body.
#[test]
fn frontmatter_split_handles_missing_block() {
let (fm, body) = split_frontmatter("# Hi\n\nbody");
assert!(fm.is_none());
assert_eq!(body, "# Hi\n\nbody");
}
// A delimited block is separated from the Markdown that follows it.
#[test]
fn frontmatter_split_extracts_block() {
let raw = "---\ntitle: Foo\n---\n# Hi";
let (fm, body) = split_frontmatter(raw);
assert_eq!(fm, Some("title: Foo"));
assert_eq!(body, "# Hi");
}
// `title` is returned when present; a mapping without it yields `None`.
#[test]
fn yaml_title_round_trip() {
assert_eq!(yaml_title("title: Foo"), Some("Foo".to_string()));
assert_eq!(yaml_title("other: bar"), None);
}
// Spot-checks a few LaTeX special characters.
#[test]
fn escape_handles_special_chars() {
assert_eq!(escape("a & b"), "a \\& b");
assert_eq!(escape("100% _underscore_"), "100\\% \\_underscore\\_");
}
// The first level-1 heading is reported separately and removed from the body.
#[test]
fn first_h1_is_peeled() {
let pf = prepare_md("# Title\n\nbody text\n", 1);
assert_eq!(pf.first_h1.as_deref(), Some("Title"));
assert!(pf.body_latex.contains("body text"));
assert!(!pf.body_latex.contains("Title"));
}
// Level 2 shifted by 2 becomes level 4, which maps to `\paragraph`.
#[test]
fn heading_demotion_shifts_levels() {
let pf = prepare_md("## Sub\n", 2);
assert!(pf.body_latex.contains("\\paragraph*{Sub}"));
}
// End-to-end: the front-matter title is surfaced by `prepare_md`.
#[test]
fn frontmatter_title_round_trip() {
let pf = prepare_md("---\ntitle: Set\n---\n\nbody\n", 0);
assert_eq!(pf.frontmatter_title.as_deref(), Some("Set"));
}
// Bullet lists render as an `itemize` environment with `\item` entries.
#[test]
fn list_emits_itemize() {
let pf = prepare_md("- one\n- two\n", 0);
assert!(pf.body_latex.contains("\\begin{itemize}"));
assert!(pf.body_latex.contains("\\item one"));
}
// Fenced code goes into `verbatim` and its contents are not escaped.
#[test]
fn code_block_uses_verbatim_unescaped() {
let pf = prepare_md("```\nlet x = 1 & 2;\n```\n", 0);
assert!(pf.body_latex.contains("\\begin{verbatim}"));
assert!(pf.body_latex.contains("let x = 1 & 2;"));
}
// Inline links render as `\href{url}{text}`.
#[test]
fn link_emits_href() {
let pf = prepare_md("[click](https://x.test)\n", 0);
assert!(pf.body_latex.contains("\\href{https://x.test}{click}"));
}
}