use pulldown_cmark::{Options, Parser, html};
use regex::Regex;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
/// Upper bound, in bytes, on the size of an SVG file that may be inlined
/// (512 KiB). `resolve_diagram` reports larger files as
/// `DiagramResolve::TooLarge`.
const SVG_MAX_BYTES: u64 = 512 * 1024;
/// Output of a render pass, as returned by `render_body` and
/// `render_wiki_page`.
#[derive(Debug, Clone)]
pub struct RenderResult {
/// Rendered HTML for the main body.
pub body_html: String,
/// HTML for the extracted aside (`<p class="aside">…</p>`); empty when no
/// aside was extracted. `render_wiki_page` always leaves this empty.
pub aside_html: String,
/// Number of SVG diagrams whose file contents were inlined into `body_html`.
pub diagrams_inlined: u32,
/// Non-fatal warning codes collected while rendering; this file emits
/// `"aside_multiple"` and `"diagram_fallback_img"`.
pub warnings: Vec<String>,
}
/// Fatal errors that abort a render.
#[derive(Debug, Clone)]
pub enum RenderError {
    /// A diagram reference resolved to a location outside
    /// `session_dir/diagrams/`. Carries the offending path.
    DiagramOutOfBounds(PathBuf),
}

impl std::fmt::Display for RenderError {
    /// Writes a human-readable description that names the offending path.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable today; adding a variant turns this into a compile error,
        // which forces the message to be extended.
        let Self::DiagramOutOfBounds(path) = self;
        write!(
            f,
            "diagram path '{}' resolves outside session_dir/diagrams/",
            path.display()
        )
    }
}

impl std::error::Error for RenderError {}
/// Renders a research document body to HTML.
///
/// Steps, in order: scaffolding sections are stripped, the markdown is
/// converted to HTML, the first `aside` blockquote is lifted out, eligible
/// `diagrams/*.svg` images are inlined, and numbered headings are wrapped in
/// spans.
///
/// # Errors
///
/// Returns whatever [`RenderError`] diagram inlining reports.
pub fn render_body(md: &str, session_dir: &Path) -> Result<RenderResult, RenderError> {
    let stripped = strip_scaffolding(md);
    let raw_html = markdown_to_html(&stripped);
    let (without_aside, aside_html, aside_count) = extract_aside(&raw_html);
    let (with_diagrams, diagrams_inlined, mut warnings) =
        inline_diagrams(&without_aside, session_dir)?;

    // Only the first aside is extracted; flag the extras. This warning is
    // appended after any diagram warnings.
    if aside_count > 1 {
        warnings.push(String::from("aside_multiple"));
    }

    let body_html = apply_section_numbers(&with_diagrams);
    Ok(RenderResult {
        body_html,
        aside_html,
        diagrams_inlined,
        warnings,
    })
}
/// Renders a wiki page to HTML.
///
/// Unlike `render_body`, the markdown is converted as-is: no scaffolding is
/// stripped, no aside is extracted and headings are not renumbered. Diagram
/// images are still inlined.
///
/// # Errors
///
/// Returns whatever [`RenderError`] diagram inlining reports.
pub fn render_wiki_page(md: &str, session_dir: &Path) -> Result<RenderResult, RenderError> {
    let converted = markdown_to_html(md);
    inline_diagrams(&converted, session_dir).map(|(body_html, diagrams_inlined, warnings)| {
        RenderResult {
            body_html,
            aside_html: String::new(),
            diagrams_inlined,
            warnings,
        }
    })
}
/// Converts markdown to an HTML string with the tables and strikethrough
/// extensions enabled.
fn markdown_to_html(md: &str) -> String {
    // Reserve twice the input length up front.
    let mut rendered = String::with_capacity(md.len() * 2);
    html::push_html(
        &mut rendered,
        Parser::new_ext(md, Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH),
    );
    rendered
}
/// Removes scaffolding from a document before it is rendered.
///
/// Everything before the first section whose heading starts with
/// `## Overview` is discarded. From there on, sections whose heading starts
/// with `## Sources` are dropped, as are sections whose body is blank or holds
/// only single-line HTML comments. Surviving sections are concatenated in
/// their original order.
fn strip_scaffolding(md: &str) -> String {
    let mut kept = String::with_capacity(md.len());

    let retained = split_on_headings(md)
        .into_iter()
        .skip_while(|sec| !sec.heading.trim().starts_with("## Overview"))
        .filter(|sec| !sec.heading.trim().starts_with("## Sources"))
        .filter(|sec| !section_body_is_empty_or_placeholder(&sec.body));

    for sec in retained {
        if !sec.heading.is_empty() {
            kept.push_str(&sec.heading);
            kept.push('\n');
        }
        kept.push_str(&sec.body);
    }
    kept
}
/// One `## `-delimited slice of a markdown document.
struct Section {
    /// The heading line exactly as written, without its line terminator.
    /// Empty for text that precedes the first heading.
    heading: String,
    /// Every line after the heading up to the next heading, each terminated
    /// with `\n`.
    body: String,
}

/// Describes the code-fence marker `line` starts with, if any.
///
/// Returns the fence character (`` ` `` or `~`), the length of the run (at
/// least 3) and the text that follows the run. Leading whitespace is ignored,
/// matching how heading lines are detected.
fn fence_run(line: &str) -> Option<(char, usize, &str)> {
    let trimmed = line.trim_start();
    let marker = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = trimmed.chars().take_while(|&c| c == marker).count();
    // Both markers are single-byte ASCII, so `run` is also a byte offset.
    (run >= 3).then(|| (marker, run, &trimmed[run..]))
}

/// Splits `md` into sections at every line that starts with `## ` (after
/// optional leading whitespace).
///
/// Lines inside a fenced code block (delimited by three or more backticks or
/// tildes) are never treated as headings, so a `## comment` in a code sample
/// stays in the body of the section that contains it. A fence is closed only
/// by a run of the same character at least as long as the opener, followed by
/// nothing but whitespace.
///
/// Text before the first heading becomes a section with an empty `heading`.
/// A section is emitted only if it has a heading or a non-empty body.
fn split_on_headings(md: &str) -> Vec<Section> {
    let mut sections: Vec<Section> = Vec::new();
    let mut current = Section {
        heading: String::new(),
        body: String::new(),
    };
    // Marker character and run length of the fence currently open, if any.
    let mut open_fence: Option<(char, usize)> = None;

    for line in md.lines() {
        let was_in_fence = open_fence.is_some();
        open_fence = match (open_fence, fence_run(line)) {
            // Closing fence: same character, at least as long, nothing after.
            (Some((marker, run)), Some((m, n, rest)))
                if m == marker && n >= run && rest.trim().is_empty() =>
            {
                None
            }
            // Opening fence. A backtick run followed by more backticks on the
            // same line is inline code rather than a fence.
            (None, Some((m, n, info))) if m != '`' || !info.contains('`') => Some((m, n)),
            (state, _) => state,
        };

        if !was_in_fence && line.trim_start().starts_with("## ") {
            if !current.heading.is_empty() || !current.body.is_empty() {
                let finished = std::mem::replace(
                    &mut current,
                    Section {
                        heading: String::new(),
                        body: String::new(),
                    },
                );
                sections.push(finished);
            }
            current.heading = line.to_string();
        } else {
            current.body.push_str(line);
            current.body.push('\n');
        }
    }

    if !current.heading.is_empty() || !current.body.is_empty() {
        sections.push(current);
    }
    sections
}
/// Reports whether a section body carries no real content.
///
/// A body counts as empty when every line, once trimmed, is either blank or a
/// single-line HTML comment (starts with `<!--` and ends with `-->`).
fn section_body_is_empty_or_placeholder(body: &str) -> bool {
    body.lines().map(str::trim).all(|line| {
        line.is_empty() || (line.starts_with("<!--") && line.ends_with("-->"))
    })
}
/// Returns the lazily compiled pattern that matches an aside blockquote: a
/// `<blockquote>` whose paragraph opens with `<strong>aside:</strong>`.
///
/// The text after the marker is captured as `body`; whitespace following the
/// closing tag is part of the match.
fn aside_re() -> &'static Regex {
    const PATTERN: &str =
        r"(?s)<blockquote>\s*<p>\s*<strong>aside:</strong>\s*(?P<body>.*?)</p>\s*</blockquote>\s*";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("aside regex must compile"))
}
/// Lifts the first aside blockquote out of `html`.
///
/// Returns `(html_without_first_aside, aside_html, total_asides_found)`. With
/// no aside present, the input is returned unchanged together with an empty
/// aside and a count of 0. Any asides after the first stay in the returned
/// HTML; the count lets the caller warn about them.
fn extract_aside(html: &str) -> (String, String, usize) {
    let mut hits = aside_re().captures_iter(html);
    let Some(first) = hits.next() else {
        return (html.to_string(), String::new(), 0);
    };
    // One for `first`, plus every remaining match.
    let total = 1 + hits.count();

    // Group 0 is the whole match, and `body` is a mandatory group in the
    // pattern, so both are present on any match.
    let span = first.get(0).unwrap();
    let body = first.name("body").unwrap().as_str().trim();

    let rewritten = [&html[..span.start()], &html[span.end()..]].concat();
    let aside_html = format!("<p class=\"aside\">{body}</p>");
    (rewritten, aside_html, total)
}
/// Returns the lazily compiled pattern that matches an `<img>` tag with `src`
/// then `alt` attributes and an optional `title`, capturing `src` and `alt`.
fn img_re() -> &'static Regex {
    const PATTERN: &str =
        r#"<img src="(?P<src>[^"]+)" alt="(?P<alt>[^"]*)"(?: title="[^"]*")?\s*/?>"#;
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("img regex must compile"))
}
/// Replaces `<img>` tags that point at `diagrams/*.svg` with the SVG contents.
///
/// For each matching tag:
/// * an inlinable file becomes `<div class="diagram">…</div>`, with the alt
///   text as a caption when it is non-empty;
/// * a file that resolves but cannot be read as text keeps its original
///   `<img>` tag and adds a `diagram_fallback_img` warning;
/// * a missing, oversized or non-SVG file becomes a "diagram pending"
///   placeholder and adds a `diagram_fallback_img` warning.
///
/// Tags whose `src` does not start with `diagrams/` or does not end in `.svg`
/// (case-insensitive) are copied through unchanged.
///
/// Returns `(rewritten_html, inlined_count, warnings)`.
///
/// # Errors
///
/// Propagates the [`RenderError`] returned by `resolve_diagram`.
fn inline_diagrams(
    html: &str,
    session_dir: &Path,
) -> Result<(String, u32, Vec<String>), RenderError> {
    let diagrams_root = session_dir.join("diagrams");
    let mut rendered = String::with_capacity(html.len());
    let mut warnings: Vec<String> = Vec::new();
    let mut inlined = 0u32;
    // Byte offset in `html` just past the last tag handled so far.
    let mut cursor = 0usize;

    for caps in img_re().captures_iter(html) {
        let tag = caps.get(0).unwrap();
        // Copy the text between the previous tag and this one verbatim.
        rendered.push_str(&html[cursor..tag.start()]);
        cursor = tag.end();

        let src = caps.name("src").unwrap().as_str();
        let alt = caps.name("alt").map_or("", |a| a.as_str());

        let is_local_svg =
            src.starts_with("diagrams/") && src.to_ascii_lowercase().ends_with(".svg");
        if !is_local_svg {
            rendered.push_str(tag.as_str());
            continue;
        }

        let svg_path = match resolve_diagram(src, &diagrams_root)? {
            DiagramResolve::Inlinable(path) => path,
            DiagramResolve::TooLarge | DiagramResolve::Missing | DiagramResolve::NotSvg => {
                // Label the placeholder with the alt text, or the file name
                // when there is none.
                let fname = src.strip_prefix("diagrams/").unwrap_or(src);
                let label = if alt.is_empty() { fname } else { alt };
                rendered.push_str(&format!(
                    r#"<div class="diagram diagram-missing"><p class="diagram-missing-label">diagram pending</p><p class="caption">{label} — <code>{src}</code></p></div>"#
                ));
                warnings.push("diagram_fallback_img".to_string());
                continue;
            }
        };

        if let Ok(svg) = std::fs::read_to_string(&svg_path) {
            rendered.push_str("<div class=\"diagram\">");
            rendered.push_str(&svg);
            if !alt.is_empty() {
                rendered.push_str("<p class=\"caption\">");
                rendered.push_str(alt);
                rendered.push_str("</p>");
            }
            rendered.push_str("</div>");
            inlined += 1;
        } else {
            // The file resolved but could not be read as text: keep the tag.
            rendered.push_str(tag.as_str());
            warnings.push("diagram_fallback_img".to_string());
        }
    }

    rendered.push_str(&html[cursor..]);
    Ok((rendered, inlined, warnings))
}
/// Non-fatal outcomes of `resolve_diagram`.
enum DiagramResolve {
/// The file exists inside the diagrams root, is within `SVG_MAX_BYTES` and
/// has an `svg` extension; carries its canonical path.
Inlinable(PathBuf),
/// The file is larger than `SVG_MAX_BYTES`.
TooLarge,
/// The path could not be canonicalized or its metadata could not be read.
Missing,
/// The canonical path does not have an `svg` extension (case-insensitive).
NotSvg,
}
/// Resolves an image `src` of the form `diagrams/<suffix>` to a file under
/// `diagrams_root` and classifies it.
///
/// Checks, in order:
/// 1. a `..` path component in the suffix is rejected outright;
/// 2. the candidate is canonicalized (failure means `Missing`);
/// 3. the canonical path must lie under the canonical root;
/// 4. the size must not exceed `SVG_MAX_BYTES` (`TooLarge`);
/// 5. the canonical extension must be `svg`, case-insensitive (`NotSvg`).
///
/// # Errors
///
/// Returns [`RenderError::DiagramOutOfBounds`] when the suffix contains a
/// parent-directory component or the canonical path escapes `diagrams_root`.
fn resolve_diagram(src: &str, diagrams_root: &Path) -> Result<DiagramResolve, RenderError> {
    let suffix = src.strip_prefix("diagrams/").unwrap_or(src);
    let candidate = diagrams_root.join(suffix);

    // Reject real `..` components only. A plain substring test would also
    // reject harmless file names such as `v1..v2.svg`.
    let climbs_out = Path::new(suffix)
        .components()
        .any(|part| matches!(part, std::path::Component::ParentDir));
    if climbs_out {
        return Err(RenderError::DiagramOutOfBounds(candidate));
    }

    let Ok(canonical) = std::fs::canonicalize(&candidate) else {
        return Ok(DiagramResolve::Missing);
    };

    // Compare canonical paths so a symlink pointing outside the root is
    // caught. If the root itself cannot be canonicalized, use it as given.
    let root_canonical =
        std::fs::canonicalize(diagrams_root).unwrap_or_else(|_| diagrams_root.to_path_buf());
    if !canonical.starts_with(&root_canonical) {
        return Err(RenderError::DiagramOutOfBounds(canonical));
    }

    let Ok(meta) = std::fs::metadata(&canonical) else {
        return Ok(DiagramResolve::Missing);
    };
    if meta.len() > SVG_MAX_BYTES {
        return Ok(DiagramResolve::TooLarge);
    }

    let is_svg = canonical
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext.eq_ignore_ascii_case("svg"));
    if !is_svg {
        return Ok(DiagramResolve::NotSvg);
    }
    Ok(DiagramResolve::Inlinable(canonical))
}
/// Returns the lazily compiled pattern that matches a heading of the form
/// `<hN>NN · Title</hN>`, capturing the level (`lvl`), the one- or two-digit
/// number (`num`) and the title (`title`).
fn section_num_re() -> &'static Regex {
    const PATTERN: &str = r"<h(?P<lvl>[1-6])>(?P<num>\d{1,2})\s*·\s*(?P<title>.+?)</h[1-6]>";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("section num regex must compile"))
}
fn apply_section_numbers(html: &str) -> String {
let re = section_num_re();
re.replace_all(html, |caps: ®ex::Captures| {
let lvl = caps.name("lvl").unwrap().as_str();
let num = caps.name("num").unwrap().as_str();
let title = caps.name("title").unwrap().as_str();
format!("<h{lvl}><span class=\"section-num\">{num}</span><span>{title}</span></h{lvl}>")
})
.into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Renders `md` with `render_body` against a fresh, empty session directory.
    fn render(md: &str) -> RenderResult {
        let tmp = TempDir::new().unwrap();
        render_body(md, tmp.path()).unwrap()
    }

    #[test]
    fn aside_single_block_extracted() {
        let md = "## Overview\nbody\n\n> **aside:** The less you build, the more it works.\n\nmore body\n";
        let r = render(md);
        assert!(r.aside_html.contains("The less you build"));
        assert!(r.aside_html.starts_with("<p class=\"aside\">"));
        assert!(!r.body_html.contains("The less you build"));
        assert!(r.body_html.contains("more body"));
    }

    #[test]
    fn aside_absent_leaves_body_untouched() {
        let md = "## Overview\nno aside\n\n> just a plain quote\n";
        let r = render(md);
        assert!(r.aside_html.is_empty());
        assert!(r.body_html.contains("plain quote"));
    }

    #[test]
    fn multiple_asides_warn_and_keep_first() {
        let md = "## Overview\nx\n\n> **aside:** first\n\nmid\n\n> **aside:** second\n";
        let r = render(md);
        assert!(r.aside_html.contains("first"));
        assert!(!r.aside_html.contains("second"));
        assert!(r.warnings.iter().any(|w| w == "aside_multiple"));
        // The second aside is left in the body as an ordinary blockquote.
        assert!(r.body_html.contains("<strong>aside:</strong>"));
    }

    #[test]
    fn section_number_wrapped() {
        let md = "## Overview\ncontent\n\n## 01 · WHY\nintro\n";
        let r = render(md);
        assert!(
            r.body_html
                .contains("<span class=\"section-num\">01</span>")
        );
        assert!(r.body_html.contains("<span>WHY</span>"));
        assert!(!r.body_html.contains("<h2>01 ·"));
    }

    #[test]
    fn section_without_pattern_unchanged() {
        let md = "## Overview\n\n## Regular heading\nx\n";
        let r = render(md);
        assert!(r.body_html.contains("<h2>Regular heading</h2>"));
        assert!(!r.body_html.contains("section-num"));
    }

    #[test]
    fn diagram_inline_happy_path() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("diagrams")).unwrap();
        std::fs::write(
            tmp.path().join("diagrams/foo.svg"),
            "<svg xmlns=\"http://www.w3.org/2000/svg\"><circle r=\"5\"/></svg>",
        )
        .unwrap();
        let md = "## Overview\nx\n\n![Fig · demo](diagrams/foo.svg)\n";
        let r = render_body(md, tmp.path()).unwrap();
        assert_eq!(r.diagrams_inlined, 1);
        assert!(r.body_html.contains("<div class=\"diagram\">"));
        assert!(r.body_html.contains("<circle r=\"5\"/>"));
        assert!(r.body_html.contains("<p class=\"caption\">Fig · demo</p>"));
        assert!(!r.body_html.contains("<img"));
    }

    #[test]
    fn render_wiki_page_keeps_body_without_overview_heading() {
        let tmp = TempDir::new().unwrap();
        let md = "# Scheduler\n\nThe scheduler coordinates workers.\n\nSecond paragraph.\n";
        let r = render_wiki_page(md, tmp.path()).unwrap();
        assert!(r.body_html.contains("<h1>Scheduler</h1>"));
        assert!(r.body_html.contains("coordinates workers"));
        assert!(r.body_html.contains("Second paragraph"));
    }

    #[test]
    fn diagram_missing_renders_placeholder() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("diagrams")).unwrap();
        let md = "## Overview\nx\n\n![missing](diagrams/nope.svg)\n";
        let r = render_body(md, tmp.path()).unwrap();
        assert_eq!(r.diagrams_inlined, 0);
        assert!(!r.body_html.contains("<img src=\"diagrams/nope.svg\""));
        assert!(r.body_html.contains(r#"class="diagram diagram-missing""#));
        assert!(r.body_html.contains("diagram pending"));
        assert!(r.warnings.iter().any(|w| w == "diagram_fallback_img"));
    }

    #[test]
    fn diagram_out_of_bounds_is_fatal() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("diagrams")).unwrap();
        let md = "## Overview\nx\n\n![escape](diagrams/../../etc/passwd.svg)\n";
        let err = render_body(md, tmp.path()).unwrap_err();
        // `matches!` only yields a bool; without `assert!` nothing is checked.
        assert!(matches!(err, RenderError::DiagramOutOfBounds(_)));
    }

    #[test]
    fn diagram_too_large_falls_back() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("diagrams")).unwrap();
        let big = "<svg>".to_string() + &"x".repeat((SVG_MAX_BYTES + 1) as usize) + "</svg>";
        std::fs::write(tmp.path().join("diagrams/big.svg"), big).unwrap();
        let md = "## Overview\nx\n\n![big](diagrams/big.svg)\n";
        let r = render_body(md, tmp.path()).unwrap();
        assert_eq!(r.diagrams_inlined, 0);
        assert!(r.warnings.iter().any(|w| w == "diagram_fallback_img"));
    }

    #[test]
    fn non_diagram_img_untouched() {
        let tmp = TempDir::new().unwrap();
        let md = "## Overview\nx\n\n![pic](https://example.com/pic.png)\n";
        let r = render_body(md, tmp.path()).unwrap();
        assert!(
            r.body_html
                .contains("<img src=\"https://example.com/pic.png\"")
        );
        assert_eq!(r.warnings.len(), 0);
    }

    #[test]
    fn strip_scaffolding_removes_preamble() {
        let md = "# Research: X\n\n## Objective\nfoo\n\n## Preset\ntech\n\n## Sources\n<!-- research:sources-start -->\n- foo\n<!-- research:sources-end -->\n\n## Overview\nthe real thing\n";
        let r = render(md);
        assert!(!r.body_html.contains("Research: X"));
        assert!(!r.body_html.contains("Preset"));
        assert!(!r.body_html.contains("research:sources-start"));
        assert!(r.body_html.contains("the real thing"));
    }
}