use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use rust_embed::RustEmbed;
use typst::foundations::{Bytes, Dict, IntoValue};
use typst_as_lib::typst_kit_options::TypstKitFontOptions;
use typst_as_lib::TypstEngine;
/// Files from `assets/typst/` embedded into the binary via `rust-embed`.
///
/// `compile` looks up the `report.typ` template through this type.
#[derive(RustEmbed)]
#[folder = "assets/typst/"]
struct TemplateAssets;
/// Output format selectable for an export.
///
/// Serde (de)serializes the variants using their lowercase names
/// (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DocFormat {
/// PDF, rendered through the Typst template.
Pdf,
/// Standalone HTML page.
Html,
/// The Markdown input, passed through unchanged.
Md,
}
impl DocFormat {
    /// Parses a format name, ignoring ASCII case.
    ///
    /// Accepts `pdf`, `html`, `md` and `markdown`.
    ///
    /// # Errors
    ///
    /// Returns an error naming the (lower-cased) input when it is not one of
    /// the accepted names.
    pub fn parse(s: &str) -> Result<Self> {
        let lowered = s.to_ascii_lowercase();
        let format = match lowered.as_str() {
            "pdf" => Self::Pdf,
            "html" => Self::Html,
            "md" | "markdown" => Self::Md,
            other => return Err(anyhow!("unknown doc format: {other}")),
        };
        Ok(format)
    }

    /// Returns the file extension (without a leading dot) for this format.
    pub fn extension(self) -> &'static str {
        match self {
            Self::Md => "md",
            Self::Html => "html",
            Self::Pdf => "pdf",
        }
    }
}
/// Document metadata supplied alongside the Markdown body.
///
/// For PDF output each field is passed to the Typst template as an input of
/// the same name; for HTML output only `title` is used (as the page title).
#[derive(Debug, Clone)]
pub struct ExportMeta {
/// Document title.
pub title: String,
/// Version string shown in the document.
pub version: String,
/// Generation date as preformatted text (`read_meta` fills in `YYYY-MM-DD`, UTC).
pub generated: String,
}
/// Renders `markdown` into the requested `format` and returns the file bytes.
///
/// * `Md` returns the input bytes unchanged.
/// * `Pdf` goes through the Typst pipeline; `image_cache_dir` is where remote
///   images are cached (without it remote images are replaced by a placeholder).
/// * `Html` produces a standalone HTML page; `image_cache_dir` is not used.
///
/// # Errors
///
/// Propagates any error from the PDF or HTML renderer.
pub fn export(
    markdown: &str,
    meta: &ExportMeta,
    format: DocFormat,
    image_cache_dir: Option<&Path>,
) -> Result<Vec<u8>> {
    let bytes = match format {
        DocFormat::Pdf => render_pdf(markdown, meta, image_cache_dir)?,
        DocFormat::Html => render_html(markdown, meta)?,
        DocFormat::Md => markdown.as_bytes().to_vec(),
    };
    Ok(bytes)
}
/// Compiles the Markdown through the Typst template and serializes it as PDF.
///
/// Uses the default PDF export options.
///
/// # Errors
///
/// Fails when Typst compilation fails or when the PDF exporter reports errors
/// (included in the message in `Debug` form).
fn render_pdf(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
) -> Result<Vec<u8>> {
    let document = compile(markdown, meta, image_cache_dir)?;
    let pdf_options = Default::default();
    match typst_pdf::pdf(&document, &pdf_options) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(anyhow!("typst pdf export failed: {:?}", e)),
    }
}
/// Renders `markdown` as a standalone HTML page with a small embedded stylesheet.
///
/// Tables, strikethrough, task lists and footnotes are enabled in the Markdown
/// parser. `meta.title` is HTML-escaped and used as the page `<title>`; the
/// other metadata fields are not used here.
///
/// Currently always returns `Ok`.
fn render_html(markdown: &str, meta: &ExportMeta) -> Result<Vec<u8>> {
    use pulldown_cmark::{html, Options, Parser};

    let mut opts = Options::empty();
    opts.insert(Options::ENABLE_TABLES);
    opts.insert(Options::ENABLE_STRIKETHROUGH);
    opts.insert(Options::ENABLE_TASKLISTS);
    opts.insert(Options::ENABLE_FOOTNOTES);
    let parser = Parser::new_ext(markdown, opts);

    // Rendered HTML is usually somewhat larger than its Markdown source.
    let mut out = String::with_capacity(markdown.len() * 2);
    out.push_str("<!doctype html><html><head><meta charset=\"utf-8\">");
    out.push_str("<title>");
    out.push_str(&html_escape(&meta.title));
    out.push_str("</title>");
    // The trailing backslashes join the lines without inserting whitespace.
    out.push_str("<style>body{font-family:system-ui,sans-serif;max-width:48rem;\
        margin:2rem auto;padding:0 1rem;line-height:1.55;color:#222}\
        code{background:#f2f2f4;padding:0.1em 0.3em;border-radius:3px;\
        font-family:ui-monospace,Menlo,Consolas,monospace}\
        pre{background:#f6f6f8;padding:0.8em;border-radius:5px;overflow:auto}\
        pre code{background:transparent;padding:0}\
        table{border-collapse:collapse}td,th{border:1px solid #ddd;padding:4px 8px}\
        h1,h2,h3{font-weight:600}a{color:#0b66c2}</style></head><body>");
    html::push_html(&mut out, parser);
    out.push_str("</body></html>");
    Ok(out.into_bytes())
}
/// Compiles the bundled `report.typ` template with the converted Markdown body.
///
/// Steps:
/// 1. load the embedded template,
/// 2. convert `markdown` to Typst markup, resolving remote images through an
///    [`ImageResolver`] rooted at `image_cache_dir`,
/// 3. build a Typst engine that uses embedded fonts only (system fonts are
///    disabled) and serves the resolved images as static files,
/// 4. compile with the inputs `title`, `version`, `generated` and `body`.
///
/// # Errors
///
/// Fails when the template is missing or not UTF-8, or when Typst compilation
/// fails (the Typst error is included in `Debug` form).
fn compile(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
) -> Result<typst::layout::PagedDocument> {
    let asset = TemplateAssets::get("report.typ")
        .ok_or_else(|| anyhow!("bundled typst template missing"))?;
    let template_src = std::str::from_utf8(&asset.data)
        .context("template not utf-8")?
        .to_owned();

    let mut resolver = ImageResolver::new(image_cache_dir);
    let body = md_to_typst_with(markdown, &mut resolver);

    // Template inputs, inserted in the order: title, version, generated, body.
    let mut inputs = Dict::new();
    let meta_fields = [
        ("title", &meta.title),
        ("version", &meta.version),
        ("generated", &meta.generated),
    ];
    for (key, value) in meta_fields {
        inputs.insert(key.into(), value.clone().into_value());
    }
    inputs.insert("body".into(), body.into_value());

    let fonts = TypstKitFontOptions::default()
        .include_system_fonts(false)
        .include_embedded_fonts(true);
    let mut builder = TypstEngine::builder()
        .main_file(template_src)
        .search_fonts_with(fonts);
    if !resolver.assets.is_empty() {
        // Expose every resolved image to Typst under its virtual path.
        let files: Vec<(&str, Bytes)> = resolver
            .assets
            .iter()
            .map(|(virtual_path, data)| (virtual_path.as_str(), Bytes::new(data.clone())))
            .collect();
        builder = builder.with_static_file_resolver(files);
    }

    builder
        .build()
        .compile_with_input(inputs)
        .output
        .map_err(|err| anyhow!("typst compile failed: {:?}", err))
}
/// Downloads and caches remote images so they can be embedded in a Typst document.
///
/// Each successfully resolved image is stored in `assets` under a virtual path
/// of the form `/img/<hash>.<ext>`.
pub struct ImageResolver {
// On-disk cache directory; when `None`, `resolve` returns `None` for every URL.
cache_dir: Option<PathBuf>,
// Virtual path -> image bytes for every image resolved so far.
assets: HashMap<String, Vec<u8>>,
}
impl ImageResolver {
    /// Creates a resolver with no images loaded; `cache_dir` is the optional
    /// on-disk cache location.
    fn new(cache_dir: Option<&Path>) -> Self {
        Self {
            cache_dir: cache_dir.map(Path::to_path_buf),
            assets: HashMap::new(),
        }
    }

    /// Resolves a remote image URL to a virtual path registered in `assets`.
    ///
    /// Returns `None` when the URL is not `http://`/`https://`, when no cache
    /// directory is configured or it cannot be created, or when the download
    /// fails.
    ///
    /// The raw download is cached on disk as `<hash>.<guessed ext>`. PNG and
    /// GIF images are re-encoded as WebP, and the WebP version is used (and
    /// also written to the cache directory) only when it is smaller than the
    /// original. Cache writes are best-effort: failures are ignored.
    fn resolve(&mut self, url: &str) -> Option<String> {
        let is_remote = url.starts_with("http://") || url.starts_with("https://");
        if !is_remote {
            return None;
        }
        let cache_dir = self.cache_dir.as_ref()?;
        std::fs::create_dir_all(cache_dir).ok()?;

        let guessed_ext = guess_image_ext(url);
        let key = url_hash(url);
        let raw_name = format!("{key}.{guessed_ext}");

        // Fast path: already loaded under the name derived from the URL alone.
        let guessed_virtual = format!("/img/{raw_name}");
        if self.assets.contains_key(&guessed_virtual) {
            return Some(guessed_virtual);
        }

        let raw_path = cache_dir.join(&raw_name);
        let raw_bytes = match std::fs::read(&raw_path) {
            Ok(cached) => cached,
            Err(_) => {
                let fetched = fetch_url(url).ok()?;
                let _ = std::fs::write(&raw_path, &fetched);
                fetched
            }
        };

        // Trust the file contents over the URL suffix when they disagree.
        let detected_ext = sniff_image_ext(&raw_bytes).unwrap_or(guessed_ext);
        let smaller_webp = if matches!(detected_ext, "png" | "gif") {
            transcode_to_webp(&raw_bytes).filter(|webp| webp.len() < raw_bytes.len())
        } else {
            None
        };
        let (final_ext, final_bytes) = match smaller_webp {
            Some(webp) => ("webp", webp),
            None => (detected_ext, raw_bytes),
        };

        let final_name = format!("{key}.{final_ext}");
        if final_ext == "webp" {
            let webp_path = cache_dir.join(&final_name);
            if !webp_path.exists() {
                let _ = std::fs::write(&webp_path, &final_bytes);
            }
        }

        let virtual_path = format!("/img/{final_name}");
        // Keep the bytes that are already registered, if any.
        self.assets
            .entry(virtual_path.clone())
            .or_insert(final_bytes);
        Some(virtual_path)
    }
}
/// Decodes `bytes` as an image and re-encodes it as WebP.
///
/// Returns `None` when the input cannot be decoded or the WebP encoder fails.
fn transcode_to_webp(bytes: &[u8]) -> Option<Vec<u8>> {
    let decoded = image::load_from_memory(bytes).ok()?;
    // Initial capacity guess: half the size of the input.
    let mut cursor = std::io::Cursor::new(Vec::with_capacity(bytes.len() / 2));
    match decoded.write_to(&mut cursor, image::ImageFormat::WebP) {
        Ok(()) => Some(cursor.into_inner()),
        Err(_) => None,
    }
}
/// Detects an image format from the leading bytes of a file.
///
/// Recognizes PNG, JPEG, GIF (87a/89a), WebP (RIFF container) and SVG/XML
/// (input of at least five bytes starting with `<?xml` or `<svg`). Returns the
/// matching file extension, or `None` when nothing matches.
fn sniff_image_ext(bytes: &[u8]) -> Option<&'static str> {
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        return Some("png");
    }
    if bytes.starts_with(b"\xff\xd8\xff") {
        return Some("jpg");
    }
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return Some("gif");
    }
    // RIFF container: the format tag sits at bytes 8..12, after the size field.
    if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        return Some("webp");
    }
    if bytes.len() >= 5 && (bytes.starts_with(b"<?xml") || bytes.starts_with(b"<svg")) {
        return Some("svg");
    }
    None
}
/// Downloads `url` and returns the response body.
///
/// The request uses a 15 second timeout, follows at most 5 redirects and
/// advertises a preference for image content types.
///
/// # Errors
///
/// Fails when the request fails, the status is not 2xx, the body cannot be
/// read, or the body is larger than 5 MiB. An oversized body is rejected
/// rather than truncated, because a truncated image would be cached and
/// embedded as a corrupt file.
fn fetch_url(url: &str) -> Result<Vec<u8>> {
    use std::io::Read;

    /// Largest response body accepted, in bytes.
    const MAX_BODY_BYTES: usize = 5 * 1024 * 1024;

    let resp = ureq::AgentBuilder::new()
        .timeout(std::time::Duration::from_secs(15))
        .user_agent("nornir-docs-export/0.1")
        .redirects(5)
        .build()
        .get(url)
        .set("Accept", "image/webp,image/png,image/jpeg,image/*;q=0.8,*/*;q=0.5")
        .call()
        .map_err(|e| anyhow!("fetch {url}: {e}"))?;
    if resp.status() / 100 != 2 {
        return Err(anyhow!("fetch {url}: HTTP {}", resp.status()));
    }

    let mut buf = Vec::with_capacity(64 * 1024);
    // Read one byte past the cap so an oversized body can be told apart from
    // one that is exactly at the limit.
    resp.into_reader()
        .take(MAX_BODY_BYTES as u64 + 1)
        .read_to_end(&mut buf)
        .map_err(|e| anyhow!("read {url}: {e}"))?;
    if buf.len() > MAX_BODY_BYTES {
        return Err(anyhow!(
            "fetch {url}: response body exceeds {} bytes",
            MAX_BODY_BYTES
        ));
    }
    Ok(buf)
}
/// Returns a 16-digit lowercase hex hash of `url`, used as the cache file stem.
///
/// NOTE: the standard library does not specify the algorithm behind
/// `DefaultHasher`, so the value may differ between Rust releases; a changed
/// hash only means cached files are re-fetched under a new name.
fn url_hash(url: &str) -> String {
    use std::collections::hash_map::DefaultHasher;

    let mut hasher = DefaultHasher::new();
    Hash::hash(url, &mut hasher);
    let digest: u64 = hasher.finish();
    format!("{digest:016x}")
}
/// Guesses an image file extension from the path part of `url`.
///
/// The query string and fragment are ignored and matching is ASCII
/// case-insensitive. `.jpeg` is normalized to `jpg`; anything unrecognized
/// falls back to `png`.
fn guess_image_ext(url: &str) -> &'static str {
    /// Recognized URL suffixes and the extension each one maps to.
    const KNOWN_SUFFIXES: [(&str, &str); 6] = [
        (".png", "png"),
        (".jpg", "jpg"),
        (".jpeg", "jpg"),
        (".gif", "gif"),
        (".svg", "svg"),
        (".webp", "webp"),
    ];

    let path_end = url.find(['?', '#']).unwrap_or(url.len());
    let path = url[..path_end].to_ascii_lowercase();
    KNOWN_SUFFIXES
        .iter()
        .find(|&&(suffix, _)| path.ends_with(suffix))
        .map_or("png", |&(_, ext)| ext)
}
/// Converts Markdown to Typst markup without downloading anything.
///
/// Uses an [`ImageResolver`] with no cache directory, so every remote image is
/// replaced by a placeholder.
pub fn md_to_typst(md: &str) -> String {
    md_to_typst_with(md, &mut ImageResolver::new(None))
}
/// Converts Markdown to Typst markup, resolving remote images through `images`.
///
/// Handled constructs: headings, paragraphs, block quotes, code blocks, inline
/// code, ordered and unordered lists, emphasis, strong, strikethrough, links,
/// images, tables, line breaks and horizontal rules. Raw HTML, footnote
/// references, task-list markers and math are dropped.
///
/// Images:
/// * remote (`http://` / `https://`) images are passed to
///   [`ImageResolver::resolve`]; when that returns `None` the text
///   `remote image omitted` is emitted in their place;
/// * all other image paths are emitted unchanged;
/// * alt text is not emitted, since it would otherwise show up as stray body
///   text right after the picture.
///
/// Table cells are buffered and written out when the table ends, so inline
/// content inside a cell is routed to the cell buffer instead of the body.
pub fn md_to_typst_with(md: &str, images: &mut ImageResolver) -> String {
    use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};

    /// Appends `s` to the open table cell if there is one, else to the body.
    fn emit(out: &mut String, cell_buf: &mut Option<String>, s: &str) {
        match cell_buf {
            Some(cell) => cell.push_str(s),
            None => out.push_str(s),
        }
    }

    let mut opts = Options::empty();
    opts.insert(Options::ENABLE_TABLES);
    opts.insert(Options::ENABLE_STRIKETHROUGH);
    opts.insert(Options::ENABLE_TASKLISTS);

    let mut out = String::with_capacity(md.len() * 2);
    // One entry per open list: `Some(start)` for ordered, `None` for bullet lists.
    let mut list_stack: Vec<Option<u64>> = Vec::new();
    let mut in_code = false;
    let mut table_cells: Vec<String> = Vec::new();
    let mut cell_buf: Option<String> = None;
    // Greater than zero while inside an image, whose alt-text events are skipped.
    let mut image_depth: usize = 0;

    for ev in Parser::new_ext(md, opts) {
        if image_depth > 0 {
            // Alt text arrives as ordinary inline events between the image's
            // start and end; track nesting so the matching end is found.
            match ev {
                Event::Start(Tag::Image { .. }) => image_depth += 1,
                Event::End(TagEnd::Image) => image_depth -= 1,
                _ => {}
            }
            continue;
        }

        match ev {
            Event::Start(Tag::Heading { level, .. }) => {
                let depth: usize = match level {
                    HeadingLevel::H1 => 1,
                    HeadingLevel::H2 => 2,
                    HeadingLevel::H3 => 3,
                    HeadingLevel::H4 => 4,
                    HeadingLevel::H5 => 5,
                    HeadingLevel::H6 => 6,
                };
                out.push('\n');
                out.push_str(&"=".repeat(depth));
                out.push(' ');
            }
            Event::End(TagEnd::Heading(_)) => out.push_str("\n\n"),
            Event::Start(Tag::Paragraph) => {}
            Event::End(TagEnd::Paragraph) => out.push_str("\n\n"),
            Event::Start(Tag::BlockQuote(_)) => out.push_str("#quote(block: true)[\n"),
            Event::End(TagEnd::BlockQuote(_)) => out.push_str("]\n\n"),
            Event::Start(Tag::CodeBlock(kind)) => {
                in_code = true;
                out.push_str("\n```");
                if let CodeBlockKind::Fenced(lang) = &kind {
                    out.push_str(lang);
                }
                out.push('\n');
            }
            Event::End(TagEnd::CodeBlock) => {
                in_code = false;
                out.push_str("```\n\n");
            }
            Event::Start(Tag::List(start)) => {
                list_stack.push(start);
                out.push('\n');
            }
            Event::End(TagEnd::List(_)) => {
                list_stack.pop();
                out.push('\n');
            }
            Event::Start(Tag::Item) => {
                let indent = " ".repeat(list_stack.len().saturating_sub(1));
                out.push_str(&indent);
                let marker = if matches!(list_stack.last(), Some(Some(_))) {
                    "+ "
                } else {
                    "- "
                };
                out.push_str(marker);
            }
            Event::End(TagEnd::Item) => out.push('\n'),
            Event::Start(Tag::Emphasis) | Event::End(TagEnd::Emphasis) => {
                emit(&mut out, &mut cell_buf, "_")
            }
            Event::Start(Tag::Strong) | Event::End(TagEnd::Strong) => {
                emit(&mut out, &mut cell_buf, "*")
            }
            Event::Start(Tag::Strikethrough) => emit(&mut out, &mut cell_buf, "#strike[ "),
            Event::End(TagEnd::Strikethrough) => emit(&mut out, &mut cell_buf, " ]"),
            Event::Start(Tag::Link { dest_url, .. }) => {
                emit(&mut out, &mut cell_buf, "#link(\"");
                emit(&mut out, &mut cell_buf, &escape_typst_str(&dest_url));
                emit(&mut out, &mut cell_buf, "\")[");
            }
            Event::End(TagEnd::Link) => emit(&mut out, &mut cell_buf, "]"),
            Event::Start(Tag::Image { dest_url, .. }) => {
                image_depth = 1;
                let is_remote =
                    dest_url.starts_with("http://") || dest_url.starts_with("https://");
                let target = if is_remote {
                    images.resolve(&dest_url)
                } else {
                    Some(dest_url.to_string())
                };
                match target {
                    Some(path) => {
                        emit(&mut out, &mut cell_buf, "#image(\"");
                        emit(&mut out, &mut cell_buf, &escape_typst_str(&path));
                        emit(&mut out, &mut cell_buf, "\")");
                    }
                    None => emit(&mut out, &mut cell_buf, "_[remote image omitted]_"),
                }
            }
            Event::Start(Tag::Table(aligns)) => {
                let cols = aligns.len().max(1);
                table_cells.clear();
                out.push_str(&format!("\n#table(\n columns: {},\n", cols));
            }
            Event::End(TagEnd::Table) => {
                for cell in table_cells.drain(..) {
                    out.push_str(" [");
                    out.push_str(&cell);
                    out.push_str("],\n");
                }
                out.push_str(")\n\n");
            }
            Event::Start(Tag::TableHead) | Event::Start(Tag::TableRow) => {}
            Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {}
            Event::Start(Tag::TableCell) => cell_buf = Some(String::new()),
            Event::End(TagEnd::TableCell) => {
                if let Some(cell) = cell_buf.take() {
                    table_cells.push(cell);
                }
            }
            Event::Text(t) => {
                if in_code {
                    // Code block content is copied verbatim between the fences.
                    out.push_str(&t);
                } else {
                    emit(&mut out, &mut cell_buf, &escape_typst_text(&t));
                }
            }
            Event::Code(t) => {
                if t.contains('`') {
                    // A backtick would close a backtick-delimited raw span
                    // early, so fall back to the function form.
                    emit(&mut out, &mut cell_buf, "#raw(\"");
                    emit(&mut out, &mut cell_buf, &escape_typst_str(&t));
                    emit(&mut out, &mut cell_buf, "\")");
                } else {
                    emit(&mut out, &mut cell_buf, "`");
                    emit(&mut out, &mut cell_buf, &t);
                    emit(&mut out, &mut cell_buf, "`");
                }
            }
            // Raw HTML (including generator marker comments) is dropped.
            Event::Html(_) | Event::InlineHtml(_) => {}
            Event::SoftBreak => emit(&mut out, &mut cell_buf, " "),
            Event::HardBreak => emit(&mut out, &mut cell_buf, " \\\n"),
            Event::Rule => out.push_str("\n#line(length: 100%, stroke: 0.4pt + gray)\n\n"),
            Event::FootnoteReference(_)
            | Event::TaskListMarker(_)
            | Event::InlineMath(_)
            | Event::DisplayMath(_) => {}
            Event::Start(_) | Event::End(_) => {}
        }
    }
    out
}
/// Escapes plain text so Typst renders it literally in markup mode.
///
/// A backslash is put in front of `\ # $ * _ ` < > @ [ ] ~`. A `/` that would
/// start a Typst comment (`//` or `/*`) is escaped as well, except directly
/// after `http:` or `https:`, so that bare URLs are left untouched.
fn escape_typst_text(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\\' | '#' | '$' | '*' | '_' | '`' | '<' | '>' | '@' | '[' | ']' | '~' => {
                out.push('\\');
                out.push(c);
            }
            // `//` and `/*` open comments in Typst markup and would swallow
            // the text that follows them.
            '/' if matches!(chars.peek().copied(), Some('/' | '*'))
                && !(out.ends_with("http:") || out.ends_with("https:")) =>
            {
                out.push('\\');
                out.push('/');
            }
            _ => out.push(c),
        }
    }
    out
}
/// Escapes `s` for use inside a double-quoted Typst string literal.
///
/// Only `"` and `\` are escaped (each gets a leading backslash); every other
/// character is copied unchanged.
fn escape_typst_str(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if matches!(ch, '"' | '\\') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
/// Escapes `s` for use in HTML text or a double-quoted attribute value.
///
/// Replaces `&`, `<`, `>` and `"` with their named entities in a single pass,
/// so an ampersand produced by one replacement is never escaped again.
fn html_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            _ => out.push(c),
        }
    }
    out
}
/// Exports the `README.md` of the repository at `repo_root` in the given `format`.
///
/// Metadata is read from the repository's `Cargo.toml`; remote images are
/// cached under `<repo_root>/.nornir/cache/images`.
///
/// # Errors
///
/// Fails when `README.md` cannot be read, when the metadata cannot be loaded,
/// or when rendering fails.
pub fn export_repo(repo_root: &Path, format: DocFormat) -> Result<Vec<u8>> {
    let readme_path = repo_root.join("README.md");
    let markdown = std::fs::read_to_string(&readme_path)
        .with_context(|| format!("read {}", readme_path.display()))?;
    let meta = read_meta(repo_root)?;
    let image_cache = repo_root.join(".nornir/cache/images");
    export(&markdown, &meta, format, Some(image_cache.as_path()))
}
/// Builds [`ExportMeta`] from the `Cargo.toml` in `repo_root`.
///
/// Name and version come from `[package]`, or from `[workspace.package]` when
/// there is no `[package]` table. A missing name falls back to the directory
/// name of `repo_root` (or `project`), a missing version to `0.0.0`. The
/// generation date is today's UTC date formatted as `YYYY-MM-DD`.
///
/// # Errors
///
/// Fails when `Cargo.toml` cannot be read or is not valid TOML.
fn read_meta(repo_root: &Path) -> Result<ExportMeta> {
    let manifest_path = repo_root.join("Cargo.toml");
    let manifest_text = std::fs::read_to_string(&manifest_path)
        .with_context(|| format!("read {}", manifest_path.display()))?;
    let manifest: toml::Value = toml::from_str(&manifest_text)?;

    let package = manifest.get("package").or_else(|| {
        manifest
            .get("workspace")
            .and_then(|workspace| workspace.get("package"))
    });
    let fallback_title = repo_root
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("project");

    let title = package
        .and_then(|p| p.get("name"))
        .and_then(|v| v.as_str())
        .unwrap_or(fallback_title)
        .to_string();
    let version = package
        .and_then(|p| p.get("version"))
        .and_then(|v| v.as_str())
        .unwrap_or("0.0.0")
        .to_string();

    Ok(ExportMeta {
        title,
        version,
        generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings, inline code and bullet lists survive the conversion.
    #[test]
    fn md_to_typst_basic() {
        let md = "# Title\n\nSome *bold* text and `code`.\n\n- one\n- two\n";
        let out = md_to_typst(md);
        assert!(out.contains("= Title"));
        assert!(out.contains("`code`"));
        assert!(out.contains("- one"));
    }

    /// A two-column Markdown table becomes a `#table` with `columns: 2`.
    #[test]
    fn md_to_typst_table() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |\n";
        let out = md_to_typst(md);
        assert!(out.contains("#table("));
        assert!(out.contains("columns: 2"));
    }

    /// Links are emitted as `#link("url")[text]`.
    #[test]
    fn md_to_typst_link() {
        let md = "see [docs](https://example.com)";
        let out = md_to_typst(md);
        assert!(out.contains("#link(\"https://example.com\")"));
    }

    /// Fenced code blocks keep their language tag and content.
    #[test]
    fn md_to_typst_code_fence() {
        let md = "```rust\nfn main() {}\n```\n";
        let out = md_to_typst(md);
        assert!(out.contains("```rust"));
        assert!(out.contains("fn main() {}"));
    }

    /// Format names parse case-insensitively; unknown names are rejected.
    #[test]
    fn format_parse() {
        assert_eq!(DocFormat::parse("pdf").unwrap(), DocFormat::Pdf);
        assert_eq!(DocFormat::parse("HTML").unwrap(), DocFormat::Html);
        assert_eq!(DocFormat::parse("md").unwrap(), DocFormat::Md);
        assert!(DocFormat::parse("xml").is_err());
    }

    /// Without a cache directory a remote image is replaced by a placeholder
    /// (no network access happens on this path).
    #[test]
    fn md_to_typst_remote_image_placeholder_without_cache() {
        let md = "![x](https://example.com/x.png)\n";
        let out = md_to_typst(md);
        assert!(
            out.contains("remote image omitted"),
            "expected placeholder, got: {out}"
        );
    }

    /// Local image paths are passed to `#image` unchanged.
    #[test]
    fn md_to_typst_local_image_kept() {
        let md = "![logo](./assets/logo.png)\n";
        let out = md_to_typst(md);
        assert!(out.contains("#image(\"./assets/logo.png\")"));
    }

    /// Markdown export returns the input bytes untouched.
    #[test]
    fn md_passthrough() {
        let md = "hello";
        let meta = ExportMeta {
            title: "t".into(),
            version: "0".into(),
            generated: "g".into(),
        };
        let out = export(md, &meta, DocFormat::Md, None).unwrap();
        assert_eq!(out, b"hello");
    }

    /// HTML export renders the body and uses the metadata title.
    #[test]
    fn html_render_smoke() {
        let md = "# H\n\ntext\n";
        let meta = ExportMeta {
            title: "T".into(),
            version: "1".into(),
            generated: "g".into(),
        };
        let out = export(md, &meta, DocFormat::Html, None).unwrap();
        let s = String::from_utf8(out).unwrap();
        assert!(s.contains("<h1>H</h1>"));
        assert!(s.contains("<title>T</title>"));
    }
}