use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use rust_embed::RustEmbed;
use typst::foundations::{Bytes, Dict, IntoValue};
use typst_as_lib::typst_kit_options::TypstKitFontOptions;
use typst_as_lib::TypstEngine;
/// Typst template files from `assets/typst/`, exposed through the `RustEmbed`
/// derive and looked up by file name with `TemplateAssets::get`.
#[derive(RustEmbed)]
#[folder = "assets/typst/"]
struct TemplateAssets;
/// Output formats that [`export`] can produce.
///
/// With serde the variants are (de)serialized under their lowercase names
/// (`pdf`, `html`, `md`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DocFormat {
/// PDF rendered through the Typst template.
Pdf,
/// HTML rendered through the Typst HTML exporter.
Html,
/// The Markdown input, passed through unchanged.
Md,
}
impl DocFormat {
    /// Parses a format name, ignoring ASCII case.
    ///
    /// Accepts `pdf`, `html`, `md` and `markdown`.
    ///
    /// # Errors
    ///
    /// Returns an error naming the (lowercased) input when it matches none of
    /// the accepted spellings.
    pub fn parse(s: &str) -> Result<Self> {
        let lowered = s.to_ascii_lowercase();
        let format = match lowered.as_str() {
            "md" | "markdown" => Self::Md,
            "html" => Self::Html,
            "pdf" => Self::Pdf,
            other => return Err(anyhow!("unknown doc format: {other}")),
        };
        Ok(format)
    }

    /// Returns the file extension (without a leading dot) for this format.
    pub fn extension(self) -> &'static str {
        match self {
            Self::Md => "md",
            Self::Html => "html",
            Self::Pdf => "pdf",
        }
    }
}
/// Typst compilation target that the generated markup is written for.
///
/// The Markdown converter consults this when emitting HTML-embedded images
/// and horizontal rules, which use different Typst markup per target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Target {
/// Paged output; used by the PDF renderer.
Paged,
/// HTML output; used by the HTML renderer.
Html,
}
/// Document metadata handed to the Typst template as compile inputs.
///
/// Each field is inserted into the input dictionary under its own name
/// (`title`, `version`, `generated`, `cover_image`).
#[derive(Debug, Clone, Default)]
pub struct ExportMeta {
/// Document title.
pub title: String,
/// Version string shown in the document.
pub version: String,
/// Generation stamp; `read_meta` fills this with the current UTC date as `YYYY-MM-DD`.
pub generated: String,
/// Cover image reference; `read_meta` leaves it empty.
pub cover_image: String,
}
/// Renders `markdown` into the requested `format` and returns the raw bytes.
///
/// * `meta` - metadata forwarded to the Typst template.
/// * `image_cache_dir` - directory used to cache downloaded remote images;
///   remote images are not resolved when it is `None`.
/// * `root` - directory handed to the Typst file-system resolver, if any.
///
/// For [`DocFormat::Md`] the input is returned unchanged and none of the
/// other arguments are consulted.
///
/// # Errors
///
/// Propagates any failure from the PDF or HTML renderer.
pub fn export(
    markdown: &str,
    meta: &ExportMeta,
    format: DocFormat,
    image_cache_dir: Option<&Path>,
    root: Option<&Path>,
) -> Result<Vec<u8>> {
    match format {
        DocFormat::Pdf => render_pdf(markdown, meta, image_cache_dir, root),
        DocFormat::Html => render_html(markdown, meta, image_cache_dir, root),
        // Markdown is a plain pass-through: no template, no image handling.
        DocFormat::Md => Ok(markdown.as_bytes().to_owned()),
    }
}
/// Compiles the Markdown through the default report template and exports a PDF.
///
/// # Errors
///
/// Fails when the engine cannot be built, when Typst compilation fails, or
/// when PDF export fails; Typst diagnostics are included in debug form.
fn render_pdf(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
    root: Option<&Path>,
) -> Result<Vec<u8>> {
    let (engine, inputs) = build_engine(markdown, meta, image_cache_dir, root, Target::Paged)?;
    let doc: typst::layout::PagedDocument = match engine.compile_with_input(inputs).output {
        Ok(compiled) => compiled,
        Err(err) => return Err(anyhow!("typst compile failed: {:?}", err)),
    };
    // Export with the default PDF options.
    match typst_pdf::pdf(&doc, &Default::default()) {
        Ok(pdf_bytes) => Ok(pdf_bytes),
        Err(e) => Err(anyhow!("typst pdf export failed: {:?}", e)),
    }
}
/// Compiles the Markdown through the `web.typ` template and serializes HTML.
///
/// # Errors
///
/// Fails when the engine cannot be built, when Typst compilation fails, or
/// when HTML serialization fails; Typst diagnostics are included in debug form.
fn render_html(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
    root: Option<&Path>,
) -> Result<Vec<u8>> {
    let (engine, inputs) = build_engine_with_template(
        markdown,
        meta,
        image_cache_dir,
        root,
        Target::Html,
        "web.typ",
    )?;
    let doc: typst_html::HtmlDocument = match engine.compile_with_input(inputs).output {
        Ok(compiled) => compiled,
        Err(err) => return Err(anyhow!("typst html compile failed: {:?}", err)),
    };
    match typst_html::html(&doc) {
        Ok(markup) => Ok(markup.into_bytes()),
        Err(err) => Err(anyhow!("typst html serialization failed: {:?}", err)),
    }
}
/// Builds a Typst engine and its inputs using the default `report.typ` template.
///
/// Thin convenience wrapper around [`build_engine_with_template`]; see that
/// function for the meaning of the arguments and the possible errors.
fn build_engine(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
    root: Option<&Path>,
    target: Target,
) -> Result<(typst_as_lib::TypstEngine<typst_as_lib::TypstTemplateMainFile>, Dict)> {
    let default_template = "report.typ";
    build_engine_with_template(markdown, meta, image_cache_dir, root, target, default_template)
}
/// Builds a Typst engine for `template_name` together with its input dictionary.
///
/// The Markdown is converted to Typst markup for `target`; remote images found
/// on the way are resolved through an [`ImageResolver`] rooted at
/// `image_cache_dir` and registered with the engine as static files. When
/// `root` is given it is additionally registered as a file-system resolver.
/// System fonts are disabled; only embedded fonts are searched.
///
/// The returned dictionary carries `title`, `version`, `generated`,
/// `cover_image` (from `meta`) and `body` (the converted markup).
///
/// # Errors
///
/// Fails when the template is not bundled or is not valid UTF-8.
fn build_engine_with_template(
    markdown: &str,
    meta: &ExportMeta,
    image_cache_dir: Option<&Path>,
    root: Option<&Path>,
    target: Target,
    template_name: &str,
) -> Result<(typst_as_lib::TypstEngine<typst_as_lib::TypstTemplateMainFile>, Dict)> {
    let template = match TemplateAssets::get(template_name) {
        Some(file) => file,
        None => return Err(anyhow!("bundled typst template missing: {template_name}")),
    };
    let template_src =
        String::from(std::str::from_utf8(&template.data).context("template not utf-8")?);

    // Convert first: the conversion is what populates `images.assets`.
    let mut images = ImageResolver::new(image_cache_dir);
    let body_typst = md_to_typst_with(markdown, &mut images, target);

    let font_options = TypstKitFontOptions::default()
        .include_system_fonts(false)
        .include_embedded_fonts(true);
    let mut builder = TypstEngine::builder()
        .main_file(template_src)
        .search_fonts_with(font_options);

    if !images.assets.is_empty() {
        let mut pairs: Vec<(&str, Bytes)> = Vec::with_capacity(images.assets.len());
        for (path, bytes) in &images.assets {
            pairs.push((path.as_str(), Bytes::new(bytes.clone())));
        }
        builder = builder.with_static_file_resolver(pairs);
    }
    if let Some(r) = root {
        builder = builder.with_file_system_resolver(r.to_path_buf());
    }
    let engine = builder.build();

    let mut inputs = Dict::new();
    for (key, value) in [
        ("title", &meta.title),
        ("version", &meta.version),
        ("generated", &meta.generated),
        ("cover_image", &meta.cover_image),
    ] {
        inputs.insert(key.into(), value.clone().into_value());
    }
    inputs.insert("body".into(), body_typst.into_value());

    Ok((engine, inputs))
}
/// Resolves remote image URLs to virtual `/img/...` paths and keeps the image
/// bytes so they can be registered with the Typst engine.
pub struct ImageResolver {
// On-disk cache directory for downloaded images; `None` disables resolution.
cache_dir: Option<PathBuf>,
// Image bytes keyed by the virtual path handed out by `resolve`.
assets: HashMap<String, Vec<u8>>,
}
impl ImageResolver {
    /// Creates a resolver that caches downloads under `cache_dir`.
    ///
    /// With `None`, [`ImageResolver::resolve`] never resolves anything.
    fn new(cache_dir: Option<&Path>) -> Self {
        Self {
            cache_dir: cache_dir.map(Path::to_path_buf),
            assets: HashMap::new(),
        }
    }

    /// Resolves a remote image URL to a virtual `/img/<hash>.<ext>` path and
    /// stores its bytes in `assets`.
    ///
    /// Returns `None` when the URL is not `http(s)`, when no cache directory is
    /// configured or it cannot be created, or when the image cannot be read
    /// from the cache or fetched. PNG and GIF payloads are replaced by a WebP
    /// transcode when that is strictly smaller.
    fn resolve(&mut self, url: &str) -> Option<String> {
        if !(url.starts_with("http://") || url.starts_with("https://")) {
            return None;
        }
        let cache_dir = self.cache_dir.as_ref()?;
        if std::fs::create_dir_all(cache_dir).is_err() {
            return None;
        }
        let ext = guess_image_ext(url);
        let key = url_hash(url);

        // The final extension may differ from the guessed one (content
        // sniffing, WebP transcoding), so probe every extension this function
        // can produce. Checking only the guessed extension made repeated
        // references to the same URL re-read and re-transcode the image.
        let registered = [ext, "webp", "png", "jpg", "gif", "svg"]
            .iter()
            .map(|candidate| format!("/img/{key}.{candidate}"))
            .find(|virt| self.assets.contains_key(virt));
        if registered.is_some() {
            return registered;
        }

        let disk_path = cache_dir.join(format!("{key}.{ext}"));
        let raw_bytes = match std::fs::read(&disk_path) {
            Ok(cached) => cached,
            Err(_) => {
                let fetched = fetch_url(url).ok()?;
                // Best effort: a failed cache write only costs a re-download later.
                let _ = std::fs::write(&disk_path, &fetched);
                fetched
            }
        };

        // Trust the payload's magic bytes over the URL's extension.
        let real_ext = sniff_image_ext(&raw_bytes).unwrap_or(ext);
        let (final_ext, final_bytes) = match real_ext {
            "png" | "gif" => match transcode_to_webp(&raw_bytes) {
                Some(webp) if webp.len() < raw_bytes.len() => ("webp", webp),
                _ => (real_ext, raw_bytes),
            },
            other => (other, raw_bytes),
        };

        let final_name = format!("{key}.{final_ext}");
        if final_ext == "webp" {
            let webp_path = cache_dir.join(&final_name);
            if !webp_path.exists() {
                // Best effort, same as the raw cache write above.
                let _ = std::fs::write(&webp_path, &final_bytes);
            }
        }

        let virtual_path = format!("/img/{final_name}");
        // Never replace bytes that are already registered under this path.
        self.assets
            .entry(virtual_path.clone())
            .or_insert(final_bytes);
        Some(virtual_path)
    }
}
/// Decodes `bytes` as an image and re-encodes it as WebP.
///
/// Returns `None` when the input cannot be decoded or the WebP encoding fails.
fn transcode_to_webp(bytes: &[u8]) -> Option<Vec<u8>> {
    let decoded = match image::load_from_memory(bytes) {
        Ok(img) => img,
        Err(_) => return None,
    };
    // Start with half the input size as the capacity for the encoded output.
    let mut sink = std::io::Cursor::new(Vec::with_capacity(bytes.len() / 2));
    match decoded.write_to(&mut sink, image::ImageFormat::WebP) {
        Ok(()) => Some(sink.into_inner()),
        Err(_) => None,
    }
}
/// Detects an image type from the leading bytes of `bytes`.
///
/// Recognizes PNG, JPEG, GIF (87a/89a), WebP (RIFF container) and SVG/XML
/// prefixes and returns the matching extension, or `None` when nothing matches.
fn sniff_image_ext(bytes: &[u8]) -> Option<&'static str> {
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        return Some("png");
    }
    if bytes.starts_with(b"\xff\xd8\xff") {
        return Some("jpg");
    }
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return Some("gif");
    }
    // RIFF container: the format tag sits at offset 8, after the size field.
    if bytes.len() >= 12 && bytes.starts_with(b"RIFF") && &bytes[8..12] == b"WEBP" {
        return Some("webp");
    }
    // At least five bytes are required for either textual prefix.
    if bytes.len() >= 5 && (bytes.starts_with(b"<?xml") || bytes.starts_with(b"<svg")) {
        return Some("svg");
    }
    None
}
/// Downloads `url` and returns the response body.
///
/// The request uses a 15 second timeout, follows up to five redirects and
/// advertises a preference for image content types.
///
/// # Errors
///
/// Fails when the request itself fails, when the final status is not 2xx,
/// when reading the body fails, or when the body is larger than 5 MiB.
fn fetch_url(url: &str) -> Result<Vec<u8>> {
    use std::io::Read;

    /// Upper bound on the accepted body size.
    const MAX_BYTES: u64 = 5 * 1024 * 1024;

    let resp = ureq::AgentBuilder::new()
        .timeout(std::time::Duration::from_secs(15))
        .user_agent("nornir-docs-export/0.1")
        .redirects(5)
        .build()
        .get(url)
        .set("Accept", "image/webp,image/png,image/jpeg,image/*;q=0.8,*/*;q=0.5")
        .call()
        .map_err(|e| anyhow!("fetch {url}: {e}"))?;
    if resp.status() / 100 != 2 {
        return Err(anyhow!("fetch {url}: HTTP {}", resp.status()));
    }

    let mut buf = Vec::with_capacity(64 * 1024);
    // Read one byte past the limit so an oversized body can be told apart from
    // one that is exactly at the limit. Silently truncating would hand a
    // corrupt image to the caller, which would then cache it.
    resp.into_reader()
        .take(MAX_BYTES + 1)
        .read_to_end(&mut buf)
        .map_err(|e| anyhow!("read {url}: {e}"))?;
    if buf.len() as u64 > MAX_BYTES {
        return Err(anyhow!("fetch {url}: response exceeds {MAX_BYTES} byte limit"));
    }
    Ok(buf)
}
/// Hashes `url` with the standard library's `DefaultHasher` and returns the
/// 64-bit result as 16 zero-padded lowercase hex digits.
///
/// The standard library does not specify `DefaultHasher`'s algorithm, so the
/// value should not be relied on across Rust releases.
fn url_hash(url: &str) -> String {
    use std::collections::hash_map::DefaultHasher;

    let mut hasher = DefaultHasher::new();
    url.hash(&mut hasher);
    let digest = hasher.finish();
    format!("{:016x}", digest)
}
/// Guesses an image extension from the path part of `url`.
///
/// Everything from the first `?` or `#` onwards is ignored and the comparison
/// is ASCII case-insensitive. `.jpeg` is reported as `jpg`; anything
/// unrecognized falls back to `png`.
fn guess_image_ext(url: &str) -> &'static str {
    const SUFFIXES: [(&str, &str); 6] = [
        (".png", "png"),
        (".jpg", "jpg"),
        (".jpeg", "jpg"),
        (".gif", "gif"),
        (".svg", "svg"),
        (".webp", "webp"),
    ];

    let path = match url.split(['?', '#']).next() {
        Some(head) => head,
        None => url,
    };
    let lowered = path.to_ascii_lowercase();
    SUFFIXES
        .iter()
        .find(|(suffix, _)| lowered.ends_with(suffix))
        .map_or("png", |&(_, ext)| ext)
}
/// Converts Markdown to Typst markup for paged output.
///
/// Uses a resolver without a cache directory, so remote images are never
/// resolved and are rendered as a placeholder instead.
pub fn md_to_typst(md: &str) -> String {
    md_to_typst_with(md, &mut ImageResolver::new(None), Target::Paged)
}
/// Converts Markdown to Typst markup.
///
/// * `md` - Markdown source; tables, strikethrough and task lists are enabled.
/// * `images` - resolver for remote (`http`/`https`) images; local image
///   paths are anchored at the Typst root instead.
/// * `target` - selects the markup used for HTML-embedded `<img>` tags and for
///   horizontal rules.
///
/// Remote images that cannot be resolved are replaced by a textual
/// placeholder. HTML fragments are dropped, except that the first `<img src>`
/// found in a fragment is turned into an image. Footnote references and math
/// events are ignored.
pub fn md_to_typst_with(md: &str, images: &mut ImageResolver, target: Target) -> String {
    use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};

    let mut opts = Options::empty();
    opts.insert(Options::ENABLE_TABLES);
    opts.insert(Options::ENABLE_STRIKETHROUGH);
    opts.insert(Options::ENABLE_TASKLISTS);

    let mut out = String::with_capacity(md.len() * 2);
    // One entry per open list: `Some(start)` for ordered, `None` for bullet lists.
    let mut list_stack: Vec<Option<u64>> = Vec::new();
    // Language tag of the code block currently being collected, if any.
    let mut in_code: Option<String> = None;
    let mut code_buf = String::new();
    // While inside an image, text events (the alt text) are dropped.
    let mut in_image = false;
    // Finished cells of the current table, in document order.
    let mut table_cells: Vec<String> = Vec::new();
    // Content of the table cell currently being collected, if any.
    let mut cell_buf: Option<String> = None;

    // Inline output goes into the open table cell when there is one, and
    // straight into the document otherwise.
    let push = |out: &mut String, cell_buf: &mut Option<String>, s: &str| {
        if let Some(b) = cell_buf {
            b.push_str(s);
        } else {
            out.push_str(s);
        }
    };

    for ev in Parser::new_ext(md, opts) {
        match ev {
            Event::Start(Tag::Heading { level, .. }) => {
                let n = match level {
                    HeadingLevel::H1 => 1,
                    HeadingLevel::H2 => 2,
                    HeadingLevel::H3 => 3,
                    HeadingLevel::H4 => 4,
                    HeadingLevel::H5 => 5,
                    HeadingLevel::H6 => 6,
                };
                out.push('\n');
                out.push_str(&"=".repeat(n));
                out.push(' ');
            }
            Event::End(TagEnd::Heading(_)) => out.push_str("\n\n"),
            Event::Start(Tag::Paragraph) => {}
            Event::End(TagEnd::Paragraph) => out.push_str("\n\n"),
            Event::Start(Tag::BlockQuote(_)) => out.push_str("#quote(block: true)[\n"),
            Event::End(TagEnd::BlockQuote(_)) => out.push_str("]\n\n"),
            Event::Start(Tag::CodeBlock(kind)) => {
                let lang = match kind {
                    CodeBlockKind::Fenced(l) => l.to_string(),
                    CodeBlockKind::Indented => String::new(),
                };
                in_code = Some(lang);
                code_buf.clear();
            }
            Event::End(TagEnd::CodeBlock) => {
                let lang = in_code.take().unwrap_or_default();
                out.push_str(&typst_raw_block(&lang, &code_buf));
                code_buf.clear();
            }
            Event::Start(Tag::List(start)) => {
                list_stack.push(start);
                out.push('\n');
            }
            Event::End(TagEnd::List(_)) => {
                list_stack.pop();
                out.push('\n');
            }
            Event::Start(Tag::Item) => {
                let indent = " ".repeat(list_stack.len().saturating_sub(1));
                out.push_str(&indent);
                match list_stack.last().copied().flatten() {
                    Some(_) => out.push_str("+ "),
                    None => out.push_str("- "),
                }
            }
            Event::End(TagEnd::Item) => out.push('\n'),
            Event::Start(Tag::Emphasis) => push(&mut out, &mut cell_buf, "_"),
            Event::End(TagEnd::Emphasis) => push(&mut out, &mut cell_buf, "_"),
            Event::Start(Tag::Strong) => push(&mut out, &mut cell_buf, "*"),
            Event::End(TagEnd::Strong) => push(&mut out, &mut cell_buf, "*"),
            Event::Start(Tag::Strikethrough) => push(&mut out, &mut cell_buf, "#strike[ "),
            Event::End(TagEnd::Strikethrough) => push(&mut out, &mut cell_buf, " ]"),
            Event::Start(Tag::Link { dest_url, .. }) => {
                push(&mut out, &mut cell_buf, "#link(\"");
                push(&mut out, &mut cell_buf, &escape_typst_str(&dest_url));
                push(&mut out, &mut cell_buf, "\")[");
            }
            Event::End(TagEnd::Link) => push(&mut out, &mut cell_buf, "]"),
            Event::Start(Tag::Image { dest_url, .. }) => {
                in_image = true;
                let url = dest_url.to_string();
                let is_remote = url.starts_with("http://") || url.starts_with("https://");
                if is_remote {
                    match images.resolve(&url) {
                        Some(virt) => {
                            push(&mut out, &mut cell_buf, "#image(\"");
                            push(&mut out, &mut cell_buf, &escape_typst_str(&virt));
                            push(&mut out, &mut cell_buf, "\")");
                        }
                        None => {
                            push(&mut out, &mut cell_buf, "_[remote image omitted]_");
                        }
                    }
                } else {
                    let local = local_image_path(&url);
                    push(&mut out, &mut cell_buf, "#image(\"");
                    push(&mut out, &mut cell_buf, &escape_typst_str(&local));
                    push(&mut out, &mut cell_buf, "\")");
                }
            }
            Event::End(TagEnd::Image) => in_image = false,
            Event::Start(Tag::Table(aligns)) => {
                let cols = aligns.len().max(1);
                table_cells.clear();
                out.push_str(&format!("\n#table(\n columns: {},\n", cols));
            }
            Event::End(TagEnd::Table) => {
                // Cells are emitted flat; the column count set when the table
                // was opened tells Typst where rows break.
                for c in table_cells.drain(..) {
                    out.push_str(" [");
                    out.push_str(&c);
                    out.push_str("],\n");
                }
                out.push_str(")\n\n");
            }
            Event::Start(Tag::TableHead) | Event::Start(Tag::TableRow) => {}
            Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {}
            Event::Start(Tag::TableCell) => {
                cell_buf = Some(String::new());
            }
            Event::End(TagEnd::TableCell) => {
                if let Some(b) = cell_buf.take() {
                    table_cells.push(b);
                }
            }
            Event::Text(t) => {
                if in_code.is_some() {
                    code_buf.push_str(&t);
                } else if !in_image {
                    push(&mut out, &mut cell_buf, &escape_typst_text(&t));
                }
            }
            Event::Code(t) => {
                push(&mut out, &mut cell_buf, &typst_raw_inline(&t));
            }
            Event::Html(h) | Event::InlineHtml(h) => {
                let s = h.to_string();
                // Generator marker comments never reach the output.
                if s.contains("nornir:gen:") || s.contains("nornir:generated") {
                    continue;
                }
                if let Some(src) = extract_img_src(&s) {
                    let is_remote = src.starts_with("http://") || src.starts_with("https://");
                    let path = if is_remote {
                        images.resolve(&src)
                    } else {
                        Some(local_image_path(&src))
                    };
                    if let Some(p) = path {
                        let esc = escape_typst_str(&p);
                        match target {
                            Target::Paged => {
                                push(&mut out, &mut cell_buf, "\n#align(center, image(\"");
                                push(&mut out, &mut cell_buf, &esc);
                                push(&mut out, &mut cell_buf, "\", width: 70%))\n\n");
                            }
                            Target::Html => {
                                push(&mut out, &mut cell_buf, "\n#image(\"");
                                push(&mut out, &mut cell_buf, &esc);
                                push(&mut out, &mut cell_buf, "\")\n\n");
                            }
                        }
                    }
                }
            }
            Event::SoftBreak => push(&mut out, &mut cell_buf, " "),
            Event::HardBreak => push(&mut out, &mut cell_buf, " \\\n"),
            Event::Rule => match target {
                Target::Paged => {
                    out.push_str("\n#line(length: 100%, stroke: 0.4pt + gray)\n\n")
                }
                Target::Html => out.push_str("\n#html.elem(\"hr\")\n\n"),
            },
            // With task lists enabled the parser strips `[ ]` / `[x]` from the
            // item text and reports it here; write it back (brackets escaped)
            // so the checked state is not lost.
            Event::TaskListMarker(checked) => {
                let marker = if checked { "\\[x\\] " } else { "\\[ \\] " };
                push(&mut out, &mut cell_buf, marker);
            }
            Event::FootnoteReference(_) | Event::InlineMath(_) | Event::DisplayMath(_) => {}
            Event::Start(_) | Event::End(_) => {}
        }
    }
    out
}
/// Returns the length of the longest run of consecutive backticks in `s`.
fn max_backtick_run(s: &str) -> usize {
    // Splitting on every non-backtick character leaves only backtick runs.
    // A backtick is one byte, so the byte length of a piece is its run length.
    s.split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0)
}

/// Wraps `content` as inline Typst raw text.
///
/// Content without backticks uses a single-backtick fence. Otherwise the
/// fence is longer than the longest backtick run and at least three backticks
/// long, because Typst reads a two-backtick fence as an empty raw element.
fn typst_raw_inline(content: &str) -> String {
    let run = max_backtick_run(content);
    if run == 0 {
        return format!("`{content}`");
    }
    let fence = "`".repeat((run + 1).max(3));
    // With three or more backticks Typst reads an identifier directly after the
    // fence as a language tag, so always separate fence and content with a
    // space; Typst trims that single space again. A trailing space is needed
    // (and likewise trimmed) when the content itself ends with a backtick.
    let tail = if content.ends_with('`') { " " } else { "" };
    format!("{fence} {content}{tail}{fence}")
}

/// Wraps `content` as a Typst raw block with an optional language tag.
///
/// `lang` is the Markdown info string. Only its first token (up to whitespace
/// or a comma) is used, and only when it is made of ASCII letters, digits,
/// `_` and `-` and does not start with a digit or `-`. Anything else would
/// not be consumed as a Typst language tag and would leak into the code body.
fn typst_raw_block(lang: &str, content: &str) -> String {
    let fence = "`".repeat((max_backtick_run(content) + 1).max(3));

    let token = lang
        .split(|c: char| c.is_whitespace() || c == ',')
        .next()
        .unwrap_or("");
    let mut chars = token.chars();
    let starts_ok = matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');
    let rest_ok = chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
    let tag = if starts_ok && rest_ok { token } else { "" };

    // The closing fence must sit on its own line.
    let newline = if content.ends_with('\n') { "" } else { "\n" };
    format!("\n{fence}{tag}\n{content}{newline}{fence}\n\n")
}
/// Turns a local image reference into a path anchored at the Typst root.
///
/// One leading `./` is removed, then all leading slashes, and the result is
/// prefixed with a single `/`.
fn local_image_path(url: &str) -> String {
    let relative = match url.strip_prefix("./") {
        Some(rest) => rest,
        None => url,
    };
    let relative = relative.trim_start_matches('/');

    let mut anchored = String::with_capacity(relative.len() + 1);
    anchored.push('/');
    anchored.push_str(relative);
    anchored
}
/// Extracts the quoted `src` attribute value that follows the first `<img`
/// in an HTML fragment.
///
/// Matching is ASCII case-insensitive. The attribute name must be preceded by
/// whitespace, so `data-src` is not mistaken for `src`, and whitespace around
/// `=` is accepted. Attributes of elements that appear before the `<img` are
/// ignored. Returns `None` when there is no `<img`, no quoted `src`, or the
/// quote is never closed.
fn extract_img_src(html: &str) -> Option<String> {
    // ASCII lowercasing changes no byte lengths, so offsets found in `lower`
    // are valid offsets (and char boundaries) in `html`.
    let lower = html.to_ascii_lowercase();
    let mut cursor = lower.find("<img")? + "<img".len();

    while let Some(found) = lower[cursor..].find("src") {
        let name_start = cursor + found;
        cursor = name_start + "src".len();

        // `name_start` is at least 4 here, so the preceding byte exists.
        if !lower.as_bytes()[name_start - 1].is_ascii_whitespace() {
            continue;
        }
        let value = match html[cursor..].trim_start().strip_prefix('=') {
            Some(rest) => rest.trim_start(),
            None => continue,
        };
        let quote = match value.chars().next() {
            Some(q) if q == '"' || q == '\'' => q,
            // Unquoted values are not supported; keep looking.
            _ => continue,
        };
        let quoted = &value[1..];
        return quoted.find(quote).map(|end| quoted[..end].to_string());
    }
    None
}
/// Escapes plain text so that Typst markup renders it literally.
///
/// A backslash is put in front of every character with a markup meaning
/// (`\`, `#`, `$`, `*`, `_`, `` ` ``, `<`, `>`, `@`, `[`, `]`, `~`). A `/` is
/// escaped only when the next character is `/` or `*`, i.e. when it would
/// otherwise open a comment; lone slashes in paths stay untouched.
fn escape_typst_text(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            // `~` is Typst's non-breaking-space shorthand and must be escaped
            // too, otherwise text such as `~/.config` loses its tilde.
            '\\' | '#' | '$' | '*' | '_' | '`' | '<' | '>' | '@' | '[' | ']' | '~' => {
                out.push('\\');
                out.push(c);
            }
            '/' if matches!(chars.peek().copied(), Some('/') | Some('*')) => {
                out.push('\\');
                out.push('/');
            }
            _ => out.push(c),
        }
    }
    out
}
/// Escapes `s` for use inside a double-quoted Typst string literal.
///
/// Double quotes and backslashes get a backslash prefix; every other
/// character is copied unchanged.
fn escape_typst_str(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '"' || ch == '\\' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Exports the `README.md` of the repository at `repo_root` in `format`.
///
/// Metadata comes from the repository's `Cargo.toml`, remote images are cached
/// under `.nornir/cache/images` inside the repository, and the repository root
/// is used as the Typst file-system root.
///
/// # Errors
///
/// Fails when `README.md` or the metadata cannot be read, or when rendering
/// fails.
pub fn export_repo(repo_root: &Path, format: DocFormat) -> Result<Vec<u8>> {
    let readme = repo_root.join("README.md");
    let markdown = std::fs::read_to_string(&readme)
        .with_context(|| format!("read {}", readme.display()))?;
    let meta = read_meta(repo_root)?;
    let image_cache = repo_root.join(".nornir/cache/images");
    export(
        &markdown,
        &meta,
        format,
        Some(image_cache.as_path()),
        Some(repo_root),
    )
}
/// Builds [`ExportMeta`] from the `Cargo.toml` in `repo_root`.
///
/// * Title: `package.name` (or `workspace.package.name` when there is no
///   `[package]` table), falling back to the directory name of `repo_root`
///   and finally to `project`.
/// * Version: the package version when it is a plain string, otherwise the
///   `workspace.package` version from the same manifest (this covers
///   `version.workspace = true`), otherwise `0.0.0`.
/// * `generated` is the current UTC date; `cover_image` is left empty.
///
/// # Errors
///
/// Fails when `Cargo.toml` cannot be read or is not valid TOML.
fn read_meta(repo_root: &Path) -> Result<ExportMeta> {
    /// Looks up `key` in an optional TOML table and returns it if it is a string.
    fn str_field<'a>(table: Option<&'a toml::Value>, key: &str) -> Option<&'a str> {
        table?.get(key)?.as_str()
    }

    let cargo_toml = repo_root.join("Cargo.toml");
    let content = std::fs::read_to_string(&cargo_toml)
        .with_context(|| format!("read {}", cargo_toml.display()))?;
    let parsed: toml::Value = toml::from_str(&content)
        .with_context(|| format!("parse {}", cargo_toml.display()))?;

    let workspace_pkg = parsed.get("workspace").and_then(|w| w.get("package"));
    let pkg = parsed.get("package").or(workspace_pkg);

    let title = match str_field(pkg, "name") {
        Some(name) => name.to_string(),
        None => repo_root
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("project")
            .to_string(),
    };
    // An inherited version is a table (`{ workspace = true }`), not a string,
    // so fall through to the workspace-level value before giving up.
    let version = str_field(pkg, "version")
        .or_else(|| str_field(workspace_pkg, "version"))
        .unwrap_or("0.0.0")
        .to_string();
    let generated = chrono::Utc::now().format("%Y-%m-%d").to_string();

    Ok(ExportMeta {
        title,
        version,
        generated,
        cover_image: String::new(),
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings, inline code and bullet lists survive the conversion.
    #[test]
    fn md_to_typst_basic() {
        let md = "# Title\n\nSome *bold* text and `code`.\n\n- one\n- two\n";
        let out = md_to_typst(md);
        assert!(out.contains("= Title"));
        assert!(out.contains("`code`"));
        assert!(out.contains("- one"));
    }

    /// Tables become `#table(...)` with the right column count.
    #[test]
    fn md_to_typst_table() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |\n";
        let out = md_to_typst(md);
        assert!(out.contains("#table("));
        assert!(out.contains("columns: 2"));
    }

    /// Links become `#link("...")`.
    #[test]
    fn md_to_typst_link() {
        let md = "see [docs](https://example.com)";
        let out = md_to_typst(md);
        assert!(out.contains("#link(\"https://example.com\")"));
    }

    /// Fenced code keeps its language tag and content.
    #[test]
    fn md_to_typst_code_fence() {
        let md = "```rust\nfn main() {}\n```\n";
        let out = md_to_typst(md);
        assert!(out.contains("```rust"));
        assert!(out.contains("fn main() {}"));
    }

    /// Format names parse case-insensitively; unknown names are rejected.
    #[test]
    fn format_parse() {
        assert_eq!(DocFormat::parse("pdf").unwrap(), DocFormat::Pdf);
        assert_eq!(DocFormat::parse("HTML").unwrap(), DocFormat::Html);
        assert_eq!(DocFormat::parse("md").unwrap(), DocFormat::Md);
        assert!(DocFormat::parse("xml").is_err());
    }

    /// Without a cache directory a remote image is replaced by a placeholder
    /// (no network access happens).
    #[test]
    fn md_to_typst_remote_image_placeholder_without_cache() {
        let md = "![logo](https://example.com/logo.png)\n";
        let out = md_to_typst(md);
        assert!(
            out.contains("remote image omitted"),
            "expected placeholder, got: {out}"
        );
    }

    /// Local image paths are anchored at the Typst root.
    #[test]
    fn md_to_typst_local_image_root_anchored() {
        let out = md_to_typst("![logo](.nornir/assets/logo.png)\n");
        assert!(out.contains("#image(\"/.nornir/assets/logo.png\")"), "got: {out}");
        let out2 = md_to_typst("![logo](./assets/logo.png)\n");
        assert!(out2.contains("#image(\"/assets/logo.png\")"), "got: {out2}");
    }

    /// An HTML `<img>` block is rendered as a centered image for paged output.
    #[test]
    fn html_img_block_becomes_centered_image() {
        let md = "<p align=\"center\"><img src=\".nornir/assets/logo.webp\" width=\"720\" /></p>\n";
        let out = md_to_typst(md);
        assert!(out.contains("#align(center, image(\"/.nornir/assets/logo.webp\""), "got: {out}");
    }

    /// Both quote styles are accepted; fragments without `<img>` yield `None`.
    #[test]
    fn extract_img_src_handles_quotes() {
        assert_eq!(extract_img_src("<img src=\"a/b.png\">").as_deref(), Some("a/b.png"));
        assert_eq!(extract_img_src("<img class=x src='c.svg' />").as_deref(), Some("c.svg"));
        assert_eq!(extract_img_src("<p>no image</p>"), None);
    }

    /// Markdown export returns the input bytes unchanged.
    #[test]
    fn md_passthrough() {
        let md = "hello";
        let meta = ExportMeta {
            title: "t".into(),
            version: "0".into(),
            generated: "g".into(),
            cover_image: String::new(),
        };
        let out = export(md, &meta, DocFormat::Md, None, None).unwrap();
        assert_eq!(out, b"hello");
    }

    /// `//` and `/*` in text must not start a Typst comment.
    #[test]
    fn url_in_text_does_not_open_typst_comment() {
        let out = md_to_typst("**https://codeberg.org/nordisk/znippy**\n");
        assert!(out.contains("https:\\//codeberg.org"), "comment-start slash not escaped: {out}");
        let path = md_to_typst("see a/b/c here\n");
        assert!(path.contains("a/b/c"), "lone slashes should not be escaped: {path}");
        let blk = md_to_typst("a /* b\n");
        assert!(!blk.contains("/*"), "bare /* would open a block comment: {blk}");
    }

    /// End-to-end: a bold URL compiles to a non-empty PDF.
    #[test]
    fn emphasised_url_compiles_to_pdf() {
        let md = "# moved\n\n**https://codeberg.org/nordisk/znippy**\n";
        let meta = ExportMeta {
            title: "t".into(),
            version: "0".into(),
            generated: "g".into(),
            cover_image: String::new(),
        };
        let out = export(md, &meta, DocFormat::Pdf, None, None).expect("pdf compile");
        assert!(!out.is_empty());
    }

    /// End-to-end: HTML export produces an HTML document containing the heading.
    #[test]
    fn html_render_smoke() {
        let md = "# H\n\ntext\n";
        let meta = ExportMeta {
            title: "T".into(),
            version: "1".into(),
            generated: "g".into(),
            cover_image: String::new(),
        };
        let out = export(md, &meta, DocFormat::Html, None, None).unwrap();
        let s = String::from_utf8(out).unwrap();
        assert!(s.contains('H'), "expected heading text in html, got: {s}");
        assert!(s.to_ascii_lowercase().contains("<html"), "expected html doc, got: {s}");
    }
}