use std::collections::BTreeMap;
use std::path::{Component, Path, PathBuf};
use std::sync::OnceLock;
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Parser, Tag, TagEnd};
use syntect::html::{ClassStyle, ClassedHTMLGenerator};
use syntect::parsing::SyntaxSet;
use syntect::util::LinesWithEndings;
use crate::domain::model::alert::extract_alerts;
use crate::domain::model::body::Body;
/// Mutable state threaded through a single [`render_body`] call.
///
/// The caller fills in the inputs (`link_resolver`) before rendering and reads
/// the outputs (`has_mermaid`) afterwards.
#[derive(Clone, Debug, Default)]
pub struct MarkdownContext {
    /// Set to `true` by [`render_body`] as soon as it meets a fenced code
    /// block whose info string is exactly `mermaid`. Never reset by the
    /// renderer itself.
    pub has_mermaid: bool,
    /// When present, link destinations are passed through
    /// [`LinkResolver::resolve`] and replaced if it returns a URL. When
    /// `None`, links are emitted exactly as written.
    pub link_resolver: Option<LinkResolver>,
}
/// Lookup table used to turn relative `.md` links into published URLs.
#[derive(Clone, Debug, Default)]
pub struct LinkResolver {
    /// Directory of the document currently being rendered; relative link
    /// targets are joined onto this path before the lookup.
    pub current_source_dir: PathBuf,
    /// Maps a normalised source path (for example `guides/intro.md`) to the
    /// URL that should replace a link pointing at it.
    pub by_source: BTreeMap<PathBuf, String>,
}
impl LinkResolver {
    /// Maps a link destination to its published URL, if it points at a known
    /// Markdown source file.
    ///
    /// Returns `None` (meaning "leave the link alone") when `dest`:
    /// - starts with `http://`, `https://`, `mailto:`, `/` or `#`;
    /// - does not end in `.md` once any `#fragment` is removed;
    /// - does not match an entry in `by_source` after being joined onto
    ///   `current_source_dir` and normalised.
    ///
    /// On a hit the original `#fragment`, if any, is appended to the URL.
    pub fn resolve(&self, dest: &str) -> Option<String> {
        // Destinations that are already absolute, external or purely
        // in-page are never rewritten.
        const PASSTHROUGH_PREFIXES: [&str; 5] = ["http://", "https://", "mailto:", "/", "#"];
        if PASSTHROUGH_PREFIXES
            .iter()
            .any(|prefix| dest.starts_with(*prefix))
        {
            return None;
        }

        // `fragment` keeps its leading '#', or is empty when there is none.
        let (target, fragment) = match dest.find('#') {
            Some(hash_at) => dest.split_at(hash_at),
            None => (dest, ""),
        };
        if !target.ends_with(".md") {
            return None;
        }

        let source = normalise_path(&self.current_source_dir.join(target));
        self.by_source
            .get(&source)
            .map(|url| format!("{url}{fragment}"))
    }
}
/// Lexically normalises `p` by dropping `.` segments and collapsing
/// `segment/..` pairs, without touching the filesystem.
///
/// A `..` is only allowed to cancel a preceding normal segment:
/// - `..` directly after a root or prefix is dropped (the parent of the root
///   is the root), so an absolute path stays absolute;
/// - `..` with nothing left to cancel is kept in the result, so a path that
///   climbs above its starting directory does not silently turn into a
///   different, valid-looking path.
fn normalise_path(p: &Path) -> PathBuf {
    let mut stack: Vec<Component<'_>> = Vec::new();
    for comp in p.components() {
        match comp {
            Component::CurDir => {}
            Component::ParentDir => {
                // `Component` is `Copy`, so this does not keep `stack` borrowed.
                let last = stack.last().copied();
                match last {
                    Some(Component::Normal(_)) => {
                        stack.pop();
                    }
                    Some(Component::RootDir | Component::Prefix(_)) => {}
                    // Empty stack, or a run of leading `..` already kept.
                    _ => stack.push(comp),
                }
            }
            other => stack.push(other),
        }
    }
    stack.iter().collect()
}
pub fn render_body(markdown: &str, ctx: &mut MarkdownContext) -> String {
let mut events: Vec<Event<'_>> = Vec::new();
let mut in_mermaid = false;
let mut mermaid_buf = String::new();
let mut in_code = false;
let mut code_lang = String::new();
let mut code_buf = String::new();
let mut bq_depth: usize = 0;
let mut bq_buffer: Vec<Event<'_>> = Vec::new();
let mut bq_start: usize = 0;
for (event, range) in Parser::new(markdown).into_offset_iter() {
let event = rewrite_link_event(event, ctx.link_resolver.as_ref());
match &event {
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang)))
if lang.as_ref() == "mermaid" =>
{
in_mermaid = true;
mermaid_buf.clear();
ctx.has_mermaid = true;
}
Event::End(TagEnd::CodeBlock) if in_mermaid => {
in_mermaid = false;
let html = format!(
"<pre class=\"mermaid\">{}</pre>\n",
escape_html(&mermaid_buf)
);
push_event(
&mut events,
&mut bq_buffer,
bq_depth,
Event::Html(CowStr::from(html)),
);
}
Event::Text(t) if in_mermaid => mermaid_buf.push_str(t.as_ref()),
_ if in_mermaid => {}
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => {
in_code = true;
code_lang = lang.as_ref().to_string();
code_buf.clear();
}
Event::End(TagEnd::CodeBlock) if in_code => {
in_code = false;
let html = render_code_block(&code_lang, &code_buf);
push_event(
&mut events,
&mut bq_buffer,
bq_depth,
Event::Html(CowStr::from(html)),
);
}
Event::Text(t) if in_code => code_buf.push_str(t.as_ref()),
_ if in_code => {}
Event::Start(Tag::BlockQuote(_)) if bq_depth == 0 => {
bq_depth = 1;
bq_start = range.start;
bq_buffer.clear();
bq_buffer.push(event);
}
Event::Start(Tag::BlockQuote(_)) => {
bq_depth += 1;
bq_buffer.push(event);
}
Event::End(TagEnd::BlockQuote) if bq_depth == 1 => {
bq_depth = 0;
bq_buffer.push(event);
let slice = &markdown[bq_start..range.end];
let alerts = extract_alerts(&Body::new(slice));
if let Some(alert) = alerts.into_iter().next() {
let class = format!("alert alert-{}", alert.marker.to_lowercase());
let inner = render_body(&alert.content, ctx);
events.push(Event::Html(CowStr::from(format!(
"<aside class=\"{class}\">\n{inner}</aside>\n"
))));
bq_buffer.clear();
} else {
events.append(&mut bq_buffer);
}
}
Event::End(TagEnd::BlockQuote) => {
bq_depth -= 1;
bq_buffer.push(event);
}
_ if bq_depth > 0 => {
bq_buffer.push(event);
}
_ => events.push(event),
}
}
let mut out = String::new();
html::push_html(&mut out, events.into_iter());
out
}
/// Returns `event` with its link destination swapped for the resolver's URL.
///
/// Only `Start(Tag::Link { .. })` events are touched, and only when a resolver
/// is supplied and [`LinkResolver::resolve`] yields a replacement. Every other
/// event, and every link the resolver declines, is passed through unchanged.
fn rewrite_link_event<'a>(event: Event<'a>, resolver: Option<&LinkResolver>) -> Event<'a> {
    match (resolver, event) {
        (
            Some(resolver),
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }),
        ) => {
            let rewritten = resolver.resolve(dest_url.as_ref());
            let dest_url = match rewritten {
                Some(url) => CowStr::from(url),
                None => dest_url,
            };
            Event::Start(Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            })
        }
        (_, untouched) => untouched,
    }
}
/// Appends `event` to the blockquote buffer while a blockquote is open
/// (`bq_depth > 0`), and to the main event list otherwise.
fn push_event<'a>(
    events: &mut Vec<Event<'a>>,
    bq_buffer: &mut Vec<Event<'a>>,
    bq_depth: usize,
    event: Event<'a>,
) {
    let sink = if bq_depth == 0 { events } else { bq_buffer };
    sink.push(event);
}
/// Returns the plain text of the first non-empty paragraph in `markdown`.
///
/// Text and inline-code content are concatenated, soft and hard line breaks
/// become a single space, and the result is trimmed. Paragraphs that are empty
/// after trimming are skipped. Returns `None` when no paragraph qualifies.
pub fn extract_first_paragraph(markdown: &str) -> Option<String> {
    let mut inside_paragraph = false;
    let mut collected = String::new();

    for event in Parser::new(markdown) {
        // A paragraph start always resets the accumulator.
        if let Event::Start(Tag::Paragraph) = event {
            inside_paragraph = true;
            collected.clear();
            continue;
        }
        if !inside_paragraph {
            continue;
        }

        match event {
            Event::End(TagEnd::Paragraph) => {
                let summary = collected.trim();
                if !summary.is_empty() {
                    return Some(summary.to_string());
                }
                inside_paragraph = false;
            }
            Event::Text(piece) | Event::Code(piece) => collected.push_str(piece.as_ref()),
            Event::SoftBreak | Event::HardBreak => collected.push(' '),
            _ => {}
        }
    }

    None
}
/// Returns the process-wide syntect syntax set, loading the bundled defaults
/// (newline-aware variant) on first use and reusing them afterwards.
fn syntax_set() -> &'static SyntaxSet {
    static DEFAULT_SYNTAXES: OnceLock<SyntaxSet> = OnceLock::new();
    DEFAULT_SYNTAXES.get_or_init(SyntaxSet::load_defaults_newlines)
}
/// Renders a fenced code block as `<pre><code …>…</code></pre>\n`.
///
/// `lang` is the fence's info string; only its first whitespace-separated
/// token is used. When that token is non-empty it becomes a
/// `class="language-<token>"` attribute. The body is syntect-highlighted HTML
/// (classes prefixed `hl-`) when the token names a known syntax and
/// highlighting succeeds; otherwise it is the HTML-escaped source.
fn render_code_block(lang: &str, code: &str) -> String {
    let token = lang.split_whitespace().next().unwrap_or("");
    let class_attr = if token.is_empty() {
        String::new()
    } else {
        // The token is author-controlled and lands inside a double-quoted
        // attribute. `escape_html` only targets markup characters, so the
        // quote is encoded here to stop the value from closing the attribute.
        let safe_token = escape_html(token).replace('"', "&quot;");
        format!(" class=\"language-{safe_token}\"")
    };
    let body = highlight_code(token, code).unwrap_or_else(|| escape_html(code));
    format!("<pre><code{class_attr}>{body}</code></pre>\n")
}

/// Highlights `code` with the syntax named by `token`.
///
/// Returns `None` when `token` is empty, names no known syntax, or syntect
/// reports an error on any line, so the caller can fall back to plain text.
fn highlight_code(token: &str, code: &str) -> Option<String> {
    if token.is_empty() {
        return None;
    }
    let ss = syntax_set();
    let syntax = ss.find_syntax_by_token(token)?;
    let mut generator = ClassedHTMLGenerator::new_with_class_style(
        syntax,
        ss,
        ClassStyle::SpacedPrefixed { prefix: "hl-" },
    );
    for line in LinesWithEndings::from(code) {
        generator
            .parse_html_for_line_which_includes_newline(line)
            .ok()?;
    }
    Some(generator.finalize())
}
/// Escapes the characters that are significant in HTML text content:
/// `<` becomes `&lt;`, `>` becomes `&gt;` and `&` becomes `&amp;`.
///
/// Quotes are left as-is, so the result is safe between tags but not, on its
/// own, inside a quoted attribute value.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '&' => out.push_str("&amp;"),
            _ => out.push(c),
        }
    }
    out
}
/// Unit tests for link resolution, Markdown rendering and summary extraction.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a resolver rooted at `current` from `(source path, url)` pairs.
    fn resolver_with(current: &str, pairs: &[(&str, &str)]) -> LinkResolver {
        LinkResolver {
            current_source_dir: PathBuf::from(current),
            by_source: pairs
                .iter()
                .map(|(s, u)| (PathBuf::from(*s), (*u).to_string()))
                .collect(),
        }
    }

    /// Wraps `resolver` in a context with `has_mermaid` cleared.
    fn ctx_with(resolver: LinkResolver) -> MarkdownContext {
        MarkdownContext {
            has_mermaid: false,
            link_resolver: Some(resolver),
        }
    }

    // ---- LinkResolver::resolve ----

    #[test]
    fn resolver_rewrites_sibling_md_link() {
        let r = resolver_with("guides", &[("guides/intro.md", "/pages/guides/intro/")]);
        assert_eq!(
            r.resolve("intro.md").as_deref(),
            Some("/pages/guides/intro/")
        );
    }

    #[test]
    fn resolver_keeps_fragment_after_rewrite() {
        let r = resolver_with("guides", &[("guides/intro.md", "/pages/guides/intro/")]);
        assert_eq!(
            r.resolve("intro.md#section").as_deref(),
            Some("/pages/guides/intro/#section")
        );
    }

    #[test]
    fn resolver_handles_dot_prefix() {
        let r = resolver_with("guides", &[("guides/intro.md", "/pages/guides/intro/")]);
        assert_eq!(
            r.resolve("./intro.md").as_deref(),
            Some("/pages/guides/intro/")
        );
    }

    #[test]
    fn resolver_handles_parent_dir() {
        let r = resolver_with("guides/sub", &[("guides/intro.md", "/pages/guides/intro/")]);
        assert_eq!(
            r.resolve("../intro.md").as_deref(),
            Some("/pages/guides/intro/")
        );
    }

    #[test]
    fn resolver_returns_none_for_unknown_target() {
        let r = resolver_with("guides", &[("guides/intro.md", "/pages/guides/intro/")]);
        assert_eq!(r.resolve("missing.md"), None);
    }

    #[test]
    fn resolver_ignores_external_urls() {
        let r = resolver_with("guides", &[]);
        assert_eq!(r.resolve("https://example.com"), None);
        assert_eq!(r.resolve("http://example.com/foo.md"), None);
        assert_eq!(r.resolve("mailto:user@example.com"), None);
    }

    #[test]
    fn resolver_ignores_absolute_paths() {
        let r = resolver_with("guides", &[]);
        assert_eq!(r.resolve("/pages/other/"), None);
    }

    #[test]
    fn resolver_ignores_pure_fragment() {
        let r = resolver_with("guides", &[]);
        assert_eq!(r.resolve("#section"), None);
    }

    #[test]
    fn resolver_ignores_non_md_relative_links() {
        let r = resolver_with("guides", &[]);
        assert_eq!(r.resolve("image.png"), None);
    }

    // ---- render_body: link rewriting ----

    #[test]
    fn render_body_rewrites_known_md_link_with_resolver() {
        let mut ctx = ctx_with(resolver_with(
            "guides",
            &[("guides/intro.md", "/pages/guides/intro/")],
        ));
        let html = render_body("See [intro](intro.md).", &mut ctx);
        assert!(
            html.contains(r#"href="/pages/guides/intro/""#),
            "got: {html}"
        );
    }

    #[test]
    fn render_body_leaves_unknown_md_link_untouched() {
        let mut ctx = ctx_with(resolver_with("guides", &[]));
        let html = render_body("See [missing](other.md).", &mut ctx);
        assert!(html.contains(r#"href="other.md""#), "got: {html}");
    }

    #[test]
    fn render_body_without_resolver_does_not_rewrite_links() {
        let mut ctx = MarkdownContext::default();
        let html = render_body("[X](foo.md)", &mut ctx);
        assert!(html.contains(r#"href="foo.md""#), "got: {html}");
    }

    // ---- render_body: basics, mermaid and code fences ----

    #[test]
    fn render_body_empty_input_yields_empty_output() {
        let mut ctx = MarkdownContext::default();
        assert_eq!(render_body("", &mut ctx), "");
        assert!(!ctx.has_mermaid);
    }

    #[test]
    fn render_body_renders_a_simple_paragraph() {
        let mut ctx = MarkdownContext::default();
        let html = render_body("Hello world.", &mut ctx);
        assert_eq!(html, "<p>Hello world.</p>\n");
        assert!(!ctx.has_mermaid);
    }

    #[test]
    fn render_body_emits_pre_class_mermaid_for_mermaid_fence() {
        let md = "```mermaid\ngraph TD; A-->B;\n```";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(html.contains("<pre class=\"mermaid\">"));
        // The diagram source is HTML-escaped, so the arrow's `>` is an entity.
        assert!(html.contains("graph TD; A--&gt;B;"), "got: {html}");
        assert!(!html.contains("<code"));
        assert!(ctx.has_mermaid);
    }

    #[test]
    fn render_body_highlights_rust_with_class_spans() {
        let md = "```rust\nfn main() {}\n```";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(
            html.contains("<code class=\"language-rust\""),
            "got: {html}"
        );
        assert!(html.contains("class=\"hl-"), "no hl-* spans in: {html}");
        assert!(!html.contains("class=\"mermaid\""));
        assert!(!ctx.has_mermaid);
    }

    #[test]
    fn render_body_falls_back_to_plain_code_for_unknown_language() {
        let md = "```nonsenselang\nsome content\n```";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(
            html.contains("<code class=\"language-nonsenselang\""),
            "got: {html}"
        );
        assert!(
            !html.contains("class=\"hl-"),
            "should not highlight: {html}"
        );
        assert!(html.contains("some content"), "got: {html}");
    }

    #[test]
    fn render_body_escapes_html_in_plain_code_fallback() {
        let md = "```unknown\n<script>alert(1)</script>\n```";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        // The tag must appear entity-encoded, never as live markup.
        assert!(html.contains("&lt;script&gt;"), "got: {html}");
        assert!(!html.contains("<script>"));
    }

    #[test]
    fn render_body_renders_code_without_language_tag_as_plain_pre_code() {
        let md = "```\nplain text\n```";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(html.contains("<code>plain text"), "got: {html}");
        assert!(!html.contains("class=\"hl-"));
    }

    #[test]
    fn render_body_handles_heading_paragraph_and_mermaid_together() {
        let md = "# Title\n\nIntro paragraph.\n\n```mermaid\nA-->B\n```\n\nAfter.";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(html.contains("<h1>Title</h1>"));
        assert!(html.contains("<p>Intro paragraph.</p>"));
        assert!(html.contains("<pre class=\"mermaid\">"));
        assert!(html.contains("<p>After.</p>"));
        assert!(ctx.has_mermaid);
    }

    #[test]
    fn render_body_does_not_set_has_mermaid_for_inline_code_with_mermaid_text() {
        let md = "Use `mermaid` syntax.";
        let mut ctx = MarkdownContext::default();
        let _ = render_body(md, &mut ctx);
        assert!(!ctx.has_mermaid);
    }

    // ---- render_body: alerts and blockquotes ----

    #[test]
    fn render_body_emits_aside_for_decision_alert() {
        let md = "> [!DECISION]\n> chosen path";
        let mut ctx = MarkdownContext::default();
        let html = render_body(md, &mut ctx);
        assert!(
            html.contains("<aside class=\"alert alert-decision\">"),
            "got: {html}"
        );
        assert!(html.contains("chosen path"));
        assert!(!html.contains("[!DECISION]"));
        assert!(!html.contains("<blockquote>"));
    }

    #[test]
    fn render_body_renders_inner_markdown_inside_aside() {
        let md = "> [!DECISION]\n> use **bold** and `code`";
        let html = render_body(md, &mut MarkdownContext::default());
        assert!(html.contains("<strong>bold</strong>"));
        assert!(html.contains("<code>code</code>"));
    }

    #[test]
    fn render_body_lowercases_marker_for_class() {
        let md = "> [!CONTEXT]\n> background";
        let html = render_body(md, &mut MarkdownContext::default());
        assert!(
            html.contains("class=\"alert alert-context\""),
            "got: {html}"
        );
    }

    #[test]
    fn render_body_keeps_plain_blockquote_unchanged() {
        let md = "> just a quote\n> with two lines";
        let html = render_body(md, &mut MarkdownContext::default());
        assert!(html.contains("<blockquote>"), "got: {html}");
        assert!(!html.contains("<aside"));
    }

    #[test]
    fn render_body_handles_alert_alongside_other_content() {
        let md = "Intro paragraph.\n\n> [!DECISION]\n> the call\n\nAfter.";
        let html = render_body(md, &mut MarkdownContext::default());
        assert!(html.contains("<p>Intro paragraph.</p>"));
        assert!(html.contains("<aside class=\"alert alert-decision\">"));
        assert!(html.contains("the call"));
        assert!(html.contains("<p>After.</p>"));
    }

    // ---- extract_first_paragraph ----

    #[test]
    fn first_paragraph_of_empty_input_is_none() {
        assert_eq!(extract_first_paragraph(""), None);
    }

    #[test]
    fn first_paragraph_returns_only_paragraph_text() {
        assert_eq!(
            extract_first_paragraph("Hello world."),
            Some("Hello world.".to_string())
        );
    }

    #[test]
    fn first_paragraph_skips_leading_heading() {
        let md = "# Title\n\nThe summary line.\n\nMore.";
        assert_eq!(
            extract_first_paragraph(md),
            Some("The summary line.".to_string())
        );
    }

    #[test]
    fn first_paragraph_returns_none_when_only_heading() {
        assert_eq!(extract_first_paragraph("# Only a heading"), None);
    }

    #[test]
    fn first_paragraph_returns_none_when_only_code_block() {
        let md = "```rust\nlet x = 1;\n```";
        assert_eq!(extract_first_paragraph(md), None);
    }

    #[test]
    fn first_paragraph_flattens_inline_formatting() {
        let md = "This has **bold** and `code` and [a link](x.md).";
        assert_eq!(
            extract_first_paragraph(md),
            Some("This has bold and code and a link.".to_string())
        );
    }

    #[test]
    fn first_paragraph_collapses_soft_break_to_space() {
        let md = "First line\nstill same paragraph.";
        assert_eq!(
            extract_first_paragraph(md),
            Some("First line still same paragraph.".to_string())
        );
    }
}