1use std::collections::HashMap;
2
3use anyhow::{Context, Result};
4use once_cell::sync::Lazy;
5use pulldown_cmark::{
6 html, CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag, TagEnd,
7};
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use syntect::easy::HighlightLines;
11use syntect::highlighting::{Theme, ThemeSet};
12use syntect::html::{styled_line_to_highlighted_html, IncludeBackground};
13use syntect::parsing::SyntaxSet;
14use syntect::util::LinesWithEndings;
15
/// Rendering switches accepted by `parse_markdown`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownOptions {
    /// Emit fenced `mermaid` blocks as `<pre class="mermaid">` instead of a
    /// regular code block.
    pub mermaid: bool,
    /// Run regular code blocks through the syntect highlighter; when `false`
    /// the code is only HTML-escaped.
    pub code_highlight: bool,
    /// Render headings with an `id` attribute and a leading `#` anchor link.
    pub heading_anchors: bool,
    /// Include the text of code blocks in `Document::search_text`.
    pub index_code: bool,
}
23
24impl Default for MarkdownOptions {
25 fn default() -> Self {
26 Self {
27 mermaid: true,
28 code_highlight: true,
29 heading_anchors: true,
30 index_code: false,
31 }
32 }
33}
34
/// Page metadata read from the optional YAML block at the top of a document.
///
/// `#[serde(default)]` allows any key to be omitted; missing keys take the
/// values produced by the `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct Frontmatter {
    /// Explicit page title. When absent, `parse_markdown` falls back to the
    /// first heading and then to `"Untitled"`.
    pub title: Option<String>,
    /// Layout name; `normalize_frontmatter` replaces an empty value with `"doc"`.
    pub layout: String,
    /// NOTE(review): presumably toggles the sidebar for this page — it is not
    /// read by the code visible here; confirm against the consumer.
    pub sidebar: bool,
    /// NOTE(review): presumably toggles search indexing for this page — it is
    /// not read by the code visible here; confirm against the consumer.
    pub search: bool,
    /// Access level; `normalize_frontmatter` coerces anything other than
    /// `"public"` or `"masked"` to `"public"`.
    pub access: String,
}
44
45impl Default for Frontmatter {
46 fn default() -> Self {
47 Self {
48 title: None,
49 layout: "doc".to_string(),
50 sidebar: true,
51 search: true,
52 access: "public".to_string(),
53 }
54 }
55}
56
/// One heading found in the document, in source order.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Heading {
    /// Heading depth, 1 for `h1` through 6 for `h6`.
    pub level: u8,
    /// Trimmed concatenation of the heading's text and inline-code content.
    pub text: String,
    /// Fragment identifier assigned by `collect_headings` (slugified text,
    /// de-duplicated through `unique_slug`).
    pub anchor: String,
}
63
/// Result of `parse_markdown`: the rendered page plus derived metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Document {
    /// Frontmatter after `normalize_frontmatter` has been applied.
    pub frontmatter: Frontmatter,
    /// Frontmatter title, else the first heading's text, else `"Untitled"`.
    pub title: String,
    /// HTML rendered from the Markdown body (frontmatter excluded).
    pub html: String,
    /// Every heading in the body, in document order.
    pub headings: Vec<Heading>,
    /// Whitespace-normalized plain text produced by `collect_search_text`.
    pub search_text: String,
}
72
73pub fn parse_markdown(input: &str, options: MarkdownOptions) -> Result<Document> {
74 let (frontmatter, markdown) = split_frontmatter(input)?;
75 let mut frontmatter = frontmatter;
76 normalize_frontmatter(&mut frontmatter);
77
78 let mut parser_options = Options::empty();
79 parser_options.insert(Options::ENABLE_TABLES);
80 parser_options.insert(Options::ENABLE_FOOTNOTES);
81 parser_options.insert(Options::ENABLE_STRIKETHROUGH);
82 parser_options.insert(Options::ENABLE_TASKLISTS);
83 parser_options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
84
85 let events: Vec<Event<'_>> = Parser::new_ext(markdown, parser_options).collect();
86 let headings = collect_headings(&events);
87 let html = render_html(events, &headings, &options);
88 let search_text = collect_search_text(markdown, options.index_code);
89 let title = frontmatter
90 .title
91 .clone()
92 .or_else(|| headings.first().map(|heading| heading.text.clone()))
93 .unwrap_or_else(|| "Untitled".to_string());
94
95 Ok(Document {
96 frontmatter,
97 title,
98 html,
99 headings,
100 search_text,
101 })
102}
103
104fn split_frontmatter(input: &str) -> Result<(Frontmatter, &str)> {
105 let trimmed = input.strip_prefix('\u{feff}').unwrap_or(input);
106 if !trimmed.starts_with("---\n") && !trimmed.starts_with("---\r\n") {
107 return Ok((Frontmatter::default(), trimmed));
108 }
109
110 let body_start = if trimmed.starts_with("---\r\n") { 5 } else { 4 };
111 let rest = &trimmed[body_start..];
112 for marker in ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"] {
113 if let Some(index) = rest.find(marker) {
114 let yaml = &rest[..index];
115 let after_marker = &rest[index + marker.len()..];
116 let frontmatter = serde_yaml::from_str(yaml).context("failed to parse frontmatter")?;
117 return Ok((frontmatter, after_marker));
118 }
119 }
120
121 anyhow::bail!("frontmatter starts with --- but has no closing marker")
122}
123
124fn normalize_frontmatter(frontmatter: &mut Frontmatter) {
125 if frontmatter.layout.is_empty() {
126 frontmatter.layout = "doc".to_string();
127 }
128 if frontmatter.access != "public" && frontmatter.access != "masked" {
129 frontmatter.access = "public".to_string();
130 }
131}
132
133fn collect_headings(events: &[Event<'_>]) -> Vec<Heading> {
134 let mut headings = Vec::new();
135 let mut current: Option<(u8, String)> = None;
136 let mut used = HashMap::<String, usize>::new();
137
138 for event in events {
139 match event {
140 Event::Start(Tag::Heading { level, .. }) => {
141 current = Some((heading_level(*level), String::new()));
142 }
143 Event::Text(text) | Event::Code(text) => {
144 if let Some((_, current_text)) = &mut current {
145 current_text.push_str(text);
146 }
147 }
148 Event::End(TagEnd::Heading(_)) => {
149 if let Some((level, text)) = current.take() {
150 let anchor = unique_slug(&slugify(&text), &mut used);
151 headings.push(Heading {
152 level,
153 text: text.trim().to_string(),
154 anchor,
155 });
156 }
157 }
158 _ => {}
159 }
160 }
161
162 headings
163}
164
165fn render_html(events: Vec<Event<'_>>, headings: &[Heading], options: &MarkdownOptions) -> String {
166 let mut out_events = Vec::with_capacity(events.len() + headings.len());
167 let mut heading_index = 0usize;
168 let mut in_code_block = false;
169 let mut code_lang: Option<String> = None;
170 let mut code_text = String::new();
171
172 for event in events {
173 match event {
174 Event::Start(Tag::Heading { level, .. }) if options.heading_anchors => {
175 let anchor = headings
176 .get(heading_index)
177 .map(|heading| heading.anchor.as_str())
178 .unwrap_or_default();
179 heading_index += 1;
180 out_events.push(Event::Html(CowStr::from(format!(
181 "<{} id=\"{}\"><a class=\"heading-anchor\" href=\"#{}\" aria-label=\"Link to section\">#</a>",
182 heading_tag(level),
183 escape_attr(anchor),
184 escape_attr(anchor)
185 ))));
186 }
187 Event::End(TagEnd::Heading(level)) if options.heading_anchors => {
188 out_events.push(Event::Html(CowStr::from(format!(
189 "</{}>",
190 heading_tag(level)
191 ))));
192 }
193 Event::Start(Tag::CodeBlock(kind)) => {
194 let lang = match &kind {
195 CodeBlockKind::Fenced(value) => value
196 .split_whitespace()
197 .next()
198 .filter(|value| !value.is_empty())
199 .map(str::to_string),
200 CodeBlockKind::Indented => None,
201 };
202
203 if options.mermaid && lang.as_deref() == Some("mermaid") {
204 in_code_block = true;
205 code_lang = lang;
206 out_events.push(Event::Html(CowStr::from("<pre class=\"mermaid\">")));
207 } else {
208 in_code_block = true;
209 code_lang = lang;
210 code_text.clear();
211 }
212 }
213 Event::Text(text) if in_code_block && code_lang.as_deref() == Some("mermaid") => {
214 out_events.push(Event::Html(CowStr::from(escape_html(&text))));
215 }
216 Event::Text(text) | Event::Code(text) if in_code_block => {
217 code_text.push_str(&text);
218 }
219 Event::End(TagEnd::CodeBlock) if in_code_block => {
220 if code_lang.as_deref() == Some("mermaid") {
221 out_events.push(Event::Html(CowStr::from("</pre>")));
222 } else {
223 out_events.push(Event::Html(CowStr::from(render_code_block(
224 &code_text,
225 code_lang.as_deref(),
226 options.code_highlight,
227 ))));
228 code_text.clear();
229 }
230 in_code_block = false;
231 code_lang = None;
232 }
233 _ => out_events.push(event),
234 }
235 }
236
237 let mut rendered = String::new();
238 html::push_html(&mut rendered, out_events.into_iter());
239 rendered
240}
241
242fn render_code_block(code: &str, lang: Option<&str>, highlight: bool) -> String {
243 let normalized_lang = lang
244 .map(normalize_code_lang)
245 .filter(|lang| !lang.is_empty());
246 let content = if highlight {
247 highlight_code(code, normalized_lang)
248 } else {
249 escape_html(code)
250 };
251 let lang_class = normalized_lang
252 .map(|lang| format!(" language-{}", escape_attr(lang)))
253 .unwrap_or_default();
254 let header = normalized_lang
255 .map(|lang| {
256 format!(
257 r#"<div class="rp-code-header"><span>{}</span></div>"#,
258 escape_html(lang)
259 )
260 })
261 .unwrap_or_default();
262
263 format!(
264 r#"<div class="rp-code">{header}<button class="rp-code-copy" type="button" data-rp-copy-code aria-label="Copy code" title="Copy code"><svg class="rp-code-copy-icon" viewBox="0 0 24 24" aria-hidden="true"><rect x="9" y="9" width="11" height="11" rx="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg><svg class="rp-code-copy-check" viewBox="0 0 24 24" aria-hidden="true"><path d="M20 6 9 17l-5-5"></path></svg></button><pre><code class="rp-code-content{lang_class}">{content}</code></pre></div>"#
265 )
266}
267
/// Reduces a code-fence language token to the bare language name.
///
/// Surrounding whitespace and any leading `language-` prefixes are removed,
/// and everything from the first `,` or `{` onward is dropped. The result may
/// be empty.
fn normalize_code_lang(lang: &str) -> &str {
    let without_prefix = lang.trim().trim_start_matches("language-");
    let cut = without_prefix
        .find(|ch: char| matches!(ch, ',' | '{'))
        .unwrap_or(without_prefix.len());
    without_prefix[..cut].trim()
}
276
277fn highlight_code(code: &str, lang: Option<&str>) -> String {
278 let syntax = lang
279 .and_then(|lang| SYNTAX_SET.find_syntax_by_token(lang))
280 .unwrap_or_else(|| SYNTAX_SET.find_syntax_plain_text());
281 let mut highlighter = HighlightLines::new(syntax, highlight_theme());
282 let mut html = String::new();
283
284 for line in LinesWithEndings::from(code) {
285 match highlighter
286 .highlight_line(line, &SYNTAX_SET)
287 .and_then(|regions| styled_line_to_highlighted_html(®ions, IncludeBackground::No))
288 {
289 Ok(line_html) => html.push_str(&line_html),
290 Err(_) => html.push_str(&escape_html(line)),
291 }
292 }
293
294 html
295}
296
/// Syntect's bundled syntax definitions (newline-aware variant), loaded on
/// first use and shared by every `highlight_code` call.
static SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
/// Syntect's bundled themes, loaded on first use; see `highlight_theme`.
static THEME_SET: Lazy<ThemeSet> = Lazy::new(ThemeSet::load_defaults);
299
300fn highlight_theme() -> &'static Theme {
301 THEME_SET
302 .themes
303 .get("base16-ocean.dark")
304 .or_else(|| THEME_SET.themes.values().next())
305 .expect("syntect ships with default themes")
306}
307
308fn collect_search_text(markdown: &str, index_code: bool) -> String {
309 let mut parser_options = Options::empty();
310 parser_options.insert(Options::ENABLE_TABLES);
311 parser_options.insert(Options::ENABLE_STRIKETHROUGH);
312 parser_options.insert(Options::ENABLE_TASKLISTS);
313
314 let mut text = String::new();
315 let mut in_code_block = false;
316
317 for event in Parser::new_ext(markdown, parser_options) {
318 match event {
319 Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
320 Event::End(TagEnd::CodeBlock) => in_code_block = false,
321 Event::Text(value) | Event::Code(value) => {
322 if index_code || !in_code_block {
323 if !text.is_empty() {
324 text.push(' ');
325 }
326 text.push_str(&value);
327 }
328 }
329 _ => {}
330 }
331 }
332
333 normalize_space(&text)
334}
335
/// Converts a pulldown-cmark heading level to its numeric depth (`H1` → 1 … `H6` → 6).
fn heading_level(level: HeadingLevel) -> u8 {
    match level {
        HeadingLevel::H1 => 1,
        HeadingLevel::H2 => 2,
        HeadingLevel::H3 => 3,
        HeadingLevel::H4 => 4,
        HeadingLevel::H5 => 5,
        HeadingLevel::H6 => 6,
    }
}
346
/// Returns the lowercase HTML element name for a heading level (`H1` → `"h1"` … `H6` → `"h6"`).
fn heading_tag(level: HeadingLevel) -> &'static str {
    match level {
        HeadingLevel::H1 => "h1",
        HeadingLevel::H2 => "h2",
        HeadingLevel::H3 => "h3",
        HeadingLevel::H4 => "h4",
        HeadingLevel::H5 => "h5",
        HeadingLevel::H6 => "h6",
    }
}
357
358fn slugify(text: &str) -> String {
359 static PUNCT: Lazy<Regex> =
360 Lazy::new(|| Regex::new(r"[^\p{Alphabetic}\p{Number}_\-\s]+").unwrap());
361 static SPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\s_]+").unwrap());
362
363 let lower = text.trim().to_lowercase();
364 let without_punct = PUNCT.replace_all(&lower, "");
365 let slug = SPACE.replace_all(without_punct.trim(), "-");
366 if slug.is_empty() {
367 "section".to_string()
368 } else {
369 slug.to_string()
370 }
371}
372
/// Returns an anchor based on `slug` that has not been handed out before.
///
/// `used` records every anchor already returned. The first request for a slug
/// returns it unchanged; later requests return `slug-2`, `slug-3`, … and skip
/// any candidate that is already taken. Each returned anchor is itself
/// recorded, so a generated `foo-2` can never collide with a heading whose own
/// slug is `foo-2`.
fn unique_slug(slug: &str, used: &mut HashMap<String, usize>) -> String {
    let seen = match used.get(slug) {
        Some(&seen) => seen,
        None => {
            used.insert(slug.to_string(), 1);
            return slug.to_string();
        }
    };

    // Resume from the last suffix tried for this base slug so repeated
    // headings do not rescan from 2 every time.
    let mut suffix = seen + 1;
    loop {
        let candidate = format!("{slug}-{suffix}");
        if !used.contains_key(&candidate) {
            used.insert(slug.to_string(), suffix);
            used.insert(candidate.clone(), 1);
            return candidate;
        }
        suffix += 1;
    }
}
382
/// Collapses every run of whitespace to a single space and trims both ends.
fn normalize_space(input: &str) -> String {
    let mut collapsed = String::new();
    for word in input.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
386
/// Escapes `&`, `<` and `>` so `input` can be placed in HTML text content.
///
/// Quotes are left untouched; use `escape_attr` for attribute values.
fn escape_html(input: &str) -> String {
    // A single pass avoids re-escaping the `&` of an entity written earlier.
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
393
394fn escape_attr(input: &str) -> String {
395 escape_html(input).replace('"', """)
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Frontmatter wins over the first heading for the title, and the heading
    /// still receives its anchor.
    #[test]
    fn parses_frontmatter_and_title() {
        let doc = parse_markdown(
            "---\ntitle: Page Title\naccess: masked\n---\n# Ignored\nBody",
            MarkdownOptions::default(),
        )
        .unwrap();

        assert_eq!(doc.title, "Page Title");
        assert_eq!(doc.frontmatter.access, "masked");
        assert!(doc.html.contains("id=\"ignored\""));
    }

    /// Non-Latin letters survive slugification.
    #[test]
    fn chinese_heading_anchor_is_preserved() {
        let doc = parse_markdown("# 中文 标题\n\ntext", MarkdownOptions::default()).unwrap();

        assert_eq!(doc.headings[0].anchor, "中文-标题");
        assert!(doc.html.contains("id=\"中文-标题\""));
    }

    /// Mermaid fences bypass the code-block chrome and are HTML-escaped.
    #[test]
    fn mermaid_code_block_becomes_mermaid_pre() {
        let doc = parse_markdown(
            "```mermaid\nflowchart LR\nA-->B\n```",
            MarkdownOptions::default(),
        )
        .unwrap();

        assert!(doc.html.contains("<pre class=\"mermaid\">"));
        // The diagram source goes through `escape_html`, so `>` arrives escaped.
        assert!(doc.html.contains("A--&gt;B"));
        assert!(!doc.html.contains("data-rp-copy-code"));
    }

    #[test]
    fn fenced_code_has_copy_button_and_is_highlighted_with_syntect() {
        let doc = parse_markdown(
            "```rust\nfn main() {\n println!(\"hi\");\n}\n```",
            MarkdownOptions::default(),
        )
        .unwrap();

        assert!(doc.html.contains("class=\"rp-code\""));
        assert!(doc.html.contains("class=\"rp-code-copy\""));
        assert!(doc.html.contains("data-rp-copy-code"));
        assert!(doc.html.contains("aria-label=\"Copy code\""));
        assert!(doc.html.contains("language-rust"));
        assert!(doc.html.contains("<span style="));
        assert!(doc.html.contains("println"));
    }

    #[test]
    fn code_highlight_can_be_disabled_without_removing_copy_button() {
        let doc = parse_markdown(
            "```rust\nfn main() { println!(\"<hi>\"); }\n```",
            MarkdownOptions {
                code_highlight: false,
                ..MarkdownOptions::default()
            },
        )
        .unwrap();

        assert!(doc.html.contains("class=\"rp-code\""));
        assert!(doc.html.contains("data-rp-copy-code"));
        assert!(doc.html.contains("class=\"rp-code-content language-rust\""));
        // Unhighlighted code is still HTML-escaped; the quotes stay literal.
        assert!(doc.html.contains("println!(\"&lt;hi&gt;\")"));
        assert!(!doc.html.contains("<span style="));
    }

    #[test]
    fn code_is_excluded_from_search_by_default() {
        let doc = parse_markdown(
            "Body\n\n```rust\nlet hidden = true;\n```",
            MarkdownOptions::default(),
        )
        .unwrap();

        assert!(doc.search_text.contains("Body"));
        assert!(!doc.search_text.contains("hidden"));
    }
}