1use std::path::Path;
4
5use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
6use thiserror::Error;
7use typstify_core::{
8 content::{ParsedContent, TocEntry},
9 frontmatter::parse_frontmatter,
10};
11
12use crate::syntax::SyntaxHighlighter;
13
/// Errors produced while parsing a Markdown document.
#[derive(Debug, Error)]
pub enum MarkdownError {
    /// Frontmatter handling failed; wraps the underlying core error, which is
    /// converted automatically when `?` is applied to a `CoreError` result.
    #[error("frontmatter error: {0}")]
    Frontmatter(#[from] typstify_core::error::CoreError),
}
21
/// Convenience alias for results whose error type is [`MarkdownError`].
pub type Result<T> = std::result::Result<T, MarkdownError>;
24
/// Markdown renderer that turns Markdown source into HTML plus a table of
/// contents, delegating code blocks to a syntax highlighter.
#[derive(Debug)]
pub struct MarkdownParser {
    /// Highlighter used to render the contents of code blocks.
    highlighter: SyntaxHighlighter,
    /// pulldown-cmark extension flags handed to the parser on every render.
    options: Options,
}
31
32impl Default for MarkdownParser {
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl MarkdownParser {
39 pub fn new() -> Self {
41 let mut options = Options::empty();
42 options.insert(Options::ENABLE_TABLES);
43 options.insert(Options::ENABLE_FOOTNOTES);
44 options.insert(Options::ENABLE_STRIKETHROUGH);
45 options.insert(Options::ENABLE_TASKLISTS);
46 options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
47
48 Self {
49 highlighter: SyntaxHighlighter::default(),
50 options,
51 }
52 }
53
54 pub fn with_theme(theme: &str) -> Self {
56 let mut parser = Self::new();
57 parser.highlighter.set_theme(theme);
58 parser
59 }
60
61 pub fn parse(&self, content: &str, path: &Path) -> Result<ParsedContent> {
63 let (frontmatter, body) = parse_frontmatter(content, path)?;
65
66 let (html, toc) = self.render_markdown(&body);
68
69 Ok(ParsedContent {
70 frontmatter,
71 html,
72 raw: body,
73 toc,
74 })
75 }
76
77 pub fn parse_body(&self, body: &str) -> (String, Vec<TocEntry>) {
79 self.render_markdown(body)
80 }
81
82 fn render_markdown(&self, content: &str) -> (String, Vec<TocEntry>) {
84 let parser = Parser::new_ext(content, self.options);
85 let mut toc = Vec::new();
86 let mut html = String::new();
87 let mut current_heading: Option<(u8, String)> = None;
88 let mut code_block_lang: Option<String> = None;
89 let mut code_block_content = String::new();
90
91 for event in parser {
92 match event {
93 Event::Start(Tag::Heading { level, id, .. }) => {
95 let lvl = level as u8;
96 current_heading = Some((lvl, String::new()));
97 let id_attr = id.map(|i| format!(" id=\"{i}\"")).unwrap_or_default();
98 html.push_str(&format!("<h{lvl}{id_attr}>"));
99 }
100
101 Event::End(TagEnd::Heading(level)) => {
103 let lvl = level as u8;
104 if let Some((_, ref text)) = current_heading {
105 let id = slugify(text);
106 toc.push(TocEntry {
107 level: lvl,
108 text: text.clone(),
109 id: id.clone(),
110 });
111 }
112 html.push_str(&format!("</h{lvl}>"));
113 current_heading = None;
114 }
115
116 Event::Start(Tag::CodeBlock(kind)) => {
118 code_block_lang = match kind {
119 CodeBlockKind::Fenced(lang) => {
120 let lang = lang.to_string();
121 if lang.is_empty() { None } else { Some(lang) }
122 }
123 CodeBlockKind::Indented => None,
124 };
125 code_block_content.clear();
126 }
127
128 Event::End(TagEnd::CodeBlock) => {
130 let highlighted = self
131 .highlighter
132 .highlight(&code_block_content, code_block_lang.as_deref());
133 html.push_str(&highlighted);
134 code_block_lang = None;
135 code_block_content.clear();
136 }
137
138 Event::Text(text)
140 if code_block_lang.is_some() || !code_block_content.is_empty() =>
141 {
142 code_block_content.push_str(&text);
143 }
144
145 Event::Text(text) => {
147 if let Some((_, ref mut heading_text)) = current_heading {
148 heading_text.push_str(&text);
149 }
150 html.push_str(&html_escape(&text));
151 }
152
153 Event::Code(code) => {
155 if let Some((_, ref mut heading_text)) = current_heading {
156 heading_text.push_str(&code);
157 }
158 html.push_str(&format!("<code>{}</code>", html_escape(&code)));
159 }
160
161 Event::SoftBreak => {
163 html.push('\n');
164 }
165
166 Event::HardBreak => {
168 html.push_str("<br />\n");
169 }
170
171 Event::Start(tag) => {
173 html.push_str(&tag_to_html_start(&tag));
174 }
175
176 Event::End(tag) => {
178 html.push_str(&tag_to_html_end(&tag));
179 }
180
181 Event::Html(raw) | Event::InlineHtml(raw) => {
183 html.push_str(&raw);
184 }
185
186 Event::FootnoteReference(name) => {
188 html.push_str(&format!(
189 "<sup class=\"footnote-ref\"><a href=\"#fn-{name}\">[{name}]</a></sup>"
190 ));
191 }
192
193 Event::Rule => {
195 html.push_str("<hr />\n");
196 }
197
198 Event::TaskListMarker(checked) => {
200 let checkbox = if checked {
201 "<input type=\"checkbox\" checked disabled />"
202 } else {
203 "<input type=\"checkbox\" disabled />"
204 };
205 html.push_str(checkbox);
206 }
207
208 Event::InlineMath(math) => {
209 html.push_str(&format!("<span class=\"math inline\">\\({math}\\)</span>"));
210 }
211
212 Event::DisplayMath(math) => {
213 html.push_str(&format!("<div class=\"math display\">\\[{math}\\]</div>"));
214 }
215 }
216 }
217
218 (html, toc)
219 }
220}
221
222fn tag_to_html_start(tag: &Tag) -> String {
224 match tag {
225 Tag::Paragraph => "<p>".to_string(),
226 Tag::Heading { level, id, .. } => {
227 let id_attr = id
228 .as_ref()
229 .map(|i| format!(" id=\"{i}\""))
230 .unwrap_or_default();
231 format!("<h{}{id_attr}>", *level as u8)
232 }
233 Tag::BlockQuote(_) => "<blockquote>".to_string(),
234 Tag::CodeBlock(_) => String::new(), Tag::List(Some(start)) => format!("<ol start=\"{start}\">"),
236 Tag::List(None) => "<ul>".to_string(),
237 Tag::Item => "<li>".to_string(),
238 Tag::FootnoteDefinition(name) => {
239 format!("<div class=\"footnote\" id=\"fn-{name}\">")
240 }
241 Tag::Table(alignments) => {
242 let _ = alignments; "<table>".to_string()
244 }
245 Tag::TableHead => "<thead><tr>".to_string(),
246 Tag::TableRow => "<tr>".to_string(),
247 Tag::TableCell => "<td>".to_string(),
248 Tag::Emphasis => "<em>".to_string(),
249 Tag::Strong => "<strong>".to_string(),
250 Tag::Strikethrough => "<del>".to_string(),
251 Tag::Link {
252 dest_url, title, ..
253 } => {
254 let title_attr = if title.is_empty() {
255 String::new()
256 } else {
257 format!(" title=\"{}\"", html_escape(title))
258 };
259 format!("<a href=\"{}\"{}> ", html_escape(dest_url), title_attr)
260 }
261 Tag::Image {
262 dest_url, title, ..
263 } => {
264 let title_attr = if title.is_empty() {
265 String::new()
266 } else {
267 format!(" title=\"{}\"", html_escape(title))
268 };
269 format!(
271 "<img src=\"{}\" loading=\"lazy\" decoding=\"async\"{}",
272 html_escape(dest_url),
273 title_attr
274 )
275 }
276 Tag::HtmlBlock => String::new(),
277 Tag::MetadataBlock(_) => String::new(),
278 Tag::DefinitionList => "<dl>".to_string(),
279 Tag::DefinitionListTitle => "<dt>".to_string(),
280 Tag::DefinitionListDefinition => "<dd>".to_string(),
281 Tag::Superscript => "<sup>".to_string(),
282 Tag::Subscript => "<sub>".to_string(),
283 }
284}
285
286fn tag_to_html_end(tag: &TagEnd) -> String {
288 match tag {
289 TagEnd::Paragraph => "</p>\n".to_string(),
290 TagEnd::Heading(level) => format!("</h{}>\n", *level as u8),
291 TagEnd::BlockQuote(_) => "</blockquote>\n".to_string(),
292 TagEnd::CodeBlock => String::new(), TagEnd::List(ordered) => {
294 if *ordered {
295 "</ol>\n".to_string()
296 } else {
297 "</ul>\n".to_string()
298 }
299 }
300 TagEnd::Item => "</li>\n".to_string(),
301 TagEnd::FootnoteDefinition => "</div>\n".to_string(),
302 TagEnd::Table => "</table>\n".to_string(),
303 TagEnd::TableHead => "</tr></thead>\n".to_string(),
304 TagEnd::TableRow => "</tr>\n".to_string(),
305 TagEnd::TableCell => "</td>".to_string(),
306 TagEnd::Emphasis => "</em>".to_string(),
307 TagEnd::Strong => "</strong>".to_string(),
308 TagEnd::Strikethrough => "</del>".to_string(),
309 TagEnd::Link => "</a>".to_string(),
310 TagEnd::Image => " />".to_string(),
311 TagEnd::HtmlBlock => String::new(),
312 TagEnd::MetadataBlock(_) => String::new(),
313 TagEnd::DefinitionList => "</dl>\n".to_string(),
314 TagEnd::DefinitionListTitle => "</dt>\n".to_string(),
315 TagEnd::DefinitionListDefinition => "</dd>\n".to_string(),
316 TagEnd::Superscript => "</sup>".to_string(),
317 TagEnd::Subscript => "</sub>".to_string(),
318 }
319}
320
/// Escapes the characters that are significant in HTML text and in
/// double-quoted attribute values: `&`, `<`, `>` and `"`.
///
/// The input is processed in a single pass, so an ampersand that is already
/// part of an entity is escaped again (`&amp;` becomes `&amp;amp;`).
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
328
/// Builds a URL-friendly slug from heading text.
///
/// The text is lowercased; alphanumeric characters are kept; any run of
/// whitespace, `-` or `_` between kept characters becomes a single `-`; every
/// other character is dropped. The slug never starts or ends with `-`.
fn slugify(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    // Set when a separator was seen since the last kept character.
    let mut separator_pending = false;

    for ch in lowered.chars() {
        if ch.is_alphanumeric() {
            // Emit the separator lazily so leading and trailing ones vanish.
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch.is_whitespace() || ch == '-' || ch == '_' {
            separator_pending = true;
        }
        // Anything else is removed without acting as a separator.
    }

    slug
}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with frontmatter yields the parsed title and rendered body.
    #[test]
    fn test_parse_simple_markdown() {
        let parser = MarkdownParser::new();
        let content = r#"---
title: "Test Post"
---

# Hello World

This is a test."#;

        let result = parser.parse(content, Path::new("test.md")).unwrap();

        assert_eq!(result.frontmatter.title, "Test Post");
        assert!(result.html.contains("<h1"));
        assert!(result.html.contains("Hello World"));
        assert!(result.html.contains("<p>"));
    }

    /// The contents of a fenced code block survive highlighting.
    #[test]
    fn test_parse_code_block() {
        let parser = MarkdownParser::new();
        let (html, _) = parser.parse_body(
            r#"```rust
fn main() {
    println!("Hello");
}
```"#,
        );

        assert!(html.contains("fn"));
        assert!(html.contains("main"));
    }

    /// Every heading produces one TOC entry with its level and text.
    #[test]
    fn test_toc_extraction() {
        let parser = MarkdownParser::new();
        let (_, toc) = parser.parse_body(
            r#"# Heading 1
## Heading 2
### Heading 3"#,
        );

        assert_eq!(toc.len(), 3);
        assert_eq!(toc[0].level, 1);
        assert_eq!(toc[0].text, "Heading 1");
        assert_eq!(toc[1].level, 2);
        assert_eq!(toc[2].level, 3);
    }

    /// Slugs are lowercase, drop punctuation and collapse separator runs.
    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("Test 123 Post"), "test-123-post");
        // Repeated whitespace must collapse into a single separator.
        assert_eq!(slugify("Multiple   Spaces"), "multiple-spaces");
        assert_eq!(slugify("Special!@#Chars"), "specialchars");
        // Separators at the edges are trimmed; `_` and `-` act like spaces.
        assert_eq!(slugify("  Leading and trailing  "), "leading-and-trailing");
        assert_eq!(slugify("snake_case--title"), "snake-case-title");
        assert_eq!(slugify(""), "");
    }

    /// Pipe tables render as `<table>` markup with a header section.
    #[test]
    fn test_table_rendering() {
        let parser = MarkdownParser::new();
        let (html, _) = parser.parse_body(
            r#"| Header 1 | Header 2 |
|----------|----------|
| Cell 1 | Cell 2 |"#,
        );

        assert!(html.contains("<table>"));
        assert!(html.contains("<thead>"));
        assert!(html.contains("<tr>"));
        assert!(html.contains("<td>"));
    }

    /// Task list markers render as checkboxes.
    #[test]
    fn test_task_list() {
        let parser = MarkdownParser::new();
        let (html, _) = parser.parse_body(
            r#"- [x] Done
- [ ] Not done"#,
        );

        assert!(html.contains("checkbox"));
        assert!(html.contains("checked"));
    }

    /// A document without frontmatter parses with an empty title.
    #[test]
    fn test_no_frontmatter() {
        let parser = MarkdownParser::new();
        let content = "# Just Content\n\nNo frontmatter here.";
        let result = parser.parse(content, Path::new("test.md")).unwrap();

        assert!(result.frontmatter.title.is_empty());
        assert!(result.html.contains("Just Content"));
    }
}