1use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
11
/// A single element of the parsed Markdown tree.
///
/// Container variants own their nested content as a `Vec<Node>`; leaf
/// variants carry their payload directly as owned strings.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
    /// A heading; `level` is the numeric heading depth (1 through 6).
    Heading {
        level: u8,
        children: Vec<Node>,
    },
    /// A paragraph of inline content.
    Paragraph {
        children: Vec<Node>,
    },
    /// A code block: `lang` is the info string (empty when none was given)
    /// and `content` is the block's text.
    FencedCode {
        lang: String,
        content: String,
    },
    /// An inline code span.
    InlineCode {
        content: String,
    },
    /// A block quote wrapping further nodes.
    BlockQuote {
        children: Vec<Node>,
    },
    /// A list; `ordered` distinguishes numbered lists from bulleted ones and
    /// `items` holds the list's entries.
    List {
        ordered: bool,
        items: Vec<Node>,
    },
    /// One entry of a list.
    ListItem {
        children: Vec<Node>,
    },
    /// Emphasised inline content.
    Emphasis {
        children: Vec<Node>,
    },
    /// Strongly emphasised inline content.
    Strong {
        children: Vec<Node>,
    },
    /// A hyperlink with its destination, title string and link text.
    Link {
        href: String,
        title: String,
        children: Vec<Node>,
    },
    /// An image reference: its source URL and accompanying `alt` string.
    Image {
        src: String,
        alt: String,
    },
    /// A thematic break.
    HorizontalRule,
    /// A forced line break.
    HardBreak,
    /// A line break in the source that does not force a break in the output.
    SoftBreak,
    /// A run of plain text.
    Text(String),
    /// Raw HTML, either block-level or inline.
    Html(String),
}
52
/// Bookkeeping entry for a container that has been opened but not yet closed
/// while the event stream is being folded into [`Node`]s.
///
/// Each variant records only the attributes that are known when the container
/// opens; the nested nodes are accumulated alongside the frame by the parser.
#[derive(Debug)]
enum Frame {
    /// The document itself; sits at the bottom of the parser's stack.
    Root,
    /// An open heading and its numeric level.
    Heading {
        level: u8,
    },
    /// An open paragraph.
    Paragraph,
    /// An open block quote.
    BlockQuote,
    /// An open list; `ordered` is true for numbered lists.
    List {
        ordered: bool,
    },
    /// An open list item.
    ListItem,
    /// An open emphasis span.
    Emphasis,
    /// An open strong-emphasis span.
    Strong,
    /// An open link with its destination and title.
    Link {
        href: String,
        title: String,
    },
    /// An open image; the parser fills `alt` from the tag's title string.
    Image {
        src: String,
        alt: String,
    },
    /// An open code block and its language tag (empty when there is none).
    FencedCode {
        lang: String,
    },
}
71
72#[derive(Debug, Clone, PartialEq)]
74pub struct Ast {
75 pub nodes: Vec<Node>,
77}
78
79impl Ast {
80 pub fn parse(source: &str) -> Self {
82 let opts =
83 Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES;
84 let parser = Parser::new_ext(source, opts);
85
86 let mut stack: Vec<(Frame, Vec<Node>)> = vec![(Frame::Root, Vec::new())];
88
89 let mut code_buf = String::new();
91
92 for event in parser {
93 match event {
94 Event::Start(tag) => match tag {
96 Tag::Heading { level, .. } => {
97 stack.push((
98 Frame::Heading {
99 level: heading_level(level),
100 },
101 Vec::new(),
102 ));
103 }
104 Tag::Paragraph => {
105 stack.push((Frame::Paragraph, Vec::new()));
106 }
107 Tag::BlockQuote(_) => {
108 stack.push((Frame::BlockQuote, Vec::new()));
109 }
110 Tag::List(start) => {
111 stack.push((
112 Frame::List {
113 ordered: start.is_some(),
114 },
115 Vec::new(),
116 ));
117 }
118 Tag::Item => {
119 stack.push((Frame::ListItem, Vec::new()));
120 }
121 Tag::Emphasis => {
122 stack.push((Frame::Emphasis, Vec::new()));
123 }
124 Tag::Strong => {
125 stack.push((Frame::Strong, Vec::new()));
126 }
127 Tag::Link {
128 dest_url, title, ..
129 } => {
130 stack.push((
131 Frame::Link {
132 href: dest_url.to_string(),
133 title: title.to_string(),
134 },
135 Vec::new(),
136 ));
137 }
138 Tag::Image {
139 dest_url, title, ..
140 } => {
141 stack.push((
144 Frame::Image {
145 src: dest_url.to_string(),
146 alt: title.to_string(),
147 },
148 Vec::new(),
149 ));
150 }
151 Tag::CodeBlock(kind) => {
152 let lang = match kind {
153 pulldown_cmark::CodeBlockKind::Fenced(s) => s.to_string(),
154 pulldown_cmark::CodeBlockKind::Indented => String::new(),
155 };
156 code_buf.clear();
157 stack.push((Frame::FencedCode { lang }, Vec::new()));
158 }
159 _ => {}
161 },
162
163 Event::End(tag_end) => {
165 let node = match tag_end {
166 TagEnd::Heading(_) => {
167 if let Some((Frame::Heading { level }, children)) = stack.pop() {
168 Some(Node::Heading { level, children })
169 } else {
170 None
171 }
172 }
173 TagEnd::Paragraph => {
174 if let Some((Frame::Paragraph, children)) = stack.pop() {
175 Some(Node::Paragraph { children })
176 } else {
177 None
178 }
179 }
180 TagEnd::BlockQuote(_) => {
181 if let Some((Frame::BlockQuote, children)) = stack.pop() {
182 Some(Node::BlockQuote { children })
183 } else {
184 None
185 }
186 }
187 TagEnd::List(_) => {
188 if let Some((Frame::List { ordered }, items)) = stack.pop() {
189 Some(Node::List { ordered, items })
190 } else {
191 None
192 }
193 }
194 TagEnd::Item => {
195 if let Some((Frame::ListItem, children)) = stack.pop() {
196 Some(Node::ListItem { children })
197 } else {
198 None
199 }
200 }
201 TagEnd::Emphasis => {
202 if let Some((Frame::Emphasis, children)) = stack.pop() {
203 Some(Node::Emphasis { children })
204 } else {
205 None
206 }
207 }
208 TagEnd::Strong => {
209 if let Some((Frame::Strong, children)) = stack.pop() {
210 Some(Node::Strong { children })
211 } else {
212 None
213 }
214 }
215 TagEnd::Link => {
216 if let Some((Frame::Link { href, title }, children)) = stack.pop() {
217 Some(Node::Link {
218 href,
219 title,
220 children,
221 })
222 } else {
223 None
224 }
225 }
226 TagEnd::Image => {
227 if let Some((Frame::Image { src, alt }, _children)) = stack.pop() {
228 Some(Node::Image { src, alt })
231 } else {
232 None
233 }
234 }
235 TagEnd::CodeBlock => {
236 if let Some((Frame::FencedCode { lang }, _)) = stack.pop() {
237 let content = std::mem::take(&mut code_buf);
238 let content = content.trim_end_matches('\n').to_string();
240 Some(Node::FencedCode { lang, content })
241 } else {
242 None
243 }
244 }
245 _ => None,
247 };
248
249 if let Some(n) = node {
250 if let Some((_, ref mut parent_children)) = stack.last_mut() {
251 parent_children.push(n);
252 }
253 }
254 }
255
256 Event::Text(s) => {
258 if matches!(stack.last(), Some((Frame::FencedCode { .. }, _))) {
260 code_buf.push_str(&s);
261 } else if let Some((_, ref mut children)) = stack.last_mut() {
262 children.push(Node::Text(s.to_string()));
263 }
264 }
265 Event::Code(s) => {
266 if let Some((_, ref mut children)) = stack.last_mut() {
267 children.push(Node::InlineCode {
268 content: s.to_string(),
269 });
270 }
271 }
272 Event::Html(s) | Event::InlineHtml(s) => {
273 if let Some((_, ref mut children)) = stack.last_mut() {
274 children.push(Node::Html(s.to_string()));
275 }
276 }
277 Event::SoftBreak => {
278 if let Some((_, ref mut children)) = stack.last_mut() {
279 children.push(Node::SoftBreak);
280 }
281 }
282 Event::HardBreak => {
283 if let Some((_, ref mut children)) = stack.last_mut() {
284 children.push(Node::HardBreak);
285 }
286 }
287 Event::Rule => {
288 if let Some((_, ref mut children)) = stack.last_mut() {
289 children.push(Node::HorizontalRule);
290 }
291 }
292 _ => {}
294 }
295 }
296
297 let nodes = match stack.into_iter().next() {
299 Some((Frame::Root, children)) => children,
300 _ => Vec::new(),
301 };
302
303 Self { nodes }
304 }
305
306 pub fn title(&self) -> Option<String> {
308 for node in &self.nodes {
309 if let Node::Heading { level: 1, children } = node {
310 let text = collect_text(children);
311 if !text.is_empty() {
312 return Some(text);
313 }
314 }
315 }
316 None
317 }
318}
319
320fn heading_level(level: HeadingLevel) -> u8 {
325 match level {
326 HeadingLevel::H1 => 1,
327 HeadingLevel::H2 => 2,
328 HeadingLevel::H3 => 3,
329 HeadingLevel::H4 => 4,
330 HeadingLevel::H5 => 5,
331 HeadingLevel::H6 => 6,
332 }
333}
334
335fn collect_text(nodes: &[Node]) -> String {
337 let mut out = String::new();
338 for node in nodes {
339 match node {
340 Node::Text(s) => out.push_str(s),
341 Node::InlineCode { content } => out.push_str(content),
342 Node::Emphasis { children }
343 | Node::Strong { children }
344 | Node::Link { children, .. } => {
345 out.push_str(&collect_text(children));
346 }
347 _ => {}
348 }
349 }
350 out
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings of different levels become separate top-level nodes.
    #[test]
    fn test_heading_parsing() {
        let ast = Ast::parse("# Hello\n\n## World\n");
        assert_eq!(ast.nodes.len(), 2);
        assert!(matches!(&ast.nodes[0], Node::Heading { level: 1, .. }));
        assert!(matches!(&ast.nodes[1], Node::Heading { level: 2, .. }));
    }

    /// A heading's text is stored as a `Text` child.
    #[test]
    fn test_heading_text_children() {
        let ast = Ast::parse("# My Title\n");
        if let Node::Heading { level, children } = &ast.nodes[0] {
            assert_eq!(*level, 1);
            assert!(matches!(&children[0], Node::Text(s) if s == "My Title"));
        } else {
            panic!("expected Heading");
        }
    }

    /// `title` returns the text of the first level-1 heading.
    #[test]
    fn test_title_from_h1() {
        let ast = Ast::parse("# The Title\n\nSome paragraph.\n");
        assert_eq!(ast.title(), Some("The Title".to_string()));
    }

    /// `title` is `None` when the document has no level-1 heading.
    #[test]
    fn test_title_none_when_no_h1() {
        let ast = Ast::parse("## Subheading only\n");
        assert_eq!(ast.title(), None);
    }

    /// A fenced block keeps its language tag and loses the trailing newline.
    #[test]
    fn test_fenced_code() {
        let src = "```rust\nfn main() {}\n```\n";
        let ast = Ast::parse(src);
        assert_eq!(ast.nodes.len(), 1);
        if let Node::FencedCode { lang, content } = &ast.nodes[0] {
            assert_eq!(lang, "rust");
            assert_eq!(content, "fn main() {}");
        } else {
            panic!("expected FencedCode, got {:?}", ast.nodes[0]);
        }
    }

    /// A fenced block without an info string has an empty language tag.
    #[test]
    fn test_fenced_code_no_lang() {
        let src = "```\nhello\n```\n";
        let ast = Ast::parse(src);
        if let Node::FencedCode { lang, content } = &ast.nodes[0] {
            assert_eq!(lang, "");
            assert_eq!(content, "hello");
        } else {
            panic!("expected FencedCode");
        }
    }

    /// Inline code spans appear as `InlineCode` children of the paragraph.
    #[test]
    fn test_paragraph_and_inline_code() {
        let src = "Use `cargo test` now.\n";
        let ast = Ast::parse(src);
        assert_eq!(ast.nodes.len(), 1);
        if let Node::Paragraph { children } = &ast.nodes[0] {
            let has_inline = children
                .iter()
                .any(|n| matches!(n, Node::InlineCode { content } if content == "cargo test"));
            assert!(has_inline);
        } else {
            panic!("expected Paragraph");
        }
    }

    /// A flat bulleted list yields one unordered `List` of `ListItem`s.
    #[test]
    fn test_unordered_list() {
        let src = "- alpha\n- beta\n";
        let ast = Ast::parse(src);
        assert_eq!(ast.nodes.len(), 1);
        if let Node::List { ordered, items } = &ast.nodes[0] {
            assert!(!ordered);
            assert_eq!(items.len(), 2);
            for item in items {
                assert!(matches!(item, Node::ListItem { .. }));
            }
        } else {
            panic!("expected List");
        }
    }

    /// A list indented under an item is nested inside that item.
    #[test]
    fn test_nested_list() {
        let src = "- alpha\n  - inner\n- beta\n";
        let ast = Ast::parse(src);
        assert_eq!(ast.nodes.len(), 1);
        if let Node::List { ordered, items } = &ast.nodes[0] {
            assert!(!ordered);
            assert_eq!(items.len(), 2);
            if let Node::ListItem { children } = &items[0] {
                let inner = children.iter().find_map(|n| match n {
                    Node::List { ordered, items } => Some((*ordered, items.len())),
                    _ => None,
                });
                assert_eq!(inner, Some((false, 1)));
            } else {
                panic!("expected ListItem");
            }
        } else {
            panic!("expected List");
        }
    }

    /// Numbered lists are flagged as ordered.
    #[test]
    fn test_ordered_list() {
        let src = "1. one\n2. two\n";
        let ast = Ast::parse(src);
        if let Node::List { ordered, .. } = &ast.nodes[0] {
            assert!(ordered);
        } else {
            panic!("expected List");
        }
    }

    /// A `>`-prefixed line becomes a `BlockQuote`.
    #[test]
    fn test_blockquote() {
        let src = "> a quote\n";
        let ast = Ast::parse(src);
        assert!(matches!(&ast.nodes[0], Node::BlockQuote { .. }));
    }

    /// A thematic break becomes a `HorizontalRule`.
    #[test]
    fn test_horizontal_rule() {
        let src = "---\n";
        let ast = Ast::parse(src);
        assert!(matches!(&ast.nodes[0], Node::HorizontalRule));
    }
}