1use crate::error::Error;
2use pulldown_cmark::{CodeBlockKind, Event, Options as CmarkOptions, Parser as CmarkParser};
3use serde_yaml;
4use std::collections::HashMap;
5
/// Switches that control how [`parse`] interprets its Markdown input.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Enables the table, strikethrough and task-list parser extensions.
    pub gfm: bool,
    /// Enables the parser's smart-punctuation extension.
    pub smart_punctuation: bool,
    /// When set, input that starts with `---` is checked for a leading YAML
    /// frontmatter block.
    pub frontmatter: bool,
    /// When set, HTML events whose trimmed text starts with `::` are treated
    /// as custom-component markers instead of raw HTML.
    pub custom_components: bool,
}
18
19impl Default for ParseOptions {
20 fn default() -> Self {
21 Self {
22 gfm: true,
23 smart_punctuation: true,
24 frontmatter: true,
25 custom_components: true,
26 }
27 }
28}
29
/// Result of [`parse`]: the block-level tree plus any frontmatter that was found.
#[derive(Debug)]
pub struct ParsedDocument {
    /// Top-level block nodes in document order.
    pub ast: Vec<Node>,
    /// Key/value pairs decoded from the leading YAML frontmatter block;
    /// `None` when no frontmatter was extracted or it decoded to an empty map.
    pub frontmatter: Option<HashMap<String, serde_yaml::Value>>,
}
38
/// Block-level element of the parsed document tree.
#[derive(Debug, Clone)]
pub enum Node {
    /// Section heading.
    Heading {
        /// Numeric heading level.
        level: u8,
        /// Plain-text content of the heading.
        content: String,
        /// Lower-cased, dash-separated identifier derived from `content`.
        id: String,
    },
    /// Paragraph made of inline nodes.
    Paragraph(Vec<InlineNode>),
    /// Block quote wrapping nested block nodes.
    BlockQuote(Vec<Node>),
    /// Code block.
    CodeBlock {
        /// Info string of a fenced block; `None` for indented blocks or when
        /// the info string is empty.
        language: Option<String>,
        /// Text of the code block.
        content: String,
        /// Extra key/value attributes. NOTE(review): the event processing in
        /// this file creates this map empty and never fills it.
        attributes: HashMap<String, String>,
    },
    /// Ordered or unordered list.
    List {
        /// `true` when the list carries a start number.
        ordered: bool,
        /// One entry per list item; each item is a sequence of block nodes.
        items: Vec<Vec<Node>>,
    },
    /// Thematic break (horizontal rule).
    ThematicBreak,
    /// Custom component introduced by a `::name{...}` marker.
    Component {
        /// Component name taken from the opening marker.
        name: String,
        /// Attributes parsed from the `{...}` section of the opening marker.
        attributes: HashMap<String, String>,
        /// Block nodes collected between the opening and closing markers.
        children: Vec<Node>,
    },
    /// Raw HTML passed through verbatim.
    Html(String),
    /// Table.
    Table {
        /// Header cells; each cell is a sequence of inline nodes.
        headers: Vec<Vec<InlineNode>>,
        /// Body rows; each row is a list of cells, each cell a sequence of inline nodes.
        rows: Vec<Vec<Vec<InlineNode>>>,
        /// Column alignments as reported by the Markdown parser.
        alignments: Vec<Alignment>,
    },
}
80
81impl Node {
82 pub fn name(&self) -> &str {
84 match self {
85 Node::Component { name, .. } => name,
86 _ => "",
87 }
88 }
89
90 pub fn attributes(&self) -> HashMap<String, String> {
92 match self {
93 Node::Component { attributes, .. } => attributes.clone(),
94 _ => HashMap::new(),
95 }
96 }
97
98 pub fn children(&self) -> Vec<Node> {
100 match self {
101 Node::Component { children, .. } => children.clone(),
102 _ => Vec::new(),
103 }
104 }
105}
106
/// Inline element that appears inside paragraphs, link text and table cells.
#[derive(Debug, Clone)]
pub enum InlineNode {
    /// Plain text run.
    Text(String),
    /// Emphasised span.
    Emphasis(Vec<InlineNode>),
    /// Strongly emphasised span.
    Strong(Vec<InlineNode>),
    /// Struck-through span.
    Strikethrough(Vec<InlineNode>),
    /// Hyperlink.
    Link {
        /// Inline content shown as the link text.
        text: Vec<InlineNode>,
        /// Link destination.
        url: String,
        /// Link title; `None` when the source title is empty.
        title: Option<String>,
    },
    /// Image.
    Image {
        /// Alternative text.
        alt: String,
        /// Image source.
        url: String,
        /// Image title; `None` when the source title is empty.
        title: Option<String>,
    },
    /// Inline code span.
    Code(String),
    /// Line break; both soft and hard breaks are mapped to this variant.
    LineBreak,
    /// Raw inline HTML.
    Html(String),
}
137
/// Horizontal alignment of a table column, mirroring the Markdown parser's
/// own alignment values.
#[derive(Debug, Clone, Copy)]
pub enum Alignment {
    /// No alignment specified.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
150
151pub fn parse(markdown: &str, options: &ParseOptions) -> Result<ParsedDocument, Error> {
153 let mut frontmatter = None;
154 let mut content = markdown.to_string();
155
156 if options.frontmatter && content.starts_with("---") {
158 if let Some((yaml, rest)) = extract_frontmatter(&content) {
159 frontmatter = parse_yaml_frontmatter(yaml)?;
160 content = rest.to_string();
161 }
162 }
163
164 let mut cmark_options = CmarkOptions::empty();
166 if options.gfm {
167 cmark_options.insert(CmarkOptions::ENABLE_TABLES);
168 cmark_options.insert(CmarkOptions::ENABLE_STRIKETHROUGH);
169 cmark_options.insert(CmarkOptions::ENABLE_TASKLISTS);
170 }
171 if options.smart_punctuation {
172 cmark_options.insert(CmarkOptions::ENABLE_SMART_PUNCTUATION);
173 }
174
175 let parser = CmarkParser::new_ext(&content, cmark_options);
177 let ast = process_events(parser, options)?;
178
179 Ok(ParsedDocument { ast, frontmatter })
180}
181
182fn extract_frontmatter(content: &str) -> Option<(&str, &str)> {
184 let rest = content.strip_prefix("---")?;
185 let end_index = rest.find("\n---")?;
186 let yaml = &rest[..end_index];
187 let content_start = end_index + 5; if content_start < rest.len() {
190 Some((yaml, &rest[content_start..]))
191 } else {
192 Some((yaml, ""))
193 }
194}
195
196fn parse_yaml_frontmatter(yaml: &str) -> Result<Option<HashMap<String, serde_yaml::Value>>, Error> {
198 let frontmatter: HashMap<String, serde_yaml::Value> = serde_yaml::from_str(yaml)?;
199 if frontmatter.is_empty() {
200 Ok(None)
201 } else {
202 Ok(Some(frontmatter))
203 }
204}
205
206fn process_events<'a, I>(events: I, options: &ParseOptions) -> Result<Vec<Node>, Error>
208where
209 I: Iterator<Item = Event<'a>>,
210{
211 let mut nodes = Vec::new();
212 let mut current_node: Option<Node> = None;
213 let mut current_inline_nodes: Vec<InlineNode> = Vec::new();
214 let mut list_stack: Vec<(bool, Vec<Vec<Node>>)> = Vec::new();
215 let mut block_quote_stack: Vec<Vec<Node>> = Vec::new();
216 let mut link_stack: Vec<(String, Option<String>, Vec<InlineNode>)> = Vec::new();
217 let mut component_stack: Vec<(String, HashMap<String, String>, Vec<Node>)> = Vec::new();
218 let mut table_headers: Vec<Vec<InlineNode>> = Vec::new();
219 let mut table_alignments: Vec<Alignment> = Vec::new();
220 let mut table_rows: Vec<Vec<Vec<InlineNode>>> = Vec::new();
221 let mut in_table_head = false;
222 let mut in_table_row = false;
223 let mut current_table_row: Vec<Vec<InlineNode>> = Vec::new();
224 let mut current_table_cell: Vec<InlineNode> = Vec::new();
225 let mut _in_emphasis = false;
226 let mut _in_strong = false;
227 let mut _in_strikethrough = false;
228
229 use pulldown_cmark::{Event, Tag};
230
231 let mut events = events.peekable();
232
233 while let Some(event) = events.next() {
234 match event {
235 Event::Start(Tag::Paragraph) => {
236 current_inline_nodes = Vec::new();
237 }
238 Event::End(Tag::Paragraph) => {
239 if !current_inline_nodes.is_empty() {
240 let node = Node::Paragraph(current_inline_nodes.clone());
241 current_inline_nodes.clear();
242
243 if !block_quote_stack.is_empty() {
244 let last_idx = block_quote_stack.len() - 1;
245 block_quote_stack[last_idx].push(node);
246 } else if !list_stack.is_empty() {
247 let last_list_idx = list_stack.len() - 1;
248 if let Some(last_item) = list_stack[last_list_idx].1.last_mut() {
249 last_item.push(node);
250 }
251 } else if !component_stack.is_empty() {
252 let last_idx = component_stack.len() - 1;
253 component_stack[last_idx].2.push(node);
254 } else {
255 nodes.push(node);
256 }
257 }
258 }
259 Event::Start(Tag::Heading(level, _, _)) => {
260 current_inline_nodes = Vec::new();
261 current_node = Some(Node::Heading {
262 level: level as u8,
263 content: String::new(),
264 id: String::new(),
265 });
266 }
267 Event::End(Tag::Heading(..)) => {
268 if let Some(Node::Heading { level, .. }) = current_node {
269 let mut content = String::new();
271 for node in ¤t_inline_nodes {
272 match node {
273 InlineNode::Text(text) => content.push_str(text),
274 InlineNode::Code(code) => content.push_str(code),
275 _ => {} }
277 }
278
279 let id = content
281 .to_lowercase()
282 .replace(|c: char| !c.is_alphanumeric(), "-")
283 .replace("--", "-")
284 .trim_matches('-')
285 .to_string();
286
287 let heading = Node::Heading { level, content, id };
288
289 if !block_quote_stack.is_empty() {
290 let last_idx = block_quote_stack.len() - 1;
291 block_quote_stack[last_idx].push(heading);
292 } else if !component_stack.is_empty() {
293 let last_idx = component_stack.len() - 1;
294 component_stack[last_idx].2.push(heading);
295 } else {
296 nodes.push(heading);
297 }
298
299 current_node = None;
300 current_inline_nodes.clear();
301 }
302 }
303 Event::Start(Tag::BlockQuote) => {
304 block_quote_stack.push(Vec::new());
305 }
306 Event::End(Tag::BlockQuote) => {
307 if let Some(quote_nodes) = block_quote_stack.pop() {
308 let node = Node::BlockQuote(quote_nodes);
309
310 if !block_quote_stack.is_empty() {
311 let last_idx = block_quote_stack.len() - 1;
312 block_quote_stack[last_idx].push(node);
313 } else if !component_stack.is_empty() {
314 let last_idx = component_stack.len() - 1;
315 component_stack[last_idx].2.push(node);
316 } else {
317 nodes.push(node);
318 }
319 }
320 }
321 Event::Start(Tag::CodeBlock(kind)) => {
322 let mut language = None;
323 let attributes = HashMap::new();
324
325 if let CodeBlockKind::Fenced(lang) = kind {
326 let lang_str = lang.to_string();
327 if !lang_str.is_empty() {
328 language = Some(lang_str);
329 }
330 }
331
332 current_node = Some(Node::CodeBlock {
333 language,
334 content: String::new(),
335 attributes,
336 });
337 }
338 Event::End(Tag::CodeBlock(_)) => {
339 if let Some(node) = current_node.take() {
340 if !block_quote_stack.is_empty() {
341 let last_idx = block_quote_stack.len() - 1;
342 block_quote_stack[last_idx].push(node);
343 } else if !component_stack.is_empty() {
344 let last_idx = component_stack.len() - 1;
345 component_stack[last_idx].2.push(node);
346 } else {
347 nodes.push(node);
348 }
349 }
350 }
351 Event::Start(Tag::List(first_item_number)) => {
352 list_stack.push((first_item_number.is_some(), Vec::new()));
353 }
354 Event::End(Tag::List(_)) => {
355 if let Some((ordered, items)) = list_stack.pop() {
356 let node = Node::List { ordered, items };
357
358 if !block_quote_stack.is_empty() {
359 let last_idx = block_quote_stack.len() - 1;
360 block_quote_stack[last_idx].push(node);
361 } else if !list_stack.is_empty() {
362 let last_list_idx = list_stack.len() - 1;
363 if let Some(last_item) = list_stack[last_list_idx].1.last_mut() {
364 last_item.push(node);
365 }
366 } else if !component_stack.is_empty() {
367 let last_idx = component_stack.len() - 1;
368 component_stack[last_idx].2.push(node);
369 } else {
370 nodes.push(node);
371 }
372 }
373 }
374 Event::Start(Tag::Item) => {
375 if !list_stack.is_empty() {
376 let last_idx = list_stack.len() - 1;
377 list_stack[last_idx].1.push(Vec::new());
378 }
379 }
380 Event::End(Tag::Item) => {
381 }
383 Event::Text(text) => {
384 if let Some(Node::CodeBlock {
385 ref mut content, ..
386 }) = current_node
387 {
388 content.push_str(&text);
389 } else {
390 current_inline_nodes.push(InlineNode::Text(text.to_string()));
391 }
392 }
393 Event::Code(code) => {
394 current_inline_nodes.push(InlineNode::Code(code.to_string()));
395 }
396 Event::Html(html) => {
397 let html_str = html.to_string();
398
399 if options.custom_components && html_str.trim().starts_with("::") {
401 if html_str.trim().starts_with(":::") {
402 if let Some(component_name) = parse_component_start(&html_str) {
404 let attributes =
405 extract_component_attributes(&html_str).unwrap_or_default();
406
407 if !component_stack.is_empty() {
408 let child_component = (component_name, attributes, Vec::new());
409 let last_idx = component_stack.len() - 1;
410 component_stack[last_idx].2.push(Node::Component {
411 name: child_component.0.clone(),
412 attributes: child_component.1.clone(),
413 children: Vec::new(),
414 });
415 component_stack.push(child_component);
416 }
417 }
418 } else if let Some(component_name) = parse_component_start(&html_str) {
419 let attributes =
420 extract_component_attributes(&html_str).unwrap_or_default();
421 component_stack.push((component_name, attributes, Vec::new()));
422 } else if html_str.trim() == "::" || html_str.trim() == ":::" {
423 if let Some((name, attributes, children)) = component_stack.pop() {
425 let node = Node::Component {
426 name,
427 attributes,
428 children,
429 };
430
431 if !component_stack.is_empty() {
432 let last_idx = component_stack.len() - 1;
433 if let Some(Node::Component {
435 name: child_name,
436 attributes: child_attrs,
437 children: child_children,
438 }) = component_stack[last_idx].2.last_mut()
439 {
440 if child_name == node.name()
441 && *child_attrs == node.attributes()
442 {
443 *child_children = node.children();
445 continue;
446 }
447 }
448 component_stack[last_idx].2.push(node);
449 } else if !block_quote_stack.is_empty() {
450 let last_idx = block_quote_stack.len() - 1;
451 block_quote_stack[last_idx].push(node);
452 } else {
453 nodes.push(node);
454 }
455 }
456 } else {
457 nodes.push(Node::Html(html_str));
458 }
459 } else {
460 nodes.push(Node::Html(html_str));
461 }
462 }
463 Event::Start(Tag::Emphasis) => {
464 let mut emphasis_nodes = Vec::new();
465
466 for next_event in events.by_ref() {
468 match next_event {
469 Event::Text(text) => {
470 emphasis_nodes.push(InlineNode::Text(text.to_string()));
471 }
472 Event::End(Tag::Emphasis) => {
473 break;
474 }
475 _ => {} }
477 }
478
479 current_inline_nodes.push(InlineNode::Emphasis(emphasis_nodes));
480 }
481 Event::Start(Tag::Strong) => {
482 let mut strong_nodes = Vec::new();
483
484 for next_event in events.by_ref() {
486 match next_event {
487 Event::Text(text) => {
488 strong_nodes.push(InlineNode::Text(text.to_string()));
489 }
490 Event::End(Tag::Strong) => {
491 break;
492 }
493 _ => {} }
495 }
496
497 current_inline_nodes.push(InlineNode::Strong(strong_nodes));
498 }
499 Event::Start(Tag::Strikethrough) => {
500 let mut strikethrough_nodes = Vec::new();
501
502 for next_event in events.by_ref() {
504 match next_event {
505 Event::Text(text) => {
506 strikethrough_nodes.push(InlineNode::Text(text.to_string()));
507 }
508 Event::End(Tag::Strikethrough) => {
509 break;
510 }
511 _ => {} }
513 }
514
515 current_inline_nodes.push(InlineNode::Strikethrough(strikethrough_nodes));
516 }
517 Event::Start(Tag::Link(_link_type, url, title)) => {
518 let url_str = url.to_string();
519 let title_opt = if title.is_empty() {
520 None
521 } else {
522 Some(title.to_string())
523 };
524 link_stack.push((url_str, title_opt, Vec::new()));
525 }
526 Event::End(Tag::Link(_, _, _)) => {
527 if let Some((url, title, text)) = link_stack.pop() {
528 current_inline_nodes.push(InlineNode::Link { url, title, text });
529 }
530 }
531 Event::Start(Tag::Image(_link_type, url, title)) => {
532 let url_str = url.to_string();
533 let title_opt = if title.is_empty() {
534 None
535 } else {
536 Some(title.to_string())
537 };
538 if let Some(Event::Text(alt)) = events.next() {
540 current_inline_nodes.push(InlineNode::Image {
541 url: url_str,
542 title: title_opt,
543 alt: alt.to_string(),
544 });
545 } else {
546 current_inline_nodes.push(InlineNode::Image {
547 url: url_str,
548 title: title_opt,
549 alt: String::new(),
550 });
551 }
552 events.next();
554 }
555 Event::SoftBreak | Event::HardBreak => {
556 current_inline_nodes.push(InlineNode::LineBreak);
557 }
558 Event::Start(Tag::Table(alignments)) => {
559 table_headers = Vec::new();
560 table_rows = Vec::new();
561 table_alignments = alignments
562 .iter()
563 .map(|a| match a {
564 pulldown_cmark::Alignment::None => Alignment::None,
565 pulldown_cmark::Alignment::Left => Alignment::Left,
566 pulldown_cmark::Alignment::Center => Alignment::Center,
567 pulldown_cmark::Alignment::Right => Alignment::Right,
568 })
569 .collect();
570 }
571 Event::End(Tag::Table(_)) => {
572 let node = Node::Table {
573 headers: table_headers.clone(),
574 rows: table_rows.clone(),
575 alignments: table_alignments.clone(),
576 };
577
578 if !block_quote_stack.is_empty() {
579 let last_idx = block_quote_stack.len() - 1;
580 block_quote_stack[last_idx].push(node);
581 } else if !component_stack.is_empty() {
582 let last_idx = component_stack.len() - 1;
583 component_stack[last_idx].2.push(node);
584 } else {
585 nodes.push(node);
586 }
587
588 table_headers.clear();
589 table_rows.clear();
590 table_alignments.clear();
591 }
592 Event::Start(Tag::TableHead) => {
593 in_table_head = true;
594 }
595 Event::End(Tag::TableHead) => {
596 in_table_head = false;
597 }
598 Event::Start(Tag::TableRow) => {
599 in_table_row = true;
600 current_table_row = Vec::new();
601 }
602 Event::End(Tag::TableRow) => {
603 in_table_row = false;
604 if !current_table_row.is_empty() {
605 if in_table_head {
606 table_headers = current_table_row.clone();
607 } else {
608 table_rows.push(current_table_row.clone());
609 }
610 current_table_row.clear();
611 }
612 }
613 Event::Start(Tag::TableCell) => {
614 current_table_cell = Vec::new();
615 }
616 Event::End(Tag::TableCell) => {
617 if in_table_row {
618 current_table_row.push(current_table_cell.clone());
619 current_table_cell.clear();
620 }
621 }
622 Event::Rule => {
623 nodes.push(Node::ThematicBreak);
624 }
625 Event::FootnoteReference(_) => {
626 }
628 Event::TaskListMarker(_) => {
629 }
631 _ => {
633 }
635 }
636 }
637
638 Ok(nodes)
639}
640
/// Extracts the component name from an opening marker such as
/// `::name{attrs}` or `:::name`.
///
/// The input is trimmed, the leading marker is removed, and everything up to
/// the first `{` (or the end of the text) is taken as the name. Returns
/// `None` when the text does not start with `::` or the name is empty.
fn parse_component_start(html: &str) -> Option<String> {
    let trimmed = html.trim();

    // Pick the longest marker the text starts with; bail out if there is none.
    let marker = if trimmed.starts_with(":::") {
        ":::"
    } else if trimmed.starts_with("::") {
        "::"
    } else {
        return None;
    };

    let after_marker = trimmed.trim_start_matches(marker);
    let name = after_marker.split('{').next().unwrap_or("").trim();

    Some(name)
        .filter(|candidate| !candidate.is_empty())
        .map(String::from)
}
663
/// Splits an attribute list on whitespace, keeping quoted sections
/// (`"..."` or `'...'`) together so values may contain spaces.
fn split_attribute_tokens(attrs: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    let mut token_start: Option<usize> = None;
    let mut open_quote: Option<char> = None;

    for (index, ch) in attrs.char_indices() {
        match open_quote {
            // Inside quotes only the matching quote character is special.
            Some(quote) => {
                if ch == quote {
                    open_quote = None;
                }
            }
            None if ch.is_whitespace() => {
                if let Some(start) = token_start.take() {
                    tokens.push(&attrs[start..index]);
                }
            }
            None => {
                if ch == '"' || ch == '\'' {
                    open_quote = Some(ch);
                }
                if token_start.is_none() {
                    token_start = Some(index);
                }
            }
        }
    }

    if let Some(start) = token_start {
        tokens.push(&attrs[start..]);
    }
    tokens
}

/// Parses the `{key=value ...}` section of a component marker.
///
/// Returns `None` when the text has no `{` or no `}` after it. Otherwise
/// returns the `key=value` pairs found between the braces; tokens without
/// `=` are skipped. Values may be wrapped in single or double quotes, which
/// are removed, and quoted values may contain whitespace.
fn extract_component_attributes(html: &str) -> Option<HashMap<String, String>> {
    let html = html.trim();

    let open = html.find('{')?;
    let inner_start = open + 1;
    // Look for the closing brace only after the opening one, so a stray `}`
    // earlier in the text cannot produce an inverted (panicking) slice.
    let inner_len = html[inner_start..].find('}')?;
    let inner = &html[inner_start..inner_start + inner_len];

    let mut attributes = HashMap::new();
    for token in split_attribute_tokens(inner) {
        if let Some(equals_pos) = token.find('=') {
            let name = token[..equals_pos].trim();
            let value = token[equals_pos + 1..]
                .trim()
                .trim_start_matches('"')
                .trim_start_matches('\'')
                .trim_end_matches('"')
                .trim_end_matches('\'');

            attributes.insert(name.to_string(), value.to_string());
        }
    }

    Some(attributes)
}