1extern crate alloc;
74
75#[cfg_attr(all(), allow(clippy::mem_forget))]
76mod parser_cell {
77 use self_cell::self_cell;
78
79 use super::MarkdownParser;
80
81 self_cell!(
82 pub(super) struct ParserCell {
83 owner: String,
84 #[covariant]
85 dependent: MarkdownParser,
86 }
87 );
88}
89
90use alloc::collections::VecDeque;
91use std::io::{Read, Seek};
92
93pub use docspec_core::EventSource;
94use docspec_core::{Depth, Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyle};
95use parser_cell::ParserCell;
96use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
97
98struct MarkdownParser<'a>(Parser<'a>);
99
/// Tracks which kind of block, if any, inline content is currently being emitted into.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
    /// A paragraph the reader opened on its own because text arrived with no open block.
    AutoParagraph,
    /// A block whose start event has already been queued.
    Explicit,
    /// No block is open.
    None,
    /// A paragraph start was seen, but `StartParagraph` has not been queued yet.
    PendingExplicit,
}
112
/// Lifecycle of the reader as seen through `next_event`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
    /// The parser is exhausted and `EndDocument` has been queued.
    Finished,
    /// `StartDocument` has not been returned yet.
    NotStarted,
    /// Parser events are being translated.
    Running,
}
123
/// Per-list bookkeeping; one entry is pushed for every list that is currently open.
struct ListContext {
    /// Whether an item start has been emitted without its matching end.
    item_open: bool,
    /// `true` when the list was opened with a start number (ordered list).
    ordered: bool,
    /// Start number that the next item of an ordered list takes and clears.
    pending_start: Option<u64>,
}
134
/// Data collected between an image's start and end tags.
struct ImageBuffer {
    /// Text, code and break content seen inside the image, used as its alt text.
    alt_buf: String,
    /// Optional image title.
    title: Option<String>,
    /// Image destination, emitted as the image's URI.
    url: String,
}
144
145enum MarkdownPulldownEvent {
146 Code(String),
147 End(TagEnd),
148 HardBreak,
149 Ignored,
150 Rule,
151 SoftBreak,
152 Start(MarkdownStartTag),
153 Text(String),
154}
155
156enum MarkdownStartTag {
157 BlockQuote,
158 CodeBlock {
159 syntax: Option<String>,
160 },
161 Emphasis,
162 Heading {
163 level: HeadingLevel,
164 },
165 Image {
166 dest_url: String,
167 title: Option<String>,
168 },
169 Item,
170 Link {
171 dest_url: String,
172 title: Option<String>,
173 },
174 List(Option<u64>),
175 Paragraph,
176 Strikethrough,
177 Strong,
178 Table,
179 TableCell,
180 TableHead,
181 TableRow,
182}
183
/// State of the link that is currently open in the Markdown source.
struct LinkBuffer {
    /// Link destination.
    href: String,
    /// Whether `StartLink` has already been queued for this link.
    started: bool,
    /// Optional link title.
    title: Option<String>,
}
193
194pub struct MarkdownReader {
212 block_state: BlockState,
214 bold_depth: Depth,
216 cell: ParserCell,
218 code_block_buffer: Option<String>,
220 image: Option<ImageBuffer>,
222 in_table_head: bool,
224 italic_depth: Depth,
226 link: Option<LinkBuffer>,
228 list_stack: alloc::vec::Vec<ListContext>,
231 phase: Phase,
233 queue: VecDeque<Event>,
235 strikethrough_depth: Depth,
237}
238
239impl MarkdownReader {
240 fn close_current_item_if_open(&mut self) {
241 if let Some(ctx) = self.list_stack.last_mut() {
242 if ctx.item_open {
243 if ctx.ordered {
244 self.queue.push_back(Event::EndOrderedListItem);
245 } else {
246 self.queue.push_back(Event::EndUnorderedListItem);
247 }
248 ctx.item_open = false;
249 self.block_state = BlockState::None;
250 }
251 }
252 }
253
254 fn current_text_style(&self) -> TextStyle {
255 let mut style = TextStyle::default();
256 if self.bold_depth.is_positive() {
257 style = style.bold();
258 }
259 if self.italic_depth.is_positive() {
260 style = style.italic();
261 }
262 if self.strikethrough_depth.is_positive() {
263 style = style.strikethrough();
264 }
265 style
266 }
267
268 fn emit_pending_link_start(&mut self) {
271 self.flush_pending_paragraph_start();
272 if let Some(link) = self.link.as_mut() {
273 if !link.started {
274 self.queue.push_back(Event::StartLink {
275 href: link.href.clone(),
276 id: None,
277 title: link.title.clone(),
278 });
279 link.started = true;
280 }
281 }
282 }
283
284 fn flush_pending_paragraph_start(&mut self) {
287 if self.block_state == BlockState::PendingExplicit {
288 self.queue.push_back(Event::StartParagraph {
289 alignment: None,
290 id: None,
291 });
292 self.block_state = BlockState::Explicit;
293 }
294 }
295
296 fn from_owned_string(source: String) -> Self {
297 let options = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
298 let cell = ParserCell::new(source, |s| MarkdownParser(Parser::new_ext(s, options)));
299 Self {
300 block_state: BlockState::None,
301 bold_depth: Depth::default(),
302 cell,
303 code_block_buffer: None,
304 image: None,
305 in_table_head: false,
306 italic_depth: Depth::default(),
307 link: None,
308 list_stack: Vec::new(),
309 phase: Phase::NotStarted,
310 queue: VecDeque::new(),
311 strikethrough_depth: Depth::default(),
312 }
313 }
314
315 #[inline]
324 pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
325 let mut source = String::new();
326 reader.read_to_string(&mut source)?;
327 Ok(Self::from_owned_string(source))
328 }
329
330 #[inline]
342 #[must_use]
343 #[expect(
344 clippy::should_implement_trait,
345 reason = "constructor name is required for reader API consistency"
346 )]
347 pub fn from_str(input: &str) -> Self {
348 Self::from_owned_string(input.to_owned())
349 }
350
351 fn handle_code(&mut self, content: String) {
352 if let Some(img) = &mut self.image {
353 img.alt_buf.push_str(&content);
354 } else {
355 self.emit_pending_link_start();
356 if self.block_state == BlockState::None {
357 self.queue.push_back(Event::StartParagraph {
358 alignment: None,
359 id: None,
360 });
361 self.block_state = BlockState::AutoParagraph;
362 }
363 self.queue.push_back(Event::Text {
364 content,
365 style: self.current_text_style().code(),
366 });
367 }
368 }
369
370 fn handle_end_code_block(&mut self) {
373 if let Some(buf) = self.code_block_buffer.take() {
374 let content = buf.strip_suffix('\n').unwrap_or(&buf).to_owned();
375 if !content.is_empty() {
376 self.queue.push_back(Event::Text {
377 content,
378 style: TextStyle::default(),
379 });
380 }
381 }
382 self.push_event_end(Event::EndPreformatted);
383 }
384
385 fn handle_end_image(&mut self) {
389 let Some(img) = self.image.take() else { return };
390 self.flush_pending_paragraph_start();
391 let trimmed = img.alt_buf.trim();
392 let alt = if trimmed.is_empty() {
393 None
394 } else {
395 Some(trimmed.to_owned())
396 };
397 let decorative = alt.is_none();
398 self.queue.push_back(Event::Image {
399 source: ImageSource::Uri { uri: img.url },
400 alt,
401 title: img.title,
402 decorative,
403 id: None,
404 });
405 }
406
407 fn handle_end_item(&mut self) {
410 if self.block_state == BlockState::AutoParagraph {
411 self.queue.push_back(Event::EndParagraph);
412 }
413 self.close_current_item_if_open();
414 self.block_state = BlockState::None;
415 }
416
417 fn handle_end_link(&mut self) {
419 let Some(link) = self.link.take() else { return };
420 if link.started {
421 self.queue.push_back(Event::EndLink);
422 } else {
423 self.flush_pending_paragraph_start();
424 self.queue.push_back(Event::StartLink {
425 href: link.href,
426 id: None,
427 title: link.title,
428 });
429 self.queue.push_back(Event::EndLink);
430 }
431 }
432
433 fn handle_end_list(&mut self) {
435 self.close_current_item_if_open();
436 self.list_stack.pop();
437 self.block_state = BlockState::None;
438 }
439
440 fn handle_end_table_cell(&mut self) {
443 if self.in_table_head {
444 self.push_event_end(Event::EndTableHeader);
445 } else {
446 self.push_event_end(Event::EndTableCell);
447 }
448 }
449
450 fn handle_end_table_head(&mut self) {
452 self.push_event_end(Event::EndTableRow);
453 self.in_table_head = false;
454 }
455
456 fn handle_end_tag(&mut self, tag_end: TagEnd) {
462 match tag_end {
463 TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
464 TagEnd::CodeBlock => self.handle_end_code_block(),
465 TagEnd::Emphasis => self.italic_depth.dec(),
466 TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
467 TagEnd::Image => self.handle_end_image(),
468 TagEnd::Item => self.handle_end_item(),
469 TagEnd::Link => self.handle_end_link(),
470 TagEnd::List(_) => self.handle_end_list(),
471 TagEnd::Paragraph => {
472 if self.block_state == BlockState::PendingExplicit {
473 self.block_state = BlockState::None;
474 } else {
475 self.push_event_end(Event::EndParagraph);
476 }
477 }
478 TagEnd::Strikethrough => self.strikethrough_depth.dec(),
479 TagEnd::Strong => self.bold_depth.dec(),
480 TagEnd::Table => self.push_event_end(Event::EndTable),
481 TagEnd::TableCell => self.handle_end_table_cell(),
482 TagEnd::TableHead => self.handle_end_table_head(),
483 TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
484 TagEnd::DefinitionList
486 | TagEnd::DefinitionListDefinition
487 | TagEnd::DefinitionListTitle
488 | TagEnd::FootnoteDefinition
489 | TagEnd::HtmlBlock
490 | TagEnd::MetadataBlock(_)
491 | TagEnd::Subscript
492 | TagEnd::Superscript => {}
493 }
494 }
495
496 fn handle_item_start(&mut self) {
497 let depth = self.list_stack.len().saturating_sub(1);
498 let level = u32::try_from(depth).map_or(u32::MAX, |v| v);
499 if let Some(ctx) = self.list_stack.last_mut() {
500 if ctx.ordered {
501 self.queue.push_back(Event::StartOrderedListItem {
502 start: ctx.pending_start.take(),
503 style_type: ListStyleType::Decimal,
504 level,
505 id: None,
506 });
507 } else {
508 self.queue.push_back(Event::StartUnorderedListItem {
509 style_type: ListStyleType::Disc,
510 level,
511 id: None,
512 });
513 }
514 ctx.item_open = true;
515 self.block_state = BlockState::Explicit;
516 }
517 }
518
519 fn handle_list_start(&mut self, start_opt: Option<u64>) {
520 self.list_stack.push(ListContext {
521 item_open: false,
522 ordered: start_opt.is_some(),
523 pending_start: start_opt,
524 });
525 }
526
527 fn handle_start_code_block(&mut self, syntax: Option<String>) {
530 self.code_block_buffer = Some(String::new());
531 self.push_event_start(Event::StartPreformatted { id: None, syntax });
532 }
533
534 fn handle_start_heading(&mut self, level: HeadingLevel) {
536 let level_u8 = match level {
537 HeadingLevel::H1 => 1,
538 HeadingLevel::H2 => 2,
539 HeadingLevel::H3 => 3,
540 HeadingLevel::H4 => 4,
541 HeadingLevel::H5 => 5,
542 HeadingLevel::H6 => 6,
543 };
544 self.push_event_start(Event::StartHeading {
545 level: level_u8,
546 id: None,
547 });
548 }
549
550 fn handle_start_image(&mut self, dest_url: String, title: Option<String>) {
554 self.flush_pending_paragraph_start();
563 if let Some(link) = self.link.take() {
564 if link.started {
565 self.queue.push_back(Event::EndLink);
566 } else {
567 self.queue.push_back(Event::StartLink {
568 href: link.href,
569 id: None,
570 title: link.title,
571 });
572 self.queue.push_back(Event::EndLink);
573 }
574 }
575
576 self.image = Some(ImageBuffer {
577 alt_buf: String::new(),
578 title,
579 url: dest_url,
580 });
581 }
582
583 fn handle_start_link(&mut self, dest_url: String, title: Option<String>) {
588 self.link = Some(LinkBuffer {
589 href: dest_url,
590 started: false,
591 title,
592 });
593 }
594
595 fn handle_start_table_cell(&mut self) {
598 if self.in_table_head {
599 self.push_event_start(Event::StartTableHeader {
600 scope: Some(TableHeaderScope::Column),
601 abbr: None,
602 colspan: None,
603 rowspan: None,
604 id: None,
605 });
606 } else {
607 self.push_event_start(Event::StartTableCell {
608 colspan: None,
609 rowspan: None,
610 id: None,
611 });
612 }
613 }
614
615 fn handle_start_table_head(&mut self) {
617 self.in_table_head = true;
618 self.push_event_start(Event::StartTableRow { id: None });
619 }
620
621 fn handle_start_tag(&mut self, tag: MarkdownStartTag) {
627 match tag {
628 MarkdownStartTag::BlockQuote => {
629 self.push_event_start(Event::StartBlockQuote { id: None });
630 }
631 MarkdownStartTag::CodeBlock { syntax } => self.handle_start_code_block(syntax),
632 MarkdownStartTag::Emphasis => self.italic_depth.inc(),
633 MarkdownStartTag::Heading { level } => self.handle_start_heading(level),
634 MarkdownStartTag::Image { dest_url, title } => self.handle_start_image(dest_url, title),
635 MarkdownStartTag::Item => self.handle_item_start(),
636 MarkdownStartTag::Link { dest_url, title } => self.handle_start_link(dest_url, title),
637 MarkdownStartTag::List(start_opt) => self.handle_list_start(start_opt),
638 MarkdownStartTag::Paragraph => self.block_state = BlockState::PendingExplicit,
639 MarkdownStartTag::Strikethrough => self.strikethrough_depth.inc(),
640 MarkdownStartTag::Strong => self.bold_depth.inc(),
641 MarkdownStartTag::Table => self.push_event_start(Event::StartTable { id: None }),
642 MarkdownStartTag::TableCell => self.handle_start_table_cell(),
643 MarkdownStartTag::TableHead => self.handle_start_table_head(),
644 MarkdownStartTag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
645 }
646 }
647
648 fn handle_text(&mut self, content: String) {
649 if let Some(img) = &mut self.image {
650 img.alt_buf.push_str(&content);
651 } else if let Some(buf) = &mut self.code_block_buffer {
652 buf.push_str(&content);
653 } else {
654 self.emit_pending_link_start();
655 if self.block_state == BlockState::None {
656 self.queue.push_back(Event::StartParagraph {
657 alignment: None,
658 id: None,
659 });
660 self.block_state = BlockState::AutoParagraph;
661 }
662 self.queue.push_back(Event::Text {
663 content,
664 style: self.current_text_style(),
665 });
666 }
667 }
668
669 fn next_pulldown_event(&mut self) -> Option<MarkdownPulldownEvent> {
670 self.cell.with_dependent_mut(|_, dep| {
671 dep.0.next().map(|event| match event {
672 pulldown_cmark::Event::Start(tag) => markdown_start_tag(tag)
673 .map_or(MarkdownPulldownEvent::Ignored, MarkdownPulldownEvent::Start),
674 pulldown_cmark::Event::End(tag_end) => MarkdownPulldownEvent::End(tag_end),
675 pulldown_cmark::Event::Text(text) => {
676 MarkdownPulldownEvent::Text(text.into_string())
677 }
678 pulldown_cmark::Event::Code(code) => {
679 MarkdownPulldownEvent::Code(code.into_string())
680 }
681 pulldown_cmark::Event::HardBreak => MarkdownPulldownEvent::HardBreak,
682 pulldown_cmark::Event::SoftBreak => MarkdownPulldownEvent::SoftBreak,
683 pulldown_cmark::Event::Rule => MarkdownPulldownEvent::Rule,
684 pulldown_cmark::Event::DisplayMath(_)
685 | pulldown_cmark::Event::FootnoteReference(_)
686 | pulldown_cmark::Event::Html(_)
687 | pulldown_cmark::Event::InlineHtml(_)
688 | pulldown_cmark::Event::InlineMath(_)
689 | pulldown_cmark::Event::TaskListMarker(_) => MarkdownPulldownEvent::Ignored,
690 })
691 })
692 }
693
694 fn process_next_pulldown_event(&mut self) {
695 let Some(pm_event) = self.next_pulldown_event() else {
696 if self.phase != Phase::Finished {
697 self.phase = Phase::Finished;
698 self.queue.push_back(Event::EndDocument);
699 }
700 return;
701 };
702
703 match pm_event {
704 MarkdownPulldownEvent::Start(tag) => self.handle_start_tag(tag),
705 MarkdownPulldownEvent::End(tag_end) => self.handle_end_tag(tag_end),
706 MarkdownPulldownEvent::Text(text) => self.handle_text(text),
707 MarkdownPulldownEvent::Code(code) => self.handle_code(code),
708 MarkdownPulldownEvent::HardBreak => {
709 if let Some(img) = &mut self.image {
710 img.alt_buf.push(' ');
711 } else if self.block_state == BlockState::PendingExplicit {
712 } else {
714 self.emit_pending_link_start();
715 self.queue.push_back(Event::LineBreak);
716 }
717 }
718 MarkdownPulldownEvent::SoftBreak => {
719 if let Some(img) = &mut self.image {
720 img.alt_buf.push(' ');
721 } else if self.block_state == BlockState::PendingExplicit {
722 } else {
724 self.emit_pending_link_start();
725 self.queue.push_back(Event::SoftBreak);
726 }
727 }
728 MarkdownPulldownEvent::Rule => {
729 self.queue.push_back(Event::ThematicBreak { id: None });
730 }
731 MarkdownPulldownEvent::Ignored => {}
732 }
733 }
734
735 fn push_event(&mut self, event: Event, state: BlockState) {
736 self.queue.push_back(event);
737 self.block_state = state;
738 }
739
740 fn push_event_end(&mut self, event: Event) {
741 self.push_event(event, BlockState::None);
742 }
743
744 fn push_event_start(&mut self, event: Event) {
745 self.push_event(event, BlockState::Explicit);
746 }
747}
748
749impl EventSource for MarkdownReader {
750 #[inline]
751 fn next_event(&mut self) -> Result<Option<Event>> {
752 if self.phase == Phase::NotStarted {
753 self.phase = Phase::Running;
754 return Ok(Some(Event::StartDocument {
755 id: None,
756 language: None,
757 metadata: None,
758 }));
759 }
760
761 if self.phase == Phase::Finished && self.queue.is_empty() {
762 return Ok(None);
763 }
764
765 while self.queue.is_empty() && self.phase != Phase::Finished {
766 self.process_next_pulldown_event();
767 }
768
769 Ok(self.queue.pop_front())
770 }
771}
772
773fn markdown_start_tag(tag: Tag<'_>) -> Option<MarkdownStartTag> {
774 match tag {
775 Tag::BlockQuote(_) => Some(MarkdownStartTag::BlockQuote),
776 Tag::CodeBlock(kind) => Some(MarkdownStartTag::CodeBlock {
777 syntax: code_block_syntax(kind),
778 }),
779 Tag::Emphasis => Some(MarkdownStartTag::Emphasis),
780 Tag::Heading { level, .. } => Some(MarkdownStartTag::Heading { level }),
781 Tag::Image {
782 dest_url, title, ..
783 } => Some(MarkdownStartTag::Image {
784 dest_url: dest_url.into_string(),
785 title: cow_to_optional_string(title),
786 }),
787 Tag::Item => Some(MarkdownStartTag::Item),
788 Tag::Link {
789 dest_url, title, ..
790 } => Some(MarkdownStartTag::Link {
791 dest_url: dest_url.into_string(),
792 title: cow_to_optional_string(title),
793 }),
794 Tag::List(start_opt) => Some(MarkdownStartTag::List(start_opt)),
795 Tag::Paragraph => Some(MarkdownStartTag::Paragraph),
796 Tag::Strikethrough => Some(MarkdownStartTag::Strikethrough),
797 Tag::Strong => Some(MarkdownStartTag::Strong),
798 Tag::Table(_) => Some(MarkdownStartTag::Table),
799 Tag::TableCell => Some(MarkdownStartTag::TableCell),
800 Tag::TableHead => Some(MarkdownStartTag::TableHead),
801 Tag::TableRow => Some(MarkdownStartTag::TableRow),
802 Tag::DefinitionList
803 | Tag::DefinitionListDefinition
804 | Tag::DefinitionListTitle
805 | Tag::FootnoteDefinition(_)
806 | Tag::HtmlBlock
807 | Tag::MetadataBlock(_)
808 | Tag::Subscript
809 | Tag::Superscript => None,
810 }
811}
812
813fn code_block_syntax(kind: CodeBlockKind<'_>) -> Option<String> {
814 match kind {
815 CodeBlockKind::Fenced(lang) if !lang.is_empty() => Some(lang.into_string()),
816 CodeBlockKind::Fenced(_) | CodeBlockKind::Indented => None,
817 }
818}
819
820fn cow_to_optional_string(value: CowStr<'_>) -> Option<String> {
821 if value.is_empty() {
822 None
823 } else {
824 Some(value.into_string())
825 }
826}
827
#[cfg(test)]
mod tests {
    use super::*;

    /// The event an implicitly opened paragraph starts with.
    fn paragraph_start() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    /// Inline code arriving with no open block is wrapped in an implicit paragraph.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_code(String::from("code"));

        let code_text = Event::Text {
            content: String::from("code"),
            style: TextStyle::default().code(),
        };
        let emitted: Vec<&Event> = reader.queue.iter().collect();
        assert_eq!(emitted, [&paragraph_start(), &code_text]);
    }

    /// Plain text arriving with no open block is wrapped in an implicit paragraph.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_text(String::from("hello"));

        let plain_text = Event::Text {
            content: String::from("hello"),
            style: TextStyle::default(),
        };
        let emitted: Vec<&Event> = reader.queue.iter().collect();
        assert_eq!(emitted, [&paragraph_start(), &plain_text]);
    }
}
876
#[cfg(test)]
mod send_static_assertions {
    /// Compiles only when `T` is `Send + 'static`; calling it is the whole assertion.
    fn require_send_static<T>()
    where
        T: Send + 'static,
    {
    }

    /// The reader must be movable across threads and own all of its data.
    #[test]
    fn markdown_reader_is_send_static() {
        require_send_static::<crate::MarkdownReader>();
    }
}