1extern crate alloc;
74
// The `clippy::mem_forget` lint is allowed for this module only.
// NOTE(review): presumably the code generated by `self_cell!` triggers that lint, and the
// always-true `cfg_attr(all(), ..)` wrapper is a way to attach the `allow` — confirm.
#[cfg_attr(all(), allow(clippy::mem_forget))]
mod parser_cell {
    use self_cell::self_cell;

    use super::MarkdownParser;

    // Self-referential pair: `owner` is the Markdown source text and `dependent` is the
    // parser wrapper that borrows from it, so both can be stored in one struct.
    self_cell!(
        pub(super) struct ParserCell {
            owner: String,
            #[covariant]
            dependent: MarkdownParser,
        }
    );
}
89
90use alloc::collections::VecDeque;
91use std::io::{Read, Seek};
92
93pub use docspec_core::EventSource;
94use docspec_core::{Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyleKind};
95use parser_cell::ParserCell;
96use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
97
/// Newtype around the `pulldown_cmark` [`Parser`]; used as the dependent type of `ParserCell`.
struct MarkdownParser<'a>(Parser<'a>);
99
/// Tracks which kind of block (if any) is currently open for inline content.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
    /// A paragraph the reader opened by itself because text or inline code arrived while
    /// the state was `None`.
    AutoParagraph,
    /// A block start event has been emitted (set by `push_event_start`, item starts and
    /// flushed paragraph starts).
    Explicit,
    /// No block is open; set after every block end event.
    None,
    /// A paragraph start tag was seen but `Event::StartParagraph` has not been emitted yet;
    /// it is emitted lazily by `flush_pending_paragraph_start`.
    PendingExplicit,
}
112
/// Lifecycle of the reader as seen through `next_event`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
    /// The parser is exhausted and `Event::EndDocument` has been queued.
    Finished,
    /// `Event::StartDocument` has not been returned yet.
    NotStarted,
    /// `Event::StartDocument` was returned; parser events are being translated.
    Running,
}
123
/// State of one list on the list stack (one entry per nesting level).
struct ListContext {
    /// Whether a list item start event was emitted without its matching end event yet.
    item_open: bool,
    /// `true` when the list was started with a start number (ordered list).
    ordered: bool,
    /// Start number of an ordered list; taken (and thereby cleared) by the first item start.
    pending_start: Option<u64>,
}
134
/// Data collected between an image start tag and its end tag.
struct ImageBuffer {
    /// Accumulated alt text: text and inline code are appended, breaks become a space.
    alt_buf: String,
    /// Optional image title.
    title: Option<String>,
    /// Image destination, emitted as `ImageSource::Uri`.
    url: String,
}
144
/// Owned, reduced form of a `pulldown_cmark::Event`, produced by `next_pulldown_event`.
enum MarkdownPulldownEvent {
    /// Inline code span content.
    Code(String),
    /// End of a tagged element.
    End(TagEnd),
    /// Hard line break.
    HardBreak,
    /// Any parser event or start tag this reader does not translate.
    Ignored,
    /// Thematic break (horizontal rule).
    Rule,
    /// Soft line break.
    SoftBreak,
    /// Start of a tagged element this reader supports.
    Start(MarkdownStartTag),
    /// Text content.
    Text(String),
}
155
/// Owned subset of `pulldown_cmark::Tag` covering the start tags this reader translates.
enum MarkdownStartTag {
    BlockQuote,
    CodeBlock {
        /// Info string of a fenced code block, `None` when empty or for indented blocks.
        syntax: Option<String>,
    },
    Emphasis,
    Heading {
        level: HeadingLevel,
    },
    Image {
        dest_url: String,
        /// `None` when the source title is empty.
        title: Option<String>,
    },
    Item,
    Link {
        dest_url: String,
        /// `None` when the source title is empty.
        title: Option<String>,
    },
    /// Carries the start number; `Some` marks the list as ordered.
    List(Option<u64>),
    Paragraph,
    Strikethrough,
    Strong,
    Table,
    TableCell,
    TableHead,
    TableRow,
}
183
/// A link whose start tag was seen; `Event::StartLink` is emitted lazily.
struct LinkBuffer {
    /// Link destination.
    href: String,
    /// Whether `Event::StartLink` has already been queued for this link.
    started: bool,
    /// Optional link title.
    title: Option<String>,
}
193
/// Event source that reads Markdown text and yields `docspec_core` events.
///
/// The source text is owned by the reader and parsed incrementally; translated events are
/// buffered in `queue` and handed out one at a time by `EventSource::next_event`.
pub struct MarkdownReader {
    /// Which kind of block is currently open for inline content.
    block_state: BlockState,
    /// Owned source text together with the parser borrowing from it.
    cell: ParserCell,
    /// Text collected for the code block currently being read, if any.
    code_block_buffer: Option<String>,
    /// Image currently being read, if any.
    image: Option<ImageBuffer>,
    /// `true` between a code block start and its end; style tags are ignored meanwhile.
    in_preformatted: bool,
    /// `true` while the table head is being read; cells are then emitted as headers.
    in_table_head: bool,
    /// Link currently being read, if any.
    link: Option<LinkBuffer>,
    /// One entry per open list, innermost last.
    list_stack: alloc::vec::Vec<ListContext>,
    /// Styles for which `Event::StartTextStyle` was emitted and not yet closed.
    open_styles: alloc::vec::Vec<TextStyleKind>,
    /// Styles requested but not yet emitted; flushed right before text is queued.
    pending_open_styles: alloc::vec::Vec<TextStyleKind>,
    /// Lifecycle phase of the reader.
    phase: Phase,
    /// Translated events waiting to be returned.
    queue: VecDeque<Event>,
}
238
239impl MarkdownReader {
240 fn close_current_item_if_open(&mut self) {
241 let Some(ctx) = self.list_stack.last() else {
242 return;
243 };
244 if !ctx.item_open {
245 return;
246 }
247
248 let ordered = ctx.ordered;
249 self.close_all_open_styles();
250 if ordered {
251 self.queue.push_back(Event::EndOrderedListItem);
252 } else {
253 self.queue.push_back(Event::EndUnorderedListItem);
254 }
255 if let Some(current_ctx) = self.list_stack.last_mut() {
256 current_ctx.item_open = false;
257 }
258 self.block_state = BlockState::None;
259 }
260
261 fn close_all_open_styles(&mut self) {
262 self.pending_open_styles.clear();
263 while self.open_styles.pop().is_some() {
264 self.queue.push_back(Event::EndTextStyle);
265 }
266 }
267
268 fn close_style(&mut self, kind: &TextStyleKind) {
269 if self.in_preformatted {
270 return;
271 }
272
273 if let Some(pos) = self.pending_open_styles.iter().rposition(|k| k == kind) {
274 self.pending_open_styles.remove(pos);
275 return;
276 }
277
278 if let Some(pos) = self.open_styles.iter().rposition(|k| k == kind) {
279 let split_pos = pos
280 .checked_add(1)
281 .map_or(self.open_styles.len(), |value| value);
282 let above: alloc::vec::Vec<TextStyleKind> =
283 self.open_styles.drain(split_pos..).collect();
284 self.open_styles.pop();
285 for _ in above.iter().rev() {
286 self.queue.push_back(Event::EndTextStyle);
287 }
288 self.queue.push_back(Event::EndTextStyle);
289 for reopened in above {
290 self.pending_open_styles.push(reopened);
291 }
292 }
293 }
294
295 fn flush_pending_styles(&mut self) {
296 for kind in self.pending_open_styles.drain(..) {
297 self.queue.push_back(Event::StartTextStyle {
298 kind: kind.clone(),
299 id: None,
300 });
301 self.open_styles.push(kind);
302 }
303 }
304
305 fn open_style(&mut self, kind: TextStyleKind) {
306 if !self.in_preformatted {
307 self.pending_open_styles.push(kind);
308 }
309 }
310
311 fn emit_pending_link_start(&mut self) {
314 self.flush_pending_paragraph_start();
315 if let Some(link) = self.link.as_mut() {
316 if !link.started {
317 self.queue.push_back(Event::StartLink {
318 href: link.href.clone(),
319 id: None,
320 title: link.title.clone(),
321 });
322 link.started = true;
323 }
324 }
325 }
326
327 fn flush_pending_paragraph_start(&mut self) {
330 if self.block_state == BlockState::PendingExplicit {
331 self.queue.push_back(Event::StartParagraph {
332 alignment: None,
333 id: None,
334 });
335 self.block_state = BlockState::Explicit;
336 }
337 }
338
339 fn from_owned_string(source: String) -> Self {
340 let options = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
341 let cell = ParserCell::new(source, |s| MarkdownParser(Parser::new_ext(s, options)));
342 Self {
343 block_state: BlockState::None,
344 cell,
345 code_block_buffer: None,
346 image: None,
347 in_preformatted: false,
348 in_table_head: false,
349 link: None,
350 list_stack: Vec::new(),
351 open_styles: Vec::new(),
352 pending_open_styles: Vec::new(),
353 phase: Phase::NotStarted,
354 queue: VecDeque::new(),
355 }
356 }
357
358 #[inline]
367 pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
368 let mut source = String::new();
369 reader.read_to_string(&mut source)?;
370 Ok(Self::from_owned_string(source))
371 }
372
373 #[inline]
385 #[must_use]
386 #[expect(
387 clippy::should_implement_trait,
388 reason = "constructor name is required for reader API consistency"
389 )]
390 pub fn from_str(input: &str) -> Self {
391 Self::from_owned_string(input.to_owned())
392 }
393
394 fn handle_code(&mut self, content: String) {
395 if let Some(img) = &mut self.image {
396 img.alt_buf.push_str(&content);
397 } else {
398 self.emit_pending_link_start();
399 if self.block_state == BlockState::None {
400 self.queue.push_back(Event::StartParagraph {
401 alignment: None,
402 id: None,
403 });
404 self.block_state = BlockState::AutoParagraph;
405 }
406 self.flush_pending_styles();
407 self.queue.push_back(Event::StartTextStyle {
408 kind: TextStyleKind::Code,
409 id: None,
410 });
411 self.queue.push_back(Event::Text { content });
412 self.queue.push_back(Event::EndTextStyle);
413 }
414 }
415
416 fn handle_end_code_block(&mut self) {
419 if let Some(buf) = self.code_block_buffer.take() {
420 let content = buf.strip_suffix('\n').unwrap_or(&buf).to_owned();
421 if !content.is_empty() {
422 self.queue.push_back(Event::Text { content });
423 }
424 }
425 self.in_preformatted = false;
426 self.push_event_end(Event::EndPreformatted);
427 }
428
429 fn handle_end_image(&mut self) {
433 let Some(img) = self.image.take() else { return };
434 self.flush_pending_paragraph_start();
435 let trimmed = img.alt_buf.trim();
436 let alt = if trimmed.is_empty() {
437 None
438 } else {
439 Some(trimmed.to_owned())
440 };
441 let decorative = alt.is_none();
442 self.queue.push_back(Event::Image {
443 source: ImageSource::Uri { uri: img.url },
444 alt,
445 title: img.title,
446 decorative,
447 id: None,
448 });
449 }
450
451 fn handle_end_item(&mut self) {
454 if self.block_state == BlockState::AutoParagraph {
455 self.close_all_open_styles();
456 self.queue.push_back(Event::EndParagraph);
457 }
458 self.close_current_item_if_open();
459 self.block_state = BlockState::None;
460 }
461
462 fn handle_end_link(&mut self) {
464 let Some(link) = self.link.take() else { return };
465 if link.started {
466 self.queue.push_back(Event::EndLink);
467 } else {
468 self.flush_pending_paragraph_start();
469 self.queue.push_back(Event::StartLink {
470 href: link.href,
471 id: None,
472 title: link.title,
473 });
474 self.queue.push_back(Event::EndLink);
475 }
476 }
477
478 fn handle_end_list(&mut self) {
480 self.close_current_item_if_open();
481 self.list_stack.pop();
482 self.block_state = BlockState::None;
483 }
484
485 fn handle_end_table_cell(&mut self) {
488 if self.in_table_head {
489 self.push_event_end(Event::EndTableHeader);
490 } else {
491 self.push_event_end(Event::EndTableCell);
492 }
493 }
494
495 fn handle_end_table_head(&mut self) {
497 self.push_event_end(Event::EndTableRow);
498 self.in_table_head = false;
499 }
500
501 fn handle_end_tag(&mut self, tag_end: TagEnd) {
507 match tag_end {
508 TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
509 TagEnd::CodeBlock => self.handle_end_code_block(),
510 TagEnd::Emphasis => self.close_style(&TextStyleKind::Italic),
511 TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
512 TagEnd::Image => self.handle_end_image(),
513 TagEnd::Item => self.handle_end_item(),
514 TagEnd::Link => self.handle_end_link(),
515 TagEnd::List(_) => self.handle_end_list(),
516 TagEnd::Paragraph => {
517 if self.block_state == BlockState::PendingExplicit {
518 self.close_all_open_styles();
519 self.block_state = BlockState::None;
520 } else {
521 self.push_event_end(Event::EndParagraph);
522 }
523 }
524 TagEnd::Strikethrough => self.close_style(&TextStyleKind::Strikethrough),
525 TagEnd::Strong => self.close_style(&TextStyleKind::Bold),
526 TagEnd::Table => self.push_event_end(Event::EndTable),
527 TagEnd::TableCell => self.handle_end_table_cell(),
528 TagEnd::TableHead => self.handle_end_table_head(),
529 TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
530 TagEnd::DefinitionList
532 | TagEnd::DefinitionListDefinition
533 | TagEnd::DefinitionListTitle
534 | TagEnd::FootnoteDefinition
535 | TagEnd::HtmlBlock
536 | TagEnd::MetadataBlock(_)
537 | TagEnd::Subscript
538 | TagEnd::Superscript => {}
539 }
540 }
541
542 fn handle_item_start(&mut self) {
543 let depth = self.list_stack.len().saturating_sub(1);
544 let level = u32::try_from(depth).map_or(u32::MAX, |v| v);
545 if let Some(ctx) = self.list_stack.last_mut() {
546 if ctx.ordered {
547 self.queue.push_back(Event::StartOrderedListItem {
548 start: ctx.pending_start.take(),
549 style_type: ListStyleType::Decimal,
550 level,
551 id: None,
552 });
553 } else {
554 self.queue.push_back(Event::StartUnorderedListItem {
555 style_type: ListStyleType::Disc,
556 level,
557 id: None,
558 });
559 }
560 ctx.item_open = true;
561 self.block_state = BlockState::Explicit;
562 }
563 }
564
565 fn handle_list_start(&mut self, start_opt: Option<u64>) {
566 self.list_stack.push(ListContext {
567 item_open: false,
568 ordered: start_opt.is_some(),
569 pending_start: start_opt,
570 });
571 }
572
573 fn handle_start_code_block(&mut self, syntax: Option<String>) {
576 self.code_block_buffer = Some(String::new());
577 self.in_preformatted = true;
578 self.push_event_start(Event::StartPreformatted { id: None, syntax });
579 }
580
581 fn handle_start_heading(&mut self, level: HeadingLevel) {
583 let level_u8 = match level {
584 HeadingLevel::H1 => 1,
585 HeadingLevel::H2 => 2,
586 HeadingLevel::H3 => 3,
587 HeadingLevel::H4 => 4,
588 HeadingLevel::H5 => 5,
589 HeadingLevel::H6 => 6,
590 };
591 self.push_event_start(Event::StartHeading {
592 level: level_u8,
593 id: None,
594 });
595 }
596
597 fn handle_start_image(&mut self, dest_url: String, title: Option<String>) {
601 self.flush_pending_paragraph_start();
610 if let Some(link) = self.link.take() {
611 if link.started {
612 self.queue.push_back(Event::EndLink);
613 } else {
614 self.queue.push_back(Event::StartLink {
615 href: link.href,
616 id: None,
617 title: link.title,
618 });
619 self.queue.push_back(Event::EndLink);
620 }
621 }
622
623 self.image = Some(ImageBuffer {
624 alt_buf: String::new(),
625 title,
626 url: dest_url,
627 });
628 }
629
630 fn handle_start_link(&mut self, dest_url: String, title: Option<String>) {
635 self.link = Some(LinkBuffer {
636 href: dest_url,
637 started: false,
638 title,
639 });
640 }
641
642 fn handle_start_table_cell(&mut self) {
645 if self.in_table_head {
646 self.push_event_start(Event::StartTableHeader {
647 scope: Some(TableHeaderScope::Column),
648 abbr: None,
649 colspan: None,
650 rowspan: None,
651 id: None,
652 });
653 } else {
654 self.push_event_start(Event::StartTableCell {
655 colspan: None,
656 rowspan: None,
657 id: None,
658 });
659 }
660 }
661
662 fn handle_start_table_head(&mut self) {
664 self.in_table_head = true;
665 self.push_event_start(Event::StartTableRow { id: None });
666 }
667
668 fn handle_start_tag(&mut self, tag: MarkdownStartTag) {
674 match tag {
675 MarkdownStartTag::BlockQuote => {
676 self.push_event_start(Event::StartBlockQuote { id: None });
677 }
678 MarkdownStartTag::CodeBlock { syntax } => self.handle_start_code_block(syntax),
679 MarkdownStartTag::Emphasis => self.open_style(TextStyleKind::Italic),
680 MarkdownStartTag::Heading { level } => self.handle_start_heading(level),
681 MarkdownStartTag::Image { dest_url, title } => self.handle_start_image(dest_url, title),
682 MarkdownStartTag::Item => self.handle_item_start(),
683 MarkdownStartTag::Link { dest_url, title } => self.handle_start_link(dest_url, title),
684 MarkdownStartTag::List(start_opt) => self.handle_list_start(start_opt),
685 MarkdownStartTag::Paragraph => self.block_state = BlockState::PendingExplicit,
686 MarkdownStartTag::Strikethrough => self.open_style(TextStyleKind::Strikethrough),
687 MarkdownStartTag::Strong => self.open_style(TextStyleKind::Bold),
688 MarkdownStartTag::Table => self.push_event_start(Event::StartTable { id: None }),
689 MarkdownStartTag::TableCell => self.handle_start_table_cell(),
690 MarkdownStartTag::TableHead => self.handle_start_table_head(),
691 MarkdownStartTag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
692 }
693 }
694
695 fn handle_text(&mut self, content: String) {
696 if let Some(img) = &mut self.image {
697 img.alt_buf.push_str(&content);
698 } else if let Some(buf) = &mut self.code_block_buffer {
699 buf.push_str(&content);
700 } else {
701 self.emit_pending_link_start();
702 if self.block_state == BlockState::None {
703 self.queue.push_back(Event::StartParagraph {
704 alignment: None,
705 id: None,
706 });
707 self.block_state = BlockState::AutoParagraph;
708 }
709 self.flush_pending_styles();
710 self.queue.push_back(Event::Text { content });
711 }
712 }
713
714 fn next_pulldown_event(&mut self) -> Option<MarkdownPulldownEvent> {
715 self.cell.with_dependent_mut(|_, dep| {
716 dep.0.next().map(|event| match event {
717 pulldown_cmark::Event::Start(tag) => markdown_start_tag(tag)
718 .map_or(MarkdownPulldownEvent::Ignored, MarkdownPulldownEvent::Start),
719 pulldown_cmark::Event::End(tag_end) => MarkdownPulldownEvent::End(tag_end),
720 pulldown_cmark::Event::Text(text) => {
721 MarkdownPulldownEvent::Text(text.into_string())
722 }
723 pulldown_cmark::Event::Code(code) => {
724 MarkdownPulldownEvent::Code(code.into_string())
725 }
726 pulldown_cmark::Event::HardBreak => MarkdownPulldownEvent::HardBreak,
727 pulldown_cmark::Event::SoftBreak => MarkdownPulldownEvent::SoftBreak,
728 pulldown_cmark::Event::Rule => MarkdownPulldownEvent::Rule,
729 pulldown_cmark::Event::DisplayMath(_)
730 | pulldown_cmark::Event::FootnoteReference(_)
731 | pulldown_cmark::Event::Html(_)
732 | pulldown_cmark::Event::InlineHtml(_)
733 | pulldown_cmark::Event::InlineMath(_)
734 | pulldown_cmark::Event::TaskListMarker(_) => MarkdownPulldownEvent::Ignored,
735 })
736 })
737 }
738
739 fn process_next_pulldown_event(&mut self) {
740 let Some(pm_event) = self.next_pulldown_event() else {
741 if self.phase != Phase::Finished {
742 self.phase = Phase::Finished;
743 self.queue.push_back(Event::EndDocument);
744 }
745 return;
746 };
747
748 match pm_event {
749 MarkdownPulldownEvent::Start(tag) => self.handle_start_tag(tag),
750 MarkdownPulldownEvent::End(tag_end) => self.handle_end_tag(tag_end),
751 MarkdownPulldownEvent::Text(text) => self.handle_text(text),
752 MarkdownPulldownEvent::Code(code) => self.handle_code(code),
753 MarkdownPulldownEvent::HardBreak => {
754 if let Some(img) = &mut self.image {
755 img.alt_buf.push(' ');
756 } else if self.block_state == BlockState::PendingExplicit {
757 } else {
759 self.emit_pending_link_start();
760 self.queue.push_back(Event::LineBreak);
761 }
762 }
763 MarkdownPulldownEvent::SoftBreak => {
764 if let Some(img) = &mut self.image {
765 img.alt_buf.push(' ');
766 } else if self.block_state == BlockState::PendingExplicit {
767 } else {
769 self.emit_pending_link_start();
770 self.queue.push_back(Event::SoftBreak);
771 }
772 }
773 MarkdownPulldownEvent::Rule => {
774 self.queue.push_back(Event::ThematicBreak { id: None });
775 }
776 MarkdownPulldownEvent::Ignored => {}
777 }
778 }
779
780 fn push_event(&mut self, event: Event, state: BlockState) {
781 self.queue.push_back(event);
782 self.block_state = state;
783 }
784
785 fn push_event_end(&mut self, event: Event) {
786 self.close_all_open_styles();
787 self.push_event(event, BlockState::None);
788 }
789
790 fn push_event_start(&mut self, event: Event) {
791 self.push_event(event, BlockState::Explicit);
792 }
793}
794
795impl EventSource for MarkdownReader {
796 #[inline]
797 fn next_event(&mut self) -> Result<Option<Event>> {
798 if self.phase == Phase::NotStarted {
799 self.phase = Phase::Running;
800 return Ok(Some(Event::StartDocument {
801 id: None,
802 language: None,
803 metadata: None,
804 }));
805 }
806
807 if self.phase == Phase::Finished && self.queue.is_empty() {
808 return Ok(None);
809 }
810
811 while self.queue.is_empty() && self.phase != Phase::Finished {
812 self.process_next_pulldown_event();
813 }
814
815 Ok(self.queue.pop_front())
816 }
817}
818
819fn markdown_start_tag(tag: Tag<'_>) -> Option<MarkdownStartTag> {
820 match tag {
821 Tag::BlockQuote(_) => Some(MarkdownStartTag::BlockQuote),
822 Tag::CodeBlock(kind) => Some(MarkdownStartTag::CodeBlock {
823 syntax: code_block_syntax(kind),
824 }),
825 Tag::Emphasis => Some(MarkdownStartTag::Emphasis),
826 Tag::Heading { level, .. } => Some(MarkdownStartTag::Heading { level }),
827 Tag::Image {
828 dest_url, title, ..
829 } => Some(MarkdownStartTag::Image {
830 dest_url: dest_url.into_string(),
831 title: cow_to_optional_string(title),
832 }),
833 Tag::Item => Some(MarkdownStartTag::Item),
834 Tag::Link {
835 dest_url, title, ..
836 } => Some(MarkdownStartTag::Link {
837 dest_url: dest_url.into_string(),
838 title: cow_to_optional_string(title),
839 }),
840 Tag::List(start_opt) => Some(MarkdownStartTag::List(start_opt)),
841 Tag::Paragraph => Some(MarkdownStartTag::Paragraph),
842 Tag::Strikethrough => Some(MarkdownStartTag::Strikethrough),
843 Tag::Strong => Some(MarkdownStartTag::Strong),
844 Tag::Table(_) => Some(MarkdownStartTag::Table),
845 Tag::TableCell => Some(MarkdownStartTag::TableCell),
846 Tag::TableHead => Some(MarkdownStartTag::TableHead),
847 Tag::TableRow => Some(MarkdownStartTag::TableRow),
848 Tag::DefinitionList
849 | Tag::DefinitionListDefinition
850 | Tag::DefinitionListTitle
851 | Tag::FootnoteDefinition(_)
852 | Tag::HtmlBlock
853 | Tag::MetadataBlock(_)
854 | Tag::Subscript
855 | Tag::Superscript => None,
856 }
857}
858
859fn code_block_syntax(kind: CodeBlockKind<'_>) -> Option<String> {
860 match kind {
861 CodeBlockKind::Fenced(lang) if !lang.is_empty() => Some(lang.into_string()),
862 CodeBlockKind::Fenced(_) | CodeBlockKind::Indented => None,
863 }
864}
865
866fn cow_to_optional_string(value: CowStr<'_>) -> Option<String> {
867 if value.is_empty() {
868 None
869 } else {
870 Some(value.into_string())
871 }
872}
873
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline code arriving with no open block must be wrapped in an auto-opened paragraph.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_code(String::from("code"));

        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::StartTextStyle {
                kind: TextStyleKind::Code,
                id: None,
            },
            Event::Text {
                content: String::from("code"),
            },
            Event::EndTextStyle,
        ];
        assert_eq!(reader.queue, expected);
    }

    /// Text arriving with no open block must be wrapped in an auto-opened paragraph.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_text(String::from("hello"));

        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::Text {
                content: String::from("hello"),
            },
        ];
        assert_eq!(reader.queue, expected);
    }
}
928
#[cfg(test)]
mod send_static_assertions {
    /// Compiles only when `T` is `Send + 'static`; has no runtime effect.
    fn assert_send_static<T>()
    where
        T: Send + 'static,
    {
    }

    /// Compile-time guarantee that the reader can be moved across threads.
    #[test]
    fn markdown_reader_is_send_static() {
        assert_send_static::<crate::MarkdownReader>();
    }
}