1extern crate alloc;
61
62use alloc::collections::VecDeque;
63
64pub use docspec_core::EventSource;
65use docspec_core::{Depth, Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyle};
66use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
67
/// Describes the block, if any, that inline content is currently emitted into.
#[derive(Copy, Clone, Eq, PartialEq)]
enum BlockState {
    /// A paragraph the reader opened on its own because inline content
    /// arrived while no block was open.
    AutoParagraph,
    /// A block whose start event has already been queued.
    Explicit,
    /// No block is open.
    None,
    /// A Markdown paragraph has begun, but its `StartParagraph` event is
    /// held back until the first piece of content shows up.
    PendingExplicit,
}
80
/// Lifecycle of the event stream produced by the reader.
#[derive(Copy, Clone, Eq, PartialEq)]
enum Phase {
    /// The parser is exhausted and `EndDocument` has been queued.
    Finished,
    /// `StartDocument` has not been handed out yet.
    NotStarted,
    /// `StartDocument` was returned; parser events are being translated.
    Running,
}
91
/// Bookkeeping for one list that is currently open.
struct ListContext {
    /// `true` between an item's start event and its matching end event.
    item_open: bool,
    /// `true` for ordered lists, `false` for unordered ones.
    ordered: bool,
    /// Start number carried by the list tag; taken by the first item that
    /// is emitted for an ordered list.
    pending_start: Option<u64>,
}
102
/// State collected while the parser is inside an image.
struct ImageBuffer {
    /// Inline text seen inside the image, accumulated as its alt text.
    alt_buf: String,
    /// Image title, or `None` when the Markdown title was empty.
    title: Option<String>,
    /// Destination URL of the image.
    url: String,
}
112
/// State of the link the parser is currently inside.
struct LinkBuffer {
    /// Link destination.
    href: String,
    /// Whether the `StartLink` event for this link has been queued yet.
    started: bool,
    /// Link title, or `None` when the Markdown title was empty.
    title: Option<String>,
}
122
123pub struct MarkdownReader<'a> {
141 block_state: BlockState,
143 bold_depth: Depth,
145 code_block_buffer: Option<String>,
147 image: Option<ImageBuffer>,
149 in_table_head: bool,
151 italic_depth: Depth,
153 link: Option<LinkBuffer>,
155 list_stack: alloc::vec::Vec<ListContext>,
158 parser: Parser<'a>,
160 phase: Phase,
162 queue: VecDeque<Event>,
164 strikethrough_depth: Depth,
166}
167
168impl<'a> MarkdownReader<'a> {
169 fn close_current_item_if_open(&mut self) {
170 if let Some(ctx) = self.list_stack.last_mut() {
171 if ctx.item_open {
172 if ctx.ordered {
173 self.queue.push_back(Event::EndOrderedListItem);
174 } else {
175 self.queue.push_back(Event::EndUnorderedListItem);
176 }
177 ctx.item_open = false;
178 self.block_state = BlockState::None;
179 }
180 }
181 }
182
183 fn current_text_style(&self) -> TextStyle {
184 let mut style = TextStyle::default();
185 if self.bold_depth.is_positive() {
186 style = style.bold();
187 }
188 if self.italic_depth.is_positive() {
189 style = style.italic();
190 }
191 if self.strikethrough_depth.is_positive() {
192 style = style.strikethrough();
193 }
194 style
195 }
196
197 fn emit_pending_link_start(&mut self) {
200 self.flush_pending_paragraph_start();
201 if let Some(link) = self.link.as_mut() {
202 if !link.started {
203 self.queue.push_back(Event::StartLink {
204 href: link.href.clone(),
205 id: None,
206 title: link.title.clone(),
207 });
208 link.started = true;
209 }
210 }
211 }
212
213 fn flush_pending_paragraph_start(&mut self) {
216 if self.block_state == BlockState::PendingExplicit {
217 self.queue.push_back(Event::StartParagraph {
218 alignment: None,
219 id: None,
220 });
221 self.block_state = BlockState::Explicit;
222 }
223 }
224
225 fn handle_code(&mut self, content: String) {
226 if let Some(img) = &mut self.image {
227 img.alt_buf.push_str(&content);
228 } else {
229 self.emit_pending_link_start();
230 if self.block_state == BlockState::None {
231 self.queue.push_back(Event::StartParagraph {
232 alignment: None,
233 id: None,
234 });
235 self.block_state = BlockState::AutoParagraph;
236 }
237 self.queue.push_back(Event::Text {
238 content,
239 style: self.current_text_style().code(),
240 });
241 }
242 }
243
244 fn handle_end_code_block(&mut self) {
247 if let Some(buf) = self.code_block_buffer.take() {
248 let content = buf.strip_suffix('\n').unwrap_or(&buf).to_owned();
249 if !content.is_empty() {
250 self.queue.push_back(Event::Text {
251 content,
252 style: TextStyle::default(),
253 });
254 }
255 }
256 self.push_event_end(Event::EndPreformatted);
257 }
258
259 fn handle_end_image(&mut self) {
263 let Some(img) = self.image.take() else { return };
264 self.flush_pending_paragraph_start();
265 let trimmed = img.alt_buf.trim();
266 let alt = if trimmed.is_empty() {
267 None
268 } else {
269 Some(trimmed.to_owned())
270 };
271 let decorative = alt.is_none();
272 self.queue.push_back(Event::Image {
273 source: ImageSource::Uri { uri: img.url },
274 alt,
275 title: img.title,
276 decorative,
277 id: None,
278 });
279 }
280
281 fn handle_end_item(&mut self) {
284 if self.block_state == BlockState::AutoParagraph {
285 self.queue.push_back(Event::EndParagraph);
286 }
287 self.close_current_item_if_open();
288 self.block_state = BlockState::None;
289 }
290
291 fn handle_end_link(&mut self) {
293 let Some(link) = self.link.take() else { return };
294 if link.started {
295 self.queue.push_back(Event::EndLink);
296 } else {
297 self.flush_pending_paragraph_start();
298 self.queue.push_back(Event::StartLink {
299 href: link.href,
300 id: None,
301 title: link.title,
302 });
303 self.queue.push_back(Event::EndLink);
304 }
305 }
306
307 fn handle_end_list(&mut self) {
309 self.close_current_item_if_open();
310 self.list_stack.pop();
311 self.block_state = BlockState::None;
312 }
313
314 fn handle_end_table_cell(&mut self) {
317 if self.in_table_head {
318 self.push_event_end(Event::EndTableHeader);
319 } else {
320 self.push_event_end(Event::EndTableCell);
321 }
322 }
323
324 fn handle_end_table_head(&mut self) {
326 self.push_event_end(Event::EndTableRow);
327 self.in_table_head = false;
328 }
329
330 fn handle_end_tag(&mut self, tag_end: TagEnd) {
336 match tag_end {
337 TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
338 TagEnd::CodeBlock => self.handle_end_code_block(),
339 TagEnd::Emphasis => self.italic_depth.dec(),
340 TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
341 TagEnd::Image => self.handle_end_image(),
342 TagEnd::Item => self.handle_end_item(),
343 TagEnd::Link => self.handle_end_link(),
344 TagEnd::List(_) => self.handle_end_list(),
345 TagEnd::Paragraph => {
346 if self.block_state == BlockState::PendingExplicit {
347 self.block_state = BlockState::None;
348 } else {
349 self.push_event_end(Event::EndParagraph);
350 }
351 }
352 TagEnd::Strikethrough => self.strikethrough_depth.dec(),
353 TagEnd::Strong => self.bold_depth.dec(),
354 TagEnd::Table => self.push_event_end(Event::EndTable),
355 TagEnd::TableCell => self.handle_end_table_cell(),
356 TagEnd::TableHead => self.handle_end_table_head(),
357 TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
358 TagEnd::DefinitionList
360 | TagEnd::DefinitionListDefinition
361 | TagEnd::DefinitionListTitle
362 | TagEnd::FootnoteDefinition
363 | TagEnd::HtmlBlock
364 | TagEnd::MetadataBlock(_)
365 | TagEnd::Subscript
366 | TagEnd::Superscript => {}
367 }
368 }
369
370 fn handle_item_start(&mut self) {
371 let depth = self.list_stack.len().saturating_sub(1);
372 let level = u32::try_from(depth).map_or(u32::MAX, |v| v);
373 if let Some(ctx) = self.list_stack.last_mut() {
374 if ctx.ordered {
375 self.queue.push_back(Event::StartOrderedListItem {
376 start: ctx.pending_start.take(),
377 style_type: ListStyleType::Decimal,
378 level,
379 id: None,
380 });
381 } else {
382 self.queue.push_back(Event::StartUnorderedListItem {
383 style_type: ListStyleType::Disc,
384 level,
385 id: None,
386 });
387 }
388 ctx.item_open = true;
389 self.block_state = BlockState::Explicit;
390 }
391 }
392
393 fn handle_list_start(&mut self, start_opt: Option<u64>) {
394 self.list_stack.push(ListContext {
395 item_open: false,
396 ordered: start_opt.is_some(),
397 pending_start: start_opt,
398 });
399 }
400
401 fn handle_start_code_block(&mut self, kind: CodeBlockKind<'a>) {
404 let syntax = match kind {
405 CodeBlockKind::Fenced(lang) if !lang.is_empty() => Some(lang.into_string()),
406 CodeBlockKind::Fenced(_) | CodeBlockKind::Indented => None,
407 };
408 self.code_block_buffer = Some(String::new());
409 self.push_event_start(Event::StartPreformatted { id: None, syntax });
410 }
411
412 fn handle_start_heading(&mut self, level: HeadingLevel) {
414 let level_u8 = match level {
415 HeadingLevel::H1 => 1,
416 HeadingLevel::H2 => 2,
417 HeadingLevel::H3 => 3,
418 HeadingLevel::H4 => 4,
419 HeadingLevel::H5 => 5,
420 HeadingLevel::H6 => 6,
421 };
422 self.push_event_start(Event::StartHeading {
423 level: level_u8,
424 id: None,
425 });
426 }
427
428 fn handle_start_image(&mut self, dest_url: CowStr<'a>, title: CowStr<'a>) {
432 self.flush_pending_paragraph_start();
441 if let Some(link) = self.link.take() {
442 if link.started {
443 self.queue.push_back(Event::EndLink);
444 } else {
445 self.queue.push_back(Event::StartLink {
446 href: link.href,
447 id: None,
448 title: link.title,
449 });
450 self.queue.push_back(Event::EndLink);
451 }
452 }
453
454 self.image = Some(ImageBuffer {
455 alt_buf: String::new(),
456 title: if title.is_empty() {
457 None
458 } else {
459 Some(title.into_string())
460 },
461 url: dest_url.into_string(),
462 });
463 }
464
465 fn handle_start_link(&mut self, dest_url: CowStr<'a>, title: CowStr<'a>) {
470 self.link = Some(LinkBuffer {
471 href: dest_url.into_string(),
472 started: false,
473 title: if title.is_empty() {
474 None
475 } else {
476 Some(title.into_string())
477 },
478 });
479 }
480
481 fn handle_start_table_cell(&mut self) {
484 if self.in_table_head {
485 self.push_event_start(Event::StartTableHeader {
486 scope: Some(TableHeaderScope::Column),
487 abbr: None,
488 colspan: None,
489 rowspan: None,
490 id: None,
491 });
492 } else {
493 self.push_event_start(Event::StartTableCell {
494 colspan: None,
495 rowspan: None,
496 id: None,
497 });
498 }
499 }
500
501 fn handle_start_table_head(&mut self) {
503 self.in_table_head = true;
504 self.push_event_start(Event::StartTableRow { id: None });
505 }
506
507 fn handle_start_tag(&mut self, tag: Tag<'a>) {
513 match tag {
514 Tag::BlockQuote(_) => self.push_event_start(Event::StartBlockQuote { id: None }),
515 Tag::CodeBlock(kind) => self.handle_start_code_block(kind),
516 Tag::Emphasis => self.italic_depth.inc(),
517 Tag::Heading { level, .. } => self.handle_start_heading(level),
518 Tag::Image {
519 dest_url, title, ..
520 } => self.handle_start_image(dest_url, title),
521 Tag::Item => self.handle_item_start(),
522 Tag::Link {
523 dest_url, title, ..
524 } => self.handle_start_link(dest_url, title),
525 Tag::List(start_opt) => self.handle_list_start(start_opt),
526 Tag::Paragraph => self.block_state = BlockState::PendingExplicit,
527 Tag::Strikethrough => self.strikethrough_depth.inc(),
528 Tag::Strong => self.bold_depth.inc(),
529 Tag::Table(_) => self.push_event_start(Event::StartTable { id: None }),
530 Tag::TableCell => self.handle_start_table_cell(),
531 Tag::TableHead => self.handle_start_table_head(),
532 Tag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
533 Tag::DefinitionList
535 | Tag::DefinitionListDefinition
536 | Tag::DefinitionListTitle
537 | Tag::FootnoteDefinition(_)
538 | Tag::HtmlBlock
539 | Tag::MetadataBlock(_)
540 | Tag::Subscript
541 | Tag::Superscript => {}
542 }
543 }
544
545 fn handle_text(&mut self, content: String) {
546 if let Some(img) = &mut self.image {
547 img.alt_buf.push_str(&content);
548 } else if let Some(buf) = &mut self.code_block_buffer {
549 buf.push_str(&content);
550 } else {
551 self.emit_pending_link_start();
552 if self.block_state == BlockState::None {
553 self.queue.push_back(Event::StartParagraph {
554 alignment: None,
555 id: None,
556 });
557 self.block_state = BlockState::AutoParagraph;
558 }
559 self.queue.push_back(Event::Text {
560 content,
561 style: self.current_text_style(),
562 });
563 }
564 }
565
566 #[inline]
579 #[must_use]
580 pub fn new(markdown: &'a str) -> Self {
581 let options = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
582 let parser = Parser::new_ext(markdown, options);
583 Self {
584 block_state: BlockState::None,
585 bold_depth: Depth::default(),
586 code_block_buffer: None,
587 image: None,
588 in_table_head: false,
589 italic_depth: Depth::default(),
590 link: None,
591 list_stack: Vec::new(),
592 parser,
593 phase: Phase::NotStarted,
594 queue: VecDeque::new(),
595 strikethrough_depth: Depth::default(),
596 }
597 }
598
599 fn process_next_pulldown_event(&mut self) {
600 let Some(pm_event) = self.parser.next() else {
601 if self.phase != Phase::Finished {
602 self.phase = Phase::Finished;
603 self.queue.push_back(Event::EndDocument);
604 }
605 return;
606 };
607
608 match pm_event {
609 pulldown_cmark::Event::Start(tag) => self.handle_start_tag(tag),
610 pulldown_cmark::Event::End(tag_end) => self.handle_end_tag(tag_end),
611 pulldown_cmark::Event::Text(text) => self.handle_text(text.into_string()),
612 pulldown_cmark::Event::Code(code) => self.handle_code(code.into_string()),
613 pulldown_cmark::Event::HardBreak => {
614 if let Some(img) = &mut self.image {
615 img.alt_buf.push(' ');
616 } else if self.block_state == BlockState::PendingExplicit {
617 } else {
619 self.emit_pending_link_start();
620 self.queue.push_back(Event::LineBreak);
621 }
622 }
623 pulldown_cmark::Event::SoftBreak => {
624 if let Some(img) = &mut self.image {
625 img.alt_buf.push(' ');
626 } else if self.block_state == BlockState::PendingExplicit {
627 } else {
629 self.emit_pending_link_start();
630 self.queue.push_back(Event::SoftBreak);
631 }
632 }
633 pulldown_cmark::Event::Rule => {
634 self.queue.push_back(Event::ThematicBreak { id: None });
635 }
636 pulldown_cmark::Event::DisplayMath(_)
637 | pulldown_cmark::Event::FootnoteReference(_)
638 | pulldown_cmark::Event::Html(_)
639 | pulldown_cmark::Event::InlineHtml(_)
640 | pulldown_cmark::Event::InlineMath(_)
641 | pulldown_cmark::Event::TaskListMarker(_) => {}
642 }
643 }
644
645 fn push_event(&mut self, event: Event, state: BlockState) {
646 self.queue.push_back(event);
647 self.block_state = state;
648 }
649
650 fn push_event_end(&mut self, event: Event) {
651 self.push_event(event, BlockState::None);
652 }
653
654 fn push_event_start(&mut self, event: Event) {
655 self.push_event(event, BlockState::Explicit);
656 }
657}
658
659impl EventSource for MarkdownReader<'_> {
660 #[inline]
661 fn next_event(&mut self) -> Result<Option<Event>> {
662 if self.phase == Phase::NotStarted {
663 self.phase = Phase::Running;
664 return Ok(Some(Event::StartDocument {
665 id: None,
666 language: None,
667 metadata: None,
668 }));
669 }
670
671 if self.phase == Phase::Finished && self.queue.is_empty() {
672 return Ok(None);
673 }
674
675 while self.queue.is_empty() && self.phase != Phase::Finished {
676 self.process_next_pulldown_event();
677 }
678
679 Ok(self.queue.pop_front())
680 }
681}
682
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline code arriving while no block is open must be wrapped in an
    /// automatically opened paragraph.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::new("");
        reader.handle_code("code".to_string());

        let mut queued = reader.queue.iter();
        assert_eq!(
            queued.next(),
            Some(&Event::StartParagraph {
                alignment: None,
                id: None,
            })
        );
        assert_eq!(
            queued.next(),
            Some(&Event::Text {
                content: "code".to_string(),
                style: TextStyle::default().code(),
            })
        );
        assert_eq!(queued.next(), None);
    }

    /// Plain text arriving while no block is open must be wrapped in an
    /// automatically opened paragraph.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::new("");
        reader.handle_text("hello".to_string());

        let mut queued = reader.queue.iter();
        assert_eq!(
            queued.next(),
            Some(&Event::StartParagraph {
                alignment: None,
                id: None,
            })
        );
        assert_eq!(
            queued.next(),
            Some(&Event::Text {
                content: "hello".to_string(),
                style: TextStyle::default(),
            })
        );
        assert_eq!(queued.next(), None);
    }
}