Skip to main content

docspec_markdown_reader/
lib.rs

1//! Markdown to `DocSpec` event stream reader.
2//!
3//! This crate provides a [`MarkdownReader`] that implements [`EventSource`] to convert
4//! Markdown documents into the `DocSpec` event stream format. It uses `pulldown-cmark`
5//! to parse CommonMark-compliant Markdown and emits typed events representing document
6//! structure.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use docspec_markdown_reader::{MarkdownReader, EventSource};
12//!
13//! let markdown = "# Hello\n\nWorld";
14//! let mut reader = MarkdownReader::from_str(markdown);
15//!
16//! while let Some(event) = reader.next_event()? {
17//!     println!("{event:?}");
18//! }
19//! # Ok::<(), docspec_core::Error>(())
20//! ```
21//!
22//! # Supported Elements
23//!
24//! - Headings (h1–h6) → `StartHeading` / `EndHeading`
25//! - Paragraphs → `StartParagraph` / `EndParagraph`
26//! - Block quotes → `StartBlockQuote` / `EndBlockQuote`
27//! - Code blocks → `StartPreformatted` / `EndPreformatted`
28//! - Bold text → `Text { style: TextStyle { bold: true, .. }, .. }`
29//! - Italic text → `Text { style: TextStyle { italic: true, .. }, .. }`
30//! - Inline code → `Text { style: TextStyle { code: true, .. }, .. }`
31//! - Strikethrough → `Text { style: TextStyle { strikethrough: true, .. }, .. }`
32//! - Images → `Image { source: Uri, alt, title, decorative }`
33//! - Hard line breaks → `LineBreak`
34//! - Soft line breaks → `SoftBreak`
35//! - Thematic breaks → `ThematicBreak`
36//! - Tables → `StartTable` / `EndTable`, `StartTableRow` / `EndTableRow`,
37//!   `StartTableHeader` / `EndTableHeader`, `StartTableCell` / `EndTableCell`
38//!   (GFM column alignment syntax is parsed, but alignment data is discarded)
39//! - Bullet lists → `StartUnorderedListItem` / `EndUnorderedListItem`
40//! - Numbered lists → `StartOrderedListItem` / `EndOrderedListItem`
41//!   (`start: Option<u64>` is `Some(n)` on the first item of each list, `None` on subsequent items;
42//!   child items may nest inside their parent's `Start*`/`End*` pair with `level` indicating
43//!   indent depth; task list markers (`- [ ]`/`- [x]`) are parsed as literal text)
44//! - Links → `StartLink { href, title }` / `EndLink` (inline, reference, collapsed,
45//!   shortcut, autolink, and email autolink variants — all resolved to inline form
46//!   by pulldown-cmark; image-inside-link closes the link before emitting the image
47//!   as a sibling block: content preceding the image stays inside the link, content
48//!   following the image is outside the link, and the link is empty only when the
49//!   image is the sole link label, e.g. `[![alt](img)](url)`)
50//!
51//! # Unsupported Elements
52//!
53//! The following elements are not emitted as structured events. Text content is
54//! recursively extracted where applicable; structure is silently dropped:
55//! - Definition lists and footnotes
56//! - HTML blocks and inline HTML
57//! - Math blocks and inline math
58//! - Subscript and superscript formatting
59//!
60//! # Memory Model
61//!
62//! `MarkdownReader` owns its source text for the parser's lifetime. While events
63//! are emitted one at a time via [`EventSource::next_event`] (the stream-event
64//! guarantee is preserved), the source `String` is held in memory until the reader
65//! is dropped. This is a constraint of `pulldown-cmark`, which is permanently
66//! borrow-based by design (see [pulldown-cmark issue #463]).
67//!
68//! For contrast, `HtmlReader` (from `docspec-html-reader`) streams its source via a
69//! 16 KB sliding-window buffer and does not hold the full document in memory.
70//!
71//! [pulldown-cmark issue #463]: https://github.com/raphlinus/pulldown-cmark/issues/463
72
73extern crate alloc;
74
// `all()` with no predicates is always true, so this `allow` is unconditionally active
// for the whole module.
// NOTE(review): presumably the `self_cell!` expansion is what trips `clippy::mem_forget`
// — confirm before removing.
#[cfg_attr(all(), allow(clippy::mem_forget))]
mod parser_cell {
    use self_cell::self_cell;

    use super::MarkdownParser;

    // Generates `ParserCell`, which stores the owned source `String` together with the
    // `MarkdownParser` that borrows from it.
    self_cell!(
        pub(super) struct ParserCell {
            owner: String,
            #[covariant]
            dependent: MarkdownParser,
        }
    );
}
89
90use alloc::collections::VecDeque;
91use std::io::{Read, Seek};
92
93pub use docspec_core::EventSource;
94use docspec_core::{Depth, Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyle};
95use parser_cell::ParserCell;
96use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
97
/// Newtype around `pulldown-cmark`'s [`Parser`]; it is the dependent (borrowing) half of
/// `ParserCell`.
struct MarkdownParser<'a>(Parser<'a>);
99
/// Whether content is inside a block-level element.
///
/// Text and inline code consult this state to decide whether a paragraph has to be
/// auto-opened before they are emitted.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
    /// Inside an auto-opened paragraph (text arrived outside any explicit block).
    AutoParagraph,
    /// Inside an explicit block: set after any block start event is queued, when a list
    /// item starts, and when a deferred `StartParagraph` is finally flushed.
    Explicit,
    /// Not inside any block context.
    None,
    /// Explicit block whose `StartParagraph` is deferred until the first real event.
    /// If the paragraph ends while still pending, no paragraph events are emitted at all.
    PendingExplicit,
}
112
/// Document processing phase, used by `next_event` to bracket the stream with
/// `StartDocument` / `EndDocument`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
    /// `EndDocument` has been emitted (queued); once the queue drains the reader yields `None`.
    Finished,
    /// `StartDocument` not yet emitted.
    NotStarted,
    /// Processing events between `StartDocument` and `EndDocument`.
    Running,
}
123
/// Context for a single list level tracked by [`MarkdownReader`].
///
/// One context is pushed per list start and popped at the matching list end.
struct ListContext {
    /// Whether the item at this list level is currently open (start emitted, end not yet emitted).
    item_open: bool,
    /// Whether this list is ordered (numbered) rather than unordered (bulleted).
    /// Derived from the list start number being present.
    ordered: bool,
    /// Start number to attach to the next item emitted; `Some(n)` only before the first
    /// item is emitted, then `None` for all subsequent items in the same list.
    pending_start: Option<u64>,
}
134
/// Buffered image state during image alt text collection.
///
/// While a buffer is present, text, inline code and line breaks are appended to
/// `alt_buf` instead of being emitted as events.
struct ImageBuffer {
    /// Accumulated alt text (breaks are recorded as a single space).
    alt_buf: String,
    /// Image title if provided.
    title: Option<String>,
    /// Image URL.
    url: String,
}
144
/// Owned projection of a `pulldown_cmark::Event`, built inside the parser cell so that
/// no data borrowed from the source text has to leave it.
enum MarkdownPulldownEvent {
    /// Inline code span content.
    Code(String),
    /// Closing tag, forwarded unchanged.
    End(TagEnd),
    /// Hard line break.
    HardBreak,
    /// Event or start tag that this reader does not map to any output.
    Ignored,
    /// Thematic break.
    Rule,
    /// Soft line break.
    SoftBreak,
    /// Opening tag reduced to the data this reader uses.
    Start(MarkdownStartTag),
    /// Plain text content.
    Text(String),
}
155
/// Owned subset of `pulldown_cmark::Tag` covering only the start tags this reader
/// handles; unsupported tags never reach this type.
enum MarkdownStartTag {
    /// Block quote opening.
    BlockQuote,
    /// Code block opening.
    CodeBlock {
        /// Fence info string, or `None` for indented blocks and empty info strings.
        syntax: Option<String>,
    },
    /// Emphasis (italic) opening.
    Emphasis,
    /// Heading opening.
    Heading {
        /// Heading level as reported by the parser.
        level: HeadingLevel,
    },
    /// Image opening.
    Image {
        /// Image destination URL.
        dest_url: String,
        /// Image title, `None` when empty.
        title: Option<String>,
    },
    /// List item opening.
    Item,
    /// Link opening.
    Link {
        /// Link destination URL.
        dest_url: String,
        /// Link title, `None` when empty.
        title: Option<String>,
    },
    /// List opening; `Some(n)` carries the start number of an ordered list.
    List(Option<u64>),
    /// Paragraph opening.
    Paragraph,
    /// Strikethrough opening.
    Strikethrough,
    /// Strong (bold) opening.
    Strong,
    /// Table opening.
    Table,
    /// Table cell opening.
    TableCell,
    /// Table header row opening.
    TableHead,
    /// Table body row opening.
    TableRow,
}
183
/// Buffered link state during link inline content collection.
///
/// Holding the link back lets an image inside the link close it before the image is
/// emitted.
struct LinkBuffer {
    /// Link target URL.
    href: String,
    /// Whether `StartLink` has been emitted yet (deferred until first inline event arrives).
    started: bool,
    /// Optional link title (from `CommonMark` `[text](url "title")` syntax).
    title: Option<String>,
}
193
/// A streaming Markdown reader that implements [`EventSource`].
///
/// `MarkdownReader` parses Markdown using `pulldown-cmark` and emits `DocSpec` events
/// one at a time. It handles the mapping from `pulldown-cmark`'s event model to `DocSpec`'s
/// event model, including tracking inline formatting state.
///
/// One parser event may produce zero, one, or several `DocSpec` events; they are staged
/// in an internal queue and handed out one per [`EventSource::next_event`] call.
///
/// # Example
///
/// ```
/// use docspec_markdown_reader::{MarkdownReader, EventSource};
///
/// let mut reader = MarkdownReader::from_str("**bold** and *italic*");
/// while let Some(event) = reader.next_event()? {
///     // Process events...
/// }
/// # Ok::<(), docspec_core::Error>(())
/// ```
pub struct MarkdownReader {
    /// Current block-level context.
    block_state: BlockState,
    /// Nesting depth for bold (strong) formatting.
    bold_depth: Depth,
    /// Owned source text and parser borrowing from it.
    cell: ParserCell,
    /// Buffered code block text (accumulated until `EndCodeBlock` to strip trailing newline).
    code_block_buffer: Option<String>,
    /// Buffered image being processed (alt text accumulation).
    image: Option<ImageBuffer>,
    /// Whether the parser is currently inside a table header row.
    in_table_head: bool,
    /// Nesting depth for italic (emphasis) formatting.
    italic_depth: Depth,
    /// Buffered link being processed (deferred Start emission for image-in-link extraction).
    link: Option<LinkBuffer>,
    /// LIFO stack of list contexts. `len()` gives the current nesting depth;
    /// `level = list_stack.len().saturating_sub(1)` at item-emit time.
    list_stack: alloc::vec::Vec<ListContext>,
    /// Document processing phase.
    phase: Phase,
    /// Queue of `DocSpec` events to emit.
    queue: VecDeque<Event>,
    /// Nesting depth for strikethrough formatting.
    strikethrough_depth: Depth,
}
238
239impl MarkdownReader {
240    fn close_current_item_if_open(&mut self) {
241        if let Some(ctx) = self.list_stack.last_mut() {
242            if ctx.item_open {
243                if ctx.ordered {
244                    self.queue.push_back(Event::EndOrderedListItem);
245                } else {
246                    self.queue.push_back(Event::EndUnorderedListItem);
247                }
248                ctx.item_open = false;
249                self.block_state = BlockState::None;
250            }
251        }
252    }
253
254    fn current_text_style(&self) -> TextStyle {
255        let mut style = TextStyle::default();
256        if self.bold_depth.is_positive() {
257            style = style.bold();
258        }
259        if self.italic_depth.is_positive() {
260            style = style.italic();
261        }
262        if self.strikethrough_depth.is_positive() {
263            style = style.strikethrough();
264        }
265        style
266    }
267
268    /// Emits `StartLink` for the buffered link if it hasn't been emitted yet.
269    /// Called before any inline event that would belong inside a link.
270    fn emit_pending_link_start(&mut self) {
271        self.flush_pending_paragraph_start();
272        if let Some(link) = self.link.as_mut() {
273            if !link.started {
274                self.queue.push_back(Event::StartLink {
275                    href: link.href.clone(),
276                    id: None,
277                    title: link.title.clone(),
278                });
279                link.started = true;
280            }
281        }
282    }
283
284    /// Emits `StartParagraph` for the deferred paragraph if it hasn't been emitted yet.
285    /// Called before any committing event that would belong inside a paragraph.
286    fn flush_pending_paragraph_start(&mut self) {
287        if self.block_state == BlockState::PendingExplicit {
288            self.queue.push_back(Event::StartParagraph {
289                alignment: None,
290                id: None,
291            });
292            self.block_state = BlockState::Explicit;
293        }
294    }
295
296    fn from_owned_string(source: String) -> Self {
297        let options = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
298        let cell = ParserCell::new(source, |s| MarkdownParser(Parser::new_ext(s, options)));
299        Self {
300            block_state: BlockState::None,
301            bold_depth: Depth::default(),
302            cell,
303            code_block_buffer: None,
304            image: None,
305            in_table_head: false,
306            italic_depth: Depth::default(),
307            link: None,
308            list_stack: Vec::new(),
309            phase: Phase::NotStarted,
310            queue: VecDeque::new(),
311            strikethrough_depth: Depth::default(),
312        }
313    }
314
315    /// Creates a `MarkdownReader` from any `Read + Seek` source.
316    ///
317    /// Reads the entire source into memory (required by `pulldown_cmark`'s
318    /// borrow-based parser).
319    ///
320    /// # Errors
321    ///
322    /// Returns [`Error::Io`](docspec_core::Error::Io) if reading fails.
323    #[inline]
324    pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
325        let mut source = String::new();
326        reader.read_to_string(&mut source)?;
327        Ok(Self::from_owned_string(source))
328    }
329
330    /// Creates a `MarkdownReader` from a string slice.
331    ///
332    /// The input is copied into an owned `String` for the parser's lifetime.
333    ///
334    /// # Example
335    ///
336    /// ```
337    /// use docspec_markdown_reader::MarkdownReader;
338    ///
339    /// let reader = MarkdownReader::from_str("# Hello World");
340    /// ```
341    #[inline]
342    #[must_use]
343    #[expect(
344        clippy::should_implement_trait,
345        reason = "constructor name is required for reader API consistency"
346    )]
347    pub fn from_str(input: &str) -> Self {
348        Self::from_owned_string(input.to_owned())
349    }
350
351    fn handle_code(&mut self, content: String) {
352        if let Some(img) = &mut self.image {
353            img.alt_buf.push_str(&content);
354        } else {
355            self.emit_pending_link_start();
356            if self.block_state == BlockState::None {
357                self.queue.push_back(Event::StartParagraph {
358                    alignment: None,
359                    id: None,
360                });
361                self.block_state = BlockState::AutoParagraph;
362            }
363            self.queue.push_back(Event::Text {
364                content,
365                style: self.current_text_style().code(),
366            });
367        }
368    }
369
370    /// Emits the buffered code block content (stripping the parser-added trailing newline)
371    /// followed by `EndPreformatted`. Skips the text event if the buffer is empty.
372    fn handle_end_code_block(&mut self) {
373        if let Some(buf) = self.code_block_buffer.take() {
374            let content = buf.strip_suffix('\n').unwrap_or(&buf).to_owned();
375            if !content.is_empty() {
376                self.queue.push_back(Event::Text {
377                    content,
378                    style: TextStyle::default(),
379                });
380            }
381        }
382        self.push_event_end(Event::EndPreformatted);
383    }
384
385    /// Emits an `Image` event from the accumulated image buffer, deriving
386    /// `decorative = true` when the trimmed alt text is empty. Consumes the
387    /// in-progress image state; does nothing if no image is in progress.
388    fn handle_end_image(&mut self) {
389        let Some(img) = self.image.take() else { return };
390        self.flush_pending_paragraph_start();
391        let trimmed = img.alt_buf.trim();
392        let alt = if trimmed.is_empty() {
393            None
394        } else {
395            Some(trimmed.to_owned())
396        };
397        let decorative = alt.is_none();
398        self.queue.push_back(Event::Image {
399            source: ImageSource::Uri { uri: img.url },
400            alt,
401            title: img.title,
402            decorative,
403            id: None,
404        });
405    }
406
407    /// Closes an auto-opened paragraph if one is open, then closes the current
408    /// list item and resets block state.
409    fn handle_end_item(&mut self) {
410        if self.block_state == BlockState::AutoParagraph {
411            self.queue.push_back(Event::EndParagraph);
412        }
413        self.close_current_item_if_open();
414        self.block_state = BlockState::None;
415    }
416
417    /// Emits `EndLink` (and `StartLink` if not yet emitted) for the buffered link.
418    fn handle_end_link(&mut self) {
419        let Some(link) = self.link.take() else { return };
420        if link.started {
421            self.queue.push_back(Event::EndLink);
422        } else {
423            self.flush_pending_paragraph_start();
424            self.queue.push_back(Event::StartLink {
425                href: link.href,
426                id: None,
427                title: link.title,
428            });
429            self.queue.push_back(Event::EndLink);
430        }
431    }
432
433    /// Closes the current list item if open, pops the list context, and resets block state.
434    fn handle_end_list(&mut self) {
435        self.close_current_item_if_open();
436        self.list_stack.pop();
437        self.block_state = BlockState::None;
438    }
439
440    /// Emits `EndTableCell` or `EndTableHeader` depending on whether the parser
441    /// is currently inside a table header row.
442    fn handle_end_table_cell(&mut self) {
443        if self.in_table_head {
444            self.push_event_end(Event::EndTableHeader);
445        } else {
446            self.push_event_end(Event::EndTableCell);
447        }
448    }
449
450    /// Emits `EndTableRow` and clears the table-head flag for a table head closing tag.
451    fn handle_end_table_head(&mut self) {
452        self.push_event_end(Event::EndTableRow);
453        self.in_table_head = false;
454    }
455
456    /// Dispatches a `pulldown-cmark` end tag to the appropriate per-tag handler.
457    ///
458    /// Tags in the explicit ignore list below are known-unsupported elements whose
459    /// structure is intentionally dropped (text content may still be extracted by
460    /// other event handlers).
461    fn handle_end_tag(&mut self, tag_end: TagEnd) {
462        match tag_end {
463            TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
464            TagEnd::CodeBlock => self.handle_end_code_block(),
465            TagEnd::Emphasis => self.italic_depth.dec(),
466            TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
467            TagEnd::Image => self.handle_end_image(),
468            TagEnd::Item => self.handle_end_item(),
469            TagEnd::Link => self.handle_end_link(),
470            TagEnd::List(_) => self.handle_end_list(),
471            TagEnd::Paragraph => {
472                if self.block_state == BlockState::PendingExplicit {
473                    self.block_state = BlockState::None;
474                } else {
475                    self.push_event_end(Event::EndParagraph);
476                }
477            }
478            TagEnd::Strikethrough => self.strikethrough_depth.dec(),
479            TagEnd::Strong => self.bold_depth.dec(),
480            TagEnd::Table => self.push_event_end(Event::EndTable),
481            TagEnd::TableCell => self.handle_end_table_cell(),
482            TagEnd::TableHead => self.handle_end_table_head(),
483            TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
484            // Tags intentionally ignored (structure dropped, text extracted elsewhere):
485            TagEnd::DefinitionList
486            | TagEnd::DefinitionListDefinition
487            | TagEnd::DefinitionListTitle
488            | TagEnd::FootnoteDefinition
489            | TagEnd::HtmlBlock
490            | TagEnd::MetadataBlock(_)
491            | TagEnd::Subscript
492            | TagEnd::Superscript => {}
493        }
494    }
495
496    fn handle_item_start(&mut self) {
497        let depth = self.list_stack.len().saturating_sub(1);
498        let level = u32::try_from(depth).map_or(u32::MAX, |v| v);
499        if let Some(ctx) = self.list_stack.last_mut() {
500            if ctx.ordered {
501                self.queue.push_back(Event::StartOrderedListItem {
502                    start: ctx.pending_start.take(),
503                    style_type: ListStyleType::Decimal,
504                    level,
505                    id: None,
506                });
507            } else {
508                self.queue.push_back(Event::StartUnorderedListItem {
509                    style_type: ListStyleType::Disc,
510                    level,
511                    id: None,
512                });
513            }
514            ctx.item_open = true;
515            self.block_state = BlockState::Explicit;
516        }
517    }
518
519    fn handle_list_start(&mut self, start_opt: Option<u64>) {
520        self.list_stack.push(ListContext {
521            item_open: false,
522            ordered: start_opt.is_some(),
523            pending_start: start_opt,
524        });
525    }
526
527    /// Emits `StartPreformatted` for a code block opening tag, initialising
528    /// the internal code-block buffer for content accumulation.
529    fn handle_start_code_block(&mut self, syntax: Option<String>) {
530        self.code_block_buffer = Some(String::new());
531        self.push_event_start(Event::StartPreformatted { id: None, syntax });
532    }
533
534    /// Emits `StartHeading` after mapping a `pulldown-cmark` `HeadingLevel` to a `u8` level.
535    fn handle_start_heading(&mut self, level: HeadingLevel) {
536        let level_u8 = match level {
537            HeadingLevel::H1 => 1,
538            HeadingLevel::H2 => 2,
539            HeadingLevel::H3 => 3,
540            HeadingLevel::H4 => 4,
541            HeadingLevel::H5 => 5,
542            HeadingLevel::H6 => 6,
543        };
544        self.push_event_start(Event::StartHeading {
545            level: level_u8,
546            id: None,
547        });
548    }
549
550    /// Initialises image state for alt-text accumulation when an image opening tag is
551    /// encountered. The title is stored as `None` when the pulldown-cmark title string
552    /// is empty.
553    fn handle_start_image(&mut self, dest_url: String, title: Option<String>) {
554        // Image-in-link extraction: close the link before processing the image so the
555        // image can be emitted as a sibling block (BlockNote and similar schemas do not
556        // allow block-level images inside inline links). When `link.started` is true, the
557        // link already contains preceding inline content — emit only `EndLink`. When it
558        // is false (image is the sole link label, e.g. `[![alt](img)](url)`), emit an
559        // empty `StartLink`/`EndLink` pair so the URL is preserved. `TagEnd::Image` fires
560        // `Event::Image` before `TagEnd::Paragraph`, so downstream writers close the
561        // surrounding paragraph before serialising the image as a sibling block.
562        self.flush_pending_paragraph_start();
563        if let Some(link) = self.link.take() {
564            if link.started {
565                self.queue.push_back(Event::EndLink);
566            } else {
567                self.queue.push_back(Event::StartLink {
568                    href: link.href,
569                    id: None,
570                    title: link.title,
571                });
572                self.queue.push_back(Event::EndLink);
573            }
574        }
575
576        self.image = Some(ImageBuffer {
577            alt_buf: String::new(),
578            title,
579            url: dest_url,
580        });
581    }
582
583    /// Stores link state for deferred `StartLink` emission.
584    ///
585    /// Emission is deferred until the first inline event arrives (lazy emission).
586    /// This allows image-in-link to be detected before any `StartLink` is emitted.
587    fn handle_start_link(&mut self, dest_url: String, title: Option<String>) {
588        self.link = Some(LinkBuffer {
589            href: dest_url,
590            started: false,
591            title,
592        });
593    }
594
595    /// Emits `StartTableHeader` or `StartTableCell` depending on whether the parser
596    /// is currently inside a table header row.
597    fn handle_start_table_cell(&mut self) {
598        if self.in_table_head {
599            self.push_event_start(Event::StartTableHeader {
600                scope: Some(TableHeaderScope::Column),
601                abbr: None,
602                colspan: None,
603                rowspan: None,
604                id: None,
605            });
606        } else {
607            self.push_event_start(Event::StartTableCell {
608                colspan: None,
609                rowspan: None,
610                id: None,
611            });
612        }
613    }
614
615    /// Sets the table-head flag and emits `StartTableRow` for a table head opening tag.
616    fn handle_start_table_head(&mut self) {
617        self.in_table_head = true;
618        self.push_event_start(Event::StartTableRow { id: None });
619    }
620
621    /// Dispatches a `pulldown-cmark` start tag to the appropriate per-tag handler.
622    ///
623    /// Tags in the explicit ignore list below are known-unsupported elements whose
624    /// structure is intentionally dropped (text content may still be extracted by
625    /// other event handlers).
626    fn handle_start_tag(&mut self, tag: MarkdownStartTag) {
627        match tag {
628            MarkdownStartTag::BlockQuote => {
629                self.push_event_start(Event::StartBlockQuote { id: None });
630            }
631            MarkdownStartTag::CodeBlock { syntax } => self.handle_start_code_block(syntax),
632            MarkdownStartTag::Emphasis => self.italic_depth.inc(),
633            MarkdownStartTag::Heading { level } => self.handle_start_heading(level),
634            MarkdownStartTag::Image { dest_url, title } => self.handle_start_image(dest_url, title),
635            MarkdownStartTag::Item => self.handle_item_start(),
636            MarkdownStartTag::Link { dest_url, title } => self.handle_start_link(dest_url, title),
637            MarkdownStartTag::List(start_opt) => self.handle_list_start(start_opt),
638            MarkdownStartTag::Paragraph => self.block_state = BlockState::PendingExplicit,
639            MarkdownStartTag::Strikethrough => self.strikethrough_depth.inc(),
640            MarkdownStartTag::Strong => self.bold_depth.inc(),
641            MarkdownStartTag::Table => self.push_event_start(Event::StartTable { id: None }),
642            MarkdownStartTag::TableCell => self.handle_start_table_cell(),
643            MarkdownStartTag::TableHead => self.handle_start_table_head(),
644            MarkdownStartTag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
645        }
646    }
647
648    fn handle_text(&mut self, content: String) {
649        if let Some(img) = &mut self.image {
650            img.alt_buf.push_str(&content);
651        } else if let Some(buf) = &mut self.code_block_buffer {
652            buf.push_str(&content);
653        } else {
654            self.emit_pending_link_start();
655            if self.block_state == BlockState::None {
656                self.queue.push_back(Event::StartParagraph {
657                    alignment: None,
658                    id: None,
659                });
660                self.block_state = BlockState::AutoParagraph;
661            }
662            self.queue.push_back(Event::Text {
663                content,
664                style: self.current_text_style(),
665            });
666        }
667    }
668
669    fn next_pulldown_event(&mut self) -> Option<MarkdownPulldownEvent> {
670        self.cell.with_dependent_mut(|_, dep| {
671            dep.0.next().map(|event| match event {
672                pulldown_cmark::Event::Start(tag) => markdown_start_tag(tag)
673                    .map_or(MarkdownPulldownEvent::Ignored, MarkdownPulldownEvent::Start),
674                pulldown_cmark::Event::End(tag_end) => MarkdownPulldownEvent::End(tag_end),
675                pulldown_cmark::Event::Text(text) => {
676                    MarkdownPulldownEvent::Text(text.into_string())
677                }
678                pulldown_cmark::Event::Code(code) => {
679                    MarkdownPulldownEvent::Code(code.into_string())
680                }
681                pulldown_cmark::Event::HardBreak => MarkdownPulldownEvent::HardBreak,
682                pulldown_cmark::Event::SoftBreak => MarkdownPulldownEvent::SoftBreak,
683                pulldown_cmark::Event::Rule => MarkdownPulldownEvent::Rule,
684                pulldown_cmark::Event::DisplayMath(_)
685                | pulldown_cmark::Event::FootnoteReference(_)
686                | pulldown_cmark::Event::Html(_)
687                | pulldown_cmark::Event::InlineHtml(_)
688                | pulldown_cmark::Event::InlineMath(_)
689                | pulldown_cmark::Event::TaskListMarker(_) => MarkdownPulldownEvent::Ignored,
690            })
691        })
692    }
693
694    fn process_next_pulldown_event(&mut self) {
695        let Some(pm_event) = self.next_pulldown_event() else {
696            if self.phase != Phase::Finished {
697                self.phase = Phase::Finished;
698                self.queue.push_back(Event::EndDocument);
699            }
700            return;
701        };
702
703        match pm_event {
704            MarkdownPulldownEvent::Start(tag) => self.handle_start_tag(tag),
705            MarkdownPulldownEvent::End(tag_end) => self.handle_end_tag(tag_end),
706            MarkdownPulldownEvent::Text(text) => self.handle_text(text),
707            MarkdownPulldownEvent::Code(code) => self.handle_code(code),
708            MarkdownPulldownEvent::HardBreak => {
709                if let Some(img) = &mut self.image {
710                    img.alt_buf.push(' ');
711                } else if self.block_state == BlockState::PendingExplicit {
712                    // emitting a break before StartParagraph would be malformed — discard
713                } else {
714                    self.emit_pending_link_start();
715                    self.queue.push_back(Event::LineBreak);
716                }
717            }
718            MarkdownPulldownEvent::SoftBreak => {
719                if let Some(img) = &mut self.image {
720                    img.alt_buf.push(' ');
721                } else if self.block_state == BlockState::PendingExplicit {
722                    // emitting a break before StartParagraph would be malformed — discard
723                } else {
724                    self.emit_pending_link_start();
725                    self.queue.push_back(Event::SoftBreak);
726                }
727            }
728            MarkdownPulldownEvent::Rule => {
729                self.queue.push_back(Event::ThematicBreak { id: None });
730            }
731            MarkdownPulldownEvent::Ignored => {}
732        }
733    }
734
735    fn push_event(&mut self, event: Event, state: BlockState) {
736        self.queue.push_back(event);
737        self.block_state = state;
738    }
739
740    fn push_event_end(&mut self, event: Event) {
741        self.push_event(event, BlockState::None);
742    }
743
744    fn push_event_start(&mut self, event: Event) {
745        self.push_event(event, BlockState::Explicit);
746    }
747}
748
749impl EventSource for MarkdownReader {
750    #[inline]
751    fn next_event(&mut self) -> Result<Option<Event>> {
752        if self.phase == Phase::NotStarted {
753            self.phase = Phase::Running;
754            return Ok(Some(Event::StartDocument {
755                id: None,
756                language: None,
757                metadata: None,
758            }));
759        }
760
761        if self.phase == Phase::Finished && self.queue.is_empty() {
762            return Ok(None);
763        }
764
765        while self.queue.is_empty() && self.phase != Phase::Finished {
766            self.process_next_pulldown_event();
767        }
768
769        Ok(self.queue.pop_front())
770    }
771}
772
773fn markdown_start_tag(tag: Tag<'_>) -> Option<MarkdownStartTag> {
774    match tag {
775        Tag::BlockQuote(_) => Some(MarkdownStartTag::BlockQuote),
776        Tag::CodeBlock(kind) => Some(MarkdownStartTag::CodeBlock {
777            syntax: code_block_syntax(kind),
778        }),
779        Tag::Emphasis => Some(MarkdownStartTag::Emphasis),
780        Tag::Heading { level, .. } => Some(MarkdownStartTag::Heading { level }),
781        Tag::Image {
782            dest_url, title, ..
783        } => Some(MarkdownStartTag::Image {
784            dest_url: dest_url.into_string(),
785            title: cow_to_optional_string(title),
786        }),
787        Tag::Item => Some(MarkdownStartTag::Item),
788        Tag::Link {
789            dest_url, title, ..
790        } => Some(MarkdownStartTag::Link {
791            dest_url: dest_url.into_string(),
792            title: cow_to_optional_string(title),
793        }),
794        Tag::List(start_opt) => Some(MarkdownStartTag::List(start_opt)),
795        Tag::Paragraph => Some(MarkdownStartTag::Paragraph),
796        Tag::Strikethrough => Some(MarkdownStartTag::Strikethrough),
797        Tag::Strong => Some(MarkdownStartTag::Strong),
798        Tag::Table(_) => Some(MarkdownStartTag::Table),
799        Tag::TableCell => Some(MarkdownStartTag::TableCell),
800        Tag::TableHead => Some(MarkdownStartTag::TableHead),
801        Tag::TableRow => Some(MarkdownStartTag::TableRow),
802        Tag::DefinitionList
803        | Tag::DefinitionListDefinition
804        | Tag::DefinitionListTitle
805        | Tag::FootnoteDefinition(_)
806        | Tag::HtmlBlock
807        | Tag::MetadataBlock(_)
808        | Tag::Subscript
809        | Tag::Superscript => None,
810    }
811}
812
813fn code_block_syntax(kind: CodeBlockKind<'_>) -> Option<String> {
814    match kind {
815        CodeBlockKind::Fenced(lang) if !lang.is_empty() => Some(lang.into_string()),
816        CodeBlockKind::Fenced(_) | CodeBlockKind::Indented => None,
817    }
818}
819
820fn cow_to_optional_string(value: CowStr<'_>) -> Option<String> {
821    if value.is_empty() {
822        None
823    } else {
824        Some(value.into_string())
825    }
826}
827
// White-box tests: they call the private handlers directly on a reader built from an
// empty document, so the block state is still `None` and the queue is empty.
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline code arriving outside any block must open a paragraph before the styled
    /// text is queued.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_code("code".to_string());

        assert_eq!(reader.queue.len(), 2);
        assert_eq!(
            reader.queue.front(),
            Some(&Event::StartParagraph {
                alignment: None,
                id: None,
            })
        );
        assert_eq!(
            reader.queue.get(1),
            Some(&Event::Text {
                content: "code".to_string(),
                style: TextStyle::default().code(),
            })
        );
    }

    /// Plain text arriving outside any block must open a paragraph before the text is
    /// queued.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_text("hello".to_string());

        assert_eq!(reader.queue.len(), 2);
        assert_eq!(
            reader.queue.front(),
            Some(&Event::StartParagraph {
                alignment: None,
                id: None,
            })
        );
        assert_eq!(
            reader.queue.get(1),
            Some(&Event::Text {
                content: "hello".to_string(),
                style: TextStyle::default(),
            })
        );
    }
}
876
// Compile-time guard: the test body only has to type-check, which fails to build if
// `MarkdownReader` ever stops being `Send + 'static`.
#[cfg(test)]
mod send_static_assertions {
    /// Accepts only types that are `Send + 'static`; does nothing at runtime.
    fn assert_send_static<T: Send + 'static>() {}

    #[test]
    fn markdown_reader_is_send_static() {
        assert_send_static::<crate::MarkdownReader>();
    }
}
885}