Skip to main content

docspec_markdown_reader/
lib.rs

1//! Markdown to `DocSpec` event stream reader.
2//!
3//! This crate provides a [`MarkdownReader`] that implements [`EventSource`] to convert
4//! Markdown documents into the `DocSpec` event stream format. It uses `pulldown-cmark`
5//! to parse CommonMark-compliant Markdown and emits typed events representing document
6//! structure.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use docspec_markdown_reader::{MarkdownReader, EventSource};
12//!
13//! let markdown = "# Hello\n\nWorld";
14//! let mut reader = MarkdownReader::from_str(markdown);
15//!
16//! while let Some(event) = reader.next_event()? {
17//!     println!("{event:?}");
18//! }
19//! # Ok::<(), docspec_core::Error>(())
20//! ```
21//!
22//! # Supported Elements
23//!
24//! - Headings (h1–h6) → `StartHeading` / `EndHeading`
25//! - Paragraphs → `StartParagraph` / `EndParagraph`
26//! - Block quotes → `StartBlockQuote` / `EndBlockQuote`
27//! - Code blocks → `StartPreformatted` / `EndPreformatted`
28//! - Bold text → `StartTextStyle { kind: Bold }` / `EndTextStyle`
29//! - Italic text → `StartTextStyle { kind: Italic }` / `EndTextStyle`
30//! - Inline code → `StartTextStyle { kind: Code }` / `EndTextStyle`
31//! - Strikethrough → `StartTextStyle { kind: Strikethrough }` / `EndTextStyle`
32//! - Images → `Image { source: Uri, alt, title, decorative }`
33//! - Hard line breaks → `LineBreak`
34//! - Soft line breaks → `SoftBreak`
35//! - Thematic breaks → `ThematicBreak`
36//! - Tables → `StartTable` / `EndTable`, `StartTableRow` / `EndTableRow`,
37//!   `StartTableHeader` / `EndTableHeader`, `StartTableCell` / `EndTableCell`
38//!   (GFM column alignment syntax is parsed, but alignment data is discarded)
39//! - Bullet lists → `StartUnorderedListItem` / `EndUnorderedListItem`
40//! - Numbered lists → `StartOrderedListItem` / `EndOrderedListItem`
41//!   (`start: Option<u64>` is `Some(n)` on the first item of each list, `None` on subsequent items;
42//!   child items may nest inside their parent's `Start*`/`End*` pair with `level` indicating
43//!   indent depth; task list markers (`- [ ]`/`- [x]`) are parsed as literal text)
44//! - Links → `StartLink { href, title }` / `EndLink` (inline, reference, collapsed,
45//!   shortcut, autolink, and email autolink variants — all resolved to inline form
46//!   by pulldown-cmark; image-inside-link closes the link before emitting the image
47//!   as a sibling block: content preceding the image stays inside the link, content
48//!   following the image is outside the link, and the link is empty only when the
49//!   image is the sole link label, e.g. `[![alt](img)](url)`)
50//!
51//! # Unsupported Elements
52//!
53//! The following elements are not emitted as structured events. Text content is
54//! recursively extracted where applicable; structure is silently dropped:
55//! - Definition lists and footnotes
//! - HTML blocks and inline HTML (the raw HTML itself is discarded, not emitted as text)
57//! - Math blocks and inline math
58//! - Subscript and superscript formatting
59//!
60//! # Memory Model
61//!
62//! `MarkdownReader` owns its source text for the parser's lifetime. While events
63//! are emitted one at a time via [`EventSource::next_event`] (the stream-event
64//! guarantee is preserved), the source `String` is held in memory until the reader
65//! is dropped. This is a constraint of `pulldown-cmark`, which is permanently
66//! borrow-based by design (see [pulldown-cmark issue #463]).
67//!
68//! For contrast, `HtmlReader` (from `docspec-html-reader`) streams its source via a
69//! 16 KB sliding-window buffer and does not hold the full document in memory.
70//!
71//! [pulldown-cmark issue #463]: https://github.com/raphlinus/pulldown-cmark/issues/463
72
73extern crate alloc;
74
/// Private module defining `ParserCell`, the self-referential container that
/// pairs the owned Markdown source with the parser wrapper borrowing from it.
// `all()` with no predicates is always true, so this `allow` applies unconditionally.
// NOTE(review): presumably the `self_cell!` expansion trips `clippy::mem_forget` — confirm.
#[cfg_attr(all(), allow(clippy::mem_forget))]
mod parser_cell {
    use self_cell::self_cell;

    use super::MarkdownParser;

    self_cell!(
        pub(super) struct ParserCell {
            // Source text, owned by the cell.
            owner: String,
            // Parser wrapper built from a borrow of `owner`.
            #[covariant]
            dependent: MarkdownParser,
        }
    );
}
89
90use alloc::collections::VecDeque;
91use std::io::{Read, Seek};
92
93pub use docspec_core::EventSource;
94use docspec_core::{Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyleKind};
95use parser_cell::ParserCell;
96use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
97
/// Newtype around `pulldown-cmark`'s [`Parser`], used as the dependent type of
/// `ParserCell` so the parser can live alongside the source text it borrows.
struct MarkdownParser<'a>(Parser<'a>);
99
/// Whether content is inside a block-level element.
///
/// Stored in `MarkdownReader::block_state` and consulted before inline content
/// is queued, to decide whether a `StartParagraph` has to be emitted first.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
    /// Inside an auto-opened paragraph (text arrived outside any explicit block).
    AutoParagraph,
    /// Inside an explicit block: set whenever a block start event is queued
    /// (heading, block quote, code block, table parts, list item) or when a
    /// deferred `StartParagraph` is flushed.
    Explicit,
    /// Not inside any block context.
    None,
    /// Explicit block whose `StartParagraph` is deferred until the first real event.
    /// If the paragraph ends while still in this state, no paragraph events are emitted.
    PendingExplicit,
}
112
/// Document processing phase.
///
/// A reader starts in `NotStarted`, moves to `Running` when `StartDocument` is
/// returned, and to `Finished` when the underlying parser is exhausted.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
    /// `EndDocument` has been emitted.
    Finished,
    /// `StartDocument` not yet emitted.
    NotStarted,
    /// Processing events between `StartDocument` and `EndDocument`.
    Running,
}
123
/// Context for a single list level tracked by [`MarkdownReader`].
///
/// One context is pushed when a list starts and popped when it ends, so the
/// innermost list is always the last element of the reader's list stack.
struct ListContext {
    /// Whether the item at this list level is currently open (start emitted, end not yet emitted).
    item_open: bool,
    /// Whether this list is ordered (numbered) rather than unordered (bulleted).
    /// Derived from whether the list start carried a start number.
    ordered: bool,
    /// Start number to attach to the next item emitted; `Some(n)` only before the first
    /// item is emitted, then `None` for all subsequent items in the same list.
    pending_start: Option<u64>,
}
134
/// Buffered image state during image alt text collection.
///
/// Created when an image start tag is seen and consumed when the matching end
/// tag arrives, at which point a single `Image` event is emitted.
struct ImageBuffer {
    /// Accumulated alt text. Text and inline code are appended verbatim; hard
    /// and soft breaks are appended as a single space.
    alt_buf: String,
    /// Image title if provided.
    title: Option<String>,
    /// Image URL.
    url: String,
}
144
/// Owned, lifetime-free projection of a `pulldown-cmark` event.
///
/// Produced by `MarkdownReader::next_pulldown_event`, which converts each
/// borrowed parser event into this type before it leaves the parser cell.
enum MarkdownPulldownEvent {
    /// Inline code span content.
    Code(String),
    /// Closing tag of an element.
    End(TagEnd),
    /// Hard line break.
    HardBreak,
    /// Event that produces no output (unsupported start tags, HTML, math,
    /// footnote references, task list markers).
    Ignored,
    /// Thematic break (horizontal rule).
    Rule,
    /// Soft line break.
    SoftBreak,
    /// Opening tag of a supported element.
    Start(MarkdownStartTag),
    /// Plain text content.
    Text(String),
}
155
/// Owned projection of the `pulldown-cmark` start tags this reader handles.
///
/// Start tags with no counterpart here are reported as
/// `MarkdownPulldownEvent::Ignored` instead.
enum MarkdownStartTag {
    /// Block quote opening.
    BlockQuote,
    /// Code block opening.
    CodeBlock {
        /// Syntax passed through to `StartPreformatted`, if any.
        syntax: Option<String>,
    },
    /// Emphasis opening (mapped to the italic text style).
    Emphasis,
    /// Heading opening.
    Heading {
        /// Heading level (h1–h6).
        level: HeadingLevel,
    },
    /// Image opening; alt text follows as nested events.
    Image {
        /// Image URL.
        dest_url: String,
        /// Optional image title.
        title: Option<String>,
    },
    /// List item opening.
    Item,
    /// Link opening.
    Link {
        /// Link target URL.
        dest_url: String,
        /// Optional link title.
        title: Option<String>,
    },
    /// List opening; `Some(n)` marks an ordered list starting at `n`,
    /// `None` an unordered list.
    List(Option<u64>),
    /// Paragraph opening.
    Paragraph,
    /// Strikethrough opening.
    Strikethrough,
    /// Strong emphasis opening (mapped to the bold text style).
    Strong,
    /// Table opening.
    Table,
    /// Table cell opening (header or body, depending on context).
    TableCell,
    /// Table header row opening.
    TableHead,
    /// Table body row opening.
    TableRow,
}
183
/// Buffered link state during link inline content collection.
///
/// Held in `MarkdownReader::link` from the link start tag until the link is
/// closed, either by its end tag or early by an image inside the link.
struct LinkBuffer {
    /// Link target URL.
    href: String,
    /// Whether `StartLink` has been emitted yet (deferred until first inline event arrives).
    started: bool,
    /// Optional link title (from `CommonMark` `[text](url "title")` syntax).
    title: Option<String>,
}
193
/// A streaming Markdown reader that implements [`EventSource`].
///
/// `MarkdownReader` parses Markdown using `pulldown-cmark` and emits `DocSpec` events
/// one at a time. It handles the mapping from `pulldown-cmark`'s event model to `DocSpec`'s
/// event model, including tracking inline formatting state.
///
/// The first call to [`EventSource::next_event`] yields `StartDocument`;
/// `EndDocument` is queued once the underlying parser is exhausted, after which
/// `next_event` returns `None`.
///
/// # Example
///
/// ```
/// use docspec_markdown_reader::{MarkdownReader, EventSource};
///
/// let mut reader = MarkdownReader::from_str("**bold** and *italic*");
/// while let Some(event) = reader.next_event()? {
///     // Process events...
/// }
/// # Ok::<(), docspec_core::Error>(())
/// ```
pub struct MarkdownReader {
    /// Current block-level context.
    block_state: BlockState,
    /// Owned source text and parser borrowing from it.
    cell: ParserCell,
    /// Buffered code block text (accumulated until `EndCodeBlock` to strip trailing newline).
    /// `Some` only while a code block is open.
    code_block_buffer: Option<String>,
    /// Buffered image being processed (alt text accumulation).
    image: Option<ImageBuffer>,
    /// Whether the parser is currently inside a preformatted code block.
    /// While set, inline style open/close requests are ignored.
    in_preformatted: bool,
    /// Whether the parser is currently inside a table header row.
    in_table_head: bool,
    /// Buffered link being processed (deferred Start emission for image-in-link extraction).
    link: Option<LinkBuffer>,
    /// LIFO stack of list contexts. `len()` gives the current nesting depth;
    /// `level = list_stack.len().saturating_sub(1)` at item-emit time.
    list_stack: alloc::vec::Vec<ListContext>,
    /// Inline styles already emitted and currently open.
    open_styles: alloc::vec::Vec<TextStyleKind>,
    /// Inline styles waiting to be emitted before the next text event.
    pending_open_styles: alloc::vec::Vec<TextStyleKind>,
    /// Document processing phase.
    phase: Phase,
    /// Queue of `DocSpec` events to emit.
    queue: VecDeque<Event>,
}
238
239impl MarkdownReader {
240    fn close_current_item_if_open(&mut self) {
241        let Some(ctx) = self.list_stack.last() else {
242            return;
243        };
244        if !ctx.item_open {
245            return;
246        }
247
248        let ordered = ctx.ordered;
249        self.close_all_open_styles();
250        if ordered {
251            self.queue.push_back(Event::EndOrderedListItem);
252        } else {
253            self.queue.push_back(Event::EndUnorderedListItem);
254        }
255        if let Some(current_ctx) = self.list_stack.last_mut() {
256            current_ctx.item_open = false;
257        }
258        self.block_state = BlockState::None;
259    }
260
261    fn close_all_open_styles(&mut self) {
262        self.pending_open_styles.clear();
263        while self.open_styles.pop().is_some() {
264            self.queue.push_back(Event::EndTextStyle);
265        }
266    }
267
268    fn close_style(&mut self, kind: &TextStyleKind) {
269        if self.in_preformatted {
270            return;
271        }
272
273        if let Some(pos) = self.pending_open_styles.iter().rposition(|k| k == kind) {
274            self.pending_open_styles.remove(pos);
275            return;
276        }
277
278        if let Some(pos) = self.open_styles.iter().rposition(|k| k == kind) {
279            let split_pos = pos
280                .checked_add(1)
281                .map_or(self.open_styles.len(), |value| value);
282            let above: alloc::vec::Vec<TextStyleKind> =
283                self.open_styles.drain(split_pos..).collect();
284            self.open_styles.pop();
285            for _ in above.iter().rev() {
286                self.queue.push_back(Event::EndTextStyle);
287            }
288            self.queue.push_back(Event::EndTextStyle);
289            for reopened in above {
290                self.pending_open_styles.push(reopened);
291            }
292        }
293    }
294
295    fn flush_pending_styles(&mut self) {
296        for kind in self.pending_open_styles.drain(..) {
297            self.queue.push_back(Event::StartTextStyle {
298                kind: kind.clone(),
299                id: None,
300            });
301            self.open_styles.push(kind);
302        }
303    }
304
305    fn open_style(&mut self, kind: TextStyleKind) {
306        if !self.in_preformatted {
307            self.pending_open_styles.push(kind);
308        }
309    }
310
311    /// Emits `StartLink` for the buffered link if it hasn't been emitted yet.
312    /// Called before any inline event that would belong inside a link.
313    fn emit_pending_link_start(&mut self) {
314        self.flush_pending_paragraph_start();
315        if let Some(link) = self.link.as_mut() {
316            if !link.started {
317                self.queue.push_back(Event::StartLink {
318                    href: link.href.clone(),
319                    id: None,
320                    title: link.title.clone(),
321                });
322                link.started = true;
323            }
324        }
325    }
326
327    /// Emits `StartParagraph` for the deferred paragraph if it hasn't been emitted yet.
328    /// Called before any committing event that would belong inside a paragraph.
329    fn flush_pending_paragraph_start(&mut self) {
330        if self.block_state == BlockState::PendingExplicit {
331            self.queue.push_back(Event::StartParagraph {
332                alignment: None,
333                id: None,
334            });
335            self.block_state = BlockState::Explicit;
336        }
337    }
338
339    fn from_owned_string(source: String) -> Self {
340        let options = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
341        let cell = ParserCell::new(source, |s| MarkdownParser(Parser::new_ext(s, options)));
342        Self {
343            block_state: BlockState::None,
344            cell,
345            code_block_buffer: None,
346            image: None,
347            in_preformatted: false,
348            in_table_head: false,
349            link: None,
350            list_stack: Vec::new(),
351            open_styles: Vec::new(),
352            pending_open_styles: Vec::new(),
353            phase: Phase::NotStarted,
354            queue: VecDeque::new(),
355        }
356    }
357
358    /// Creates a `MarkdownReader` from any `Read + Seek` source.
359    ///
360    /// Reads the entire source into memory (required by `pulldown_cmark`'s
361    /// borrow-based parser).
362    ///
363    /// # Errors
364    ///
365    /// Returns [`Error::Io`](docspec_core::Error::Io) if reading fails.
366    #[inline]
367    pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
368        let mut source = String::new();
369        reader.read_to_string(&mut source)?;
370        Ok(Self::from_owned_string(source))
371    }
372
373    /// Creates a `MarkdownReader` from a string slice.
374    ///
375    /// The input is copied into an owned `String` for the parser's lifetime.
376    ///
377    /// # Example
378    ///
379    /// ```
380    /// use docspec_markdown_reader::MarkdownReader;
381    ///
382    /// let reader = MarkdownReader::from_str("# Hello World");
383    /// ```
384    #[inline]
385    #[must_use]
386    #[expect(
387        clippy::should_implement_trait,
388        reason = "constructor name is required for reader API consistency"
389    )]
390    pub fn from_str(input: &str) -> Self {
391        Self::from_owned_string(input.to_owned())
392    }
393
394    fn handle_code(&mut self, content: String) {
395        if let Some(img) = &mut self.image {
396            img.alt_buf.push_str(&content);
397        } else {
398            self.emit_pending_link_start();
399            if self.block_state == BlockState::None {
400                self.queue.push_back(Event::StartParagraph {
401                    alignment: None,
402                    id: None,
403                });
404                self.block_state = BlockState::AutoParagraph;
405            }
406            self.flush_pending_styles();
407            self.queue.push_back(Event::StartTextStyle {
408                kind: TextStyleKind::Code,
409                id: None,
410            });
411            self.queue.push_back(Event::Text { content });
412            self.queue.push_back(Event::EndTextStyle);
413        }
414    }
415
416    /// Emits the buffered code block content (stripping the parser-added trailing newline)
417    /// followed by `EndPreformatted`. Skips the text event if the buffer is empty.
418    fn handle_end_code_block(&mut self) {
419        if let Some(buf) = self.code_block_buffer.take() {
420            let content = buf.strip_suffix('\n').unwrap_or(&buf).to_owned();
421            if !content.is_empty() {
422                self.queue.push_back(Event::Text { content });
423            }
424        }
425        self.in_preformatted = false;
426        self.push_event_end(Event::EndPreformatted);
427    }
428
429    /// Emits an `Image` event from the accumulated image buffer, deriving
430    /// `decorative = true` when the trimmed alt text is empty. Consumes the
431    /// in-progress image state; does nothing if no image is in progress.
432    fn handle_end_image(&mut self) {
433        let Some(img) = self.image.take() else { return };
434        self.flush_pending_paragraph_start();
435        let trimmed = img.alt_buf.trim();
436        let alt = if trimmed.is_empty() {
437            None
438        } else {
439            Some(trimmed.to_owned())
440        };
441        let decorative = alt.is_none();
442        self.queue.push_back(Event::Image {
443            source: ImageSource::Uri { uri: img.url },
444            alt,
445            title: img.title,
446            decorative,
447            id: None,
448        });
449    }
450
451    /// Closes an auto-opened paragraph if one is open, then closes the current
452    /// list item and resets block state.
453    fn handle_end_item(&mut self) {
454        if self.block_state == BlockState::AutoParagraph {
455            self.close_all_open_styles();
456            self.queue.push_back(Event::EndParagraph);
457        }
458        self.close_current_item_if_open();
459        self.block_state = BlockState::None;
460    }
461
462    /// Emits `EndLink` (and `StartLink` if not yet emitted) for the buffered link.
463    fn handle_end_link(&mut self) {
464        let Some(link) = self.link.take() else { return };
465        if link.started {
466            self.queue.push_back(Event::EndLink);
467        } else {
468            self.flush_pending_paragraph_start();
469            self.queue.push_back(Event::StartLink {
470                href: link.href,
471                id: None,
472                title: link.title,
473            });
474            self.queue.push_back(Event::EndLink);
475        }
476    }
477
    /// Closes the current list item if open, pops the list context, and resets block state.
    ///
    /// The item must be closed before the pop: `close_current_item_if_open`
    /// inspects the top of the list stack, which after the pop would be the
    /// parent list.
    fn handle_end_list(&mut self) {
        self.close_current_item_if_open();
        self.list_stack.pop();
        self.block_state = BlockState::None;
    }
484
485    /// Emits `EndTableCell` or `EndTableHeader` depending on whether the parser
486    /// is currently inside a table header row.
487    fn handle_end_table_cell(&mut self) {
488        if self.in_table_head {
489            self.push_event_end(Event::EndTableHeader);
490        } else {
491            self.push_event_end(Event::EndTableCell);
492        }
493    }
494
495    /// Emits `EndTableRow` and clears the table-head flag for a table head closing tag.
496    fn handle_end_table_head(&mut self) {
497        self.push_event_end(Event::EndTableRow);
498        self.in_table_head = false;
499    }
500
501    /// Dispatches a `pulldown-cmark` end tag to the appropriate per-tag handler.
502    ///
503    /// Tags in the explicit ignore list below are known-unsupported elements whose
504    /// structure is intentionally dropped (text content may still be extracted by
505    /// other event handlers).
506    fn handle_end_tag(&mut self, tag_end: TagEnd) {
507        match tag_end {
508            TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
509            TagEnd::CodeBlock => self.handle_end_code_block(),
510            TagEnd::Emphasis => self.close_style(&TextStyleKind::Italic),
511            TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
512            TagEnd::Image => self.handle_end_image(),
513            TagEnd::Item => self.handle_end_item(),
514            TagEnd::Link => self.handle_end_link(),
515            TagEnd::List(_) => self.handle_end_list(),
516            TagEnd::Paragraph => {
517                if self.block_state == BlockState::PendingExplicit {
518                    self.close_all_open_styles();
519                    self.block_state = BlockState::None;
520                } else {
521                    self.push_event_end(Event::EndParagraph);
522                }
523            }
524            TagEnd::Strikethrough => self.close_style(&TextStyleKind::Strikethrough),
525            TagEnd::Strong => self.close_style(&TextStyleKind::Bold),
526            TagEnd::Table => self.push_event_end(Event::EndTable),
527            TagEnd::TableCell => self.handle_end_table_cell(),
528            TagEnd::TableHead => self.handle_end_table_head(),
529            TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
530            // Tags intentionally ignored (structure dropped, text extracted elsewhere):
531            TagEnd::DefinitionList
532            | TagEnd::DefinitionListDefinition
533            | TagEnd::DefinitionListTitle
534            | TagEnd::FootnoteDefinition
535            | TagEnd::HtmlBlock
536            | TagEnd::MetadataBlock(_)
537            | TagEnd::Subscript
538            | TagEnd::Superscript => {}
539        }
540    }
541
542    fn handle_item_start(&mut self) {
543        let depth = self.list_stack.len().saturating_sub(1);
544        let level = u32::try_from(depth).map_or(u32::MAX, |v| v);
545        if let Some(ctx) = self.list_stack.last_mut() {
546            if ctx.ordered {
547                self.queue.push_back(Event::StartOrderedListItem {
548                    start: ctx.pending_start.take(),
549                    style_type: ListStyleType::Decimal,
550                    level,
551                    id: None,
552                });
553            } else {
554                self.queue.push_back(Event::StartUnorderedListItem {
555                    style_type: ListStyleType::Disc,
556                    level,
557                    id: None,
558                });
559            }
560            ctx.item_open = true;
561            self.block_state = BlockState::Explicit;
562        }
563    }
564
565    fn handle_list_start(&mut self, start_opt: Option<u64>) {
566        self.list_stack.push(ListContext {
567            item_open: false,
568            ordered: start_opt.is_some(),
569            pending_start: start_opt,
570        });
571    }
572
573    /// Emits `StartPreformatted` for a code block opening tag, initialising
574    /// the internal code-block buffer for content accumulation.
575    fn handle_start_code_block(&mut self, syntax: Option<String>) {
576        self.code_block_buffer = Some(String::new());
577        self.in_preformatted = true;
578        self.push_event_start(Event::StartPreformatted { id: None, syntax });
579    }
580
581    /// Emits `StartHeading` after mapping a `pulldown-cmark` `HeadingLevel` to a `u8` level.
582    fn handle_start_heading(&mut self, level: HeadingLevel) {
583        let level_u8 = match level {
584            HeadingLevel::H1 => 1,
585            HeadingLevel::H2 => 2,
586            HeadingLevel::H3 => 3,
587            HeadingLevel::H4 => 4,
588            HeadingLevel::H5 => 5,
589            HeadingLevel::H6 => 6,
590        };
591        self.push_event_start(Event::StartHeading {
592            level: level_u8,
593            id: None,
594        });
595    }
596
597    /// Initialises image state for alt-text accumulation when an image opening tag is
598    /// encountered. The title is stored as `None` when the pulldown-cmark title string
599    /// is empty.
600    fn handle_start_image(&mut self, dest_url: String, title: Option<String>) {
601        // Image-in-link extraction: close the link before processing the image so the
602        // image can be emitted as a sibling block (BlockNote and similar schemas do not
603        // allow block-level images inside inline links). When `link.started` is true, the
604        // link already contains preceding inline content — emit only `EndLink`. When it
605        // is false (image is the sole link label, e.g. `[![alt](img)](url)`), emit an
606        // empty `StartLink`/`EndLink` pair so the URL is preserved. `TagEnd::Image` fires
607        // `Event::Image` before `TagEnd::Paragraph`, so downstream writers close the
608        // surrounding paragraph before serialising the image as a sibling block.
609        self.flush_pending_paragraph_start();
610        if let Some(link) = self.link.take() {
611            if link.started {
612                self.queue.push_back(Event::EndLink);
613            } else {
614                self.queue.push_back(Event::StartLink {
615                    href: link.href,
616                    id: None,
617                    title: link.title,
618                });
619                self.queue.push_back(Event::EndLink);
620            }
621        }
622
623        self.image = Some(ImageBuffer {
624            alt_buf: String::new(),
625            title,
626            url: dest_url,
627        });
628    }
629
630    /// Stores link state for deferred `StartLink` emission.
631    ///
632    /// Emission is deferred until the first inline event arrives (lazy emission).
633    /// This allows image-in-link to be detected before any `StartLink` is emitted.
634    fn handle_start_link(&mut self, dest_url: String, title: Option<String>) {
635        self.link = Some(LinkBuffer {
636            href: dest_url,
637            started: false,
638            title,
639        });
640    }
641
642    /// Emits `StartTableHeader` or `StartTableCell` depending on whether the parser
643    /// is currently inside a table header row.
644    fn handle_start_table_cell(&mut self) {
645        if self.in_table_head {
646            self.push_event_start(Event::StartTableHeader {
647                scope: Some(TableHeaderScope::Column),
648                abbr: None,
649                colspan: None,
650                rowspan: None,
651                id: None,
652            });
653        } else {
654            self.push_event_start(Event::StartTableCell {
655                colspan: None,
656                rowspan: None,
657                id: None,
658            });
659        }
660    }
661
662    /// Sets the table-head flag and emits `StartTableRow` for a table head opening tag.
663    fn handle_start_table_head(&mut self) {
664        self.in_table_head = true;
665        self.push_event_start(Event::StartTableRow { id: None });
666    }
667
668    /// Dispatches a `pulldown-cmark` start tag to the appropriate per-tag handler.
669    ///
670    /// Tags in the explicit ignore list below are known-unsupported elements whose
671    /// structure is intentionally dropped (text content may still be extracted by
672    /// other event handlers).
673    fn handle_start_tag(&mut self, tag: MarkdownStartTag) {
674        match tag {
675            MarkdownStartTag::BlockQuote => {
676                self.push_event_start(Event::StartBlockQuote { id: None });
677            }
678            MarkdownStartTag::CodeBlock { syntax } => self.handle_start_code_block(syntax),
679            MarkdownStartTag::Emphasis => self.open_style(TextStyleKind::Italic),
680            MarkdownStartTag::Heading { level } => self.handle_start_heading(level),
681            MarkdownStartTag::Image { dest_url, title } => self.handle_start_image(dest_url, title),
682            MarkdownStartTag::Item => self.handle_item_start(),
683            MarkdownStartTag::Link { dest_url, title } => self.handle_start_link(dest_url, title),
684            MarkdownStartTag::List(start_opt) => self.handle_list_start(start_opt),
685            MarkdownStartTag::Paragraph => self.block_state = BlockState::PendingExplicit,
686            MarkdownStartTag::Strikethrough => self.open_style(TextStyleKind::Strikethrough),
687            MarkdownStartTag::Strong => self.open_style(TextStyleKind::Bold),
688            MarkdownStartTag::Table => self.push_event_start(Event::StartTable { id: None }),
689            MarkdownStartTag::TableCell => self.handle_start_table_cell(),
690            MarkdownStartTag::TableHead => self.handle_start_table_head(),
691            MarkdownStartTag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
692        }
693    }
694
695    fn handle_text(&mut self, content: String) {
696        if let Some(img) = &mut self.image {
697            img.alt_buf.push_str(&content);
698        } else if let Some(buf) = &mut self.code_block_buffer {
699            buf.push_str(&content);
700        } else {
701            self.emit_pending_link_start();
702            if self.block_state == BlockState::None {
703                self.queue.push_back(Event::StartParagraph {
704                    alignment: None,
705                    id: None,
706                });
707                self.block_state = BlockState::AutoParagraph;
708            }
709            self.flush_pending_styles();
710            self.queue.push_back(Event::Text { content });
711        }
712    }
713
714    fn next_pulldown_event(&mut self) -> Option<MarkdownPulldownEvent> {
715        self.cell.with_dependent_mut(|_, dep| {
716            dep.0.next().map(|event| match event {
717                pulldown_cmark::Event::Start(tag) => markdown_start_tag(tag)
718                    .map_or(MarkdownPulldownEvent::Ignored, MarkdownPulldownEvent::Start),
719                pulldown_cmark::Event::End(tag_end) => MarkdownPulldownEvent::End(tag_end),
720                pulldown_cmark::Event::Text(text) => {
721                    MarkdownPulldownEvent::Text(text.into_string())
722                }
723                pulldown_cmark::Event::Code(code) => {
724                    MarkdownPulldownEvent::Code(code.into_string())
725                }
726                pulldown_cmark::Event::HardBreak => MarkdownPulldownEvent::HardBreak,
727                pulldown_cmark::Event::SoftBreak => MarkdownPulldownEvent::SoftBreak,
728                pulldown_cmark::Event::Rule => MarkdownPulldownEvent::Rule,
729                pulldown_cmark::Event::DisplayMath(_)
730                | pulldown_cmark::Event::FootnoteReference(_)
731                | pulldown_cmark::Event::Html(_)
732                | pulldown_cmark::Event::InlineHtml(_)
733                | pulldown_cmark::Event::InlineMath(_)
734                | pulldown_cmark::Event::TaskListMarker(_) => MarkdownPulldownEvent::Ignored,
735            })
736        })
737    }
738
739    fn process_next_pulldown_event(&mut self) {
740        let Some(pm_event) = self.next_pulldown_event() else {
741            if self.phase != Phase::Finished {
742                self.phase = Phase::Finished;
743                self.queue.push_back(Event::EndDocument);
744            }
745            return;
746        };
747
748        match pm_event {
749            MarkdownPulldownEvent::Start(tag) => self.handle_start_tag(tag),
750            MarkdownPulldownEvent::End(tag_end) => self.handle_end_tag(tag_end),
751            MarkdownPulldownEvent::Text(text) => self.handle_text(text),
752            MarkdownPulldownEvent::Code(code) => self.handle_code(code),
753            MarkdownPulldownEvent::HardBreak => {
754                if let Some(img) = &mut self.image {
755                    img.alt_buf.push(' ');
756                } else if self.block_state == BlockState::PendingExplicit {
757                    // emitting a break before StartParagraph would be malformed — discard
758                } else {
759                    self.emit_pending_link_start();
760                    self.queue.push_back(Event::LineBreak);
761                }
762            }
763            MarkdownPulldownEvent::SoftBreak => {
764                if let Some(img) = &mut self.image {
765                    img.alt_buf.push(' ');
766                } else if self.block_state == BlockState::PendingExplicit {
767                    // emitting a break before StartParagraph would be malformed — discard
768                } else {
769                    self.emit_pending_link_start();
770                    self.queue.push_back(Event::SoftBreak);
771                }
772            }
773            MarkdownPulldownEvent::Rule => {
774                self.queue.push_back(Event::ThematicBreak { id: None });
775            }
776            MarkdownPulldownEvent::Ignored => {}
777        }
778    }
779
780    fn push_event(&mut self, event: Event, state: BlockState) {
781        self.queue.push_back(event);
782        self.block_state = state;
783    }
784
785    fn push_event_end(&mut self, event: Event) {
786        self.close_all_open_styles();
787        self.push_event(event, BlockState::None);
788    }
789
790    fn push_event_start(&mut self, event: Event) {
791        self.push_event(event, BlockState::Explicit);
792    }
793}
794
795impl EventSource for MarkdownReader {
796    #[inline]
797    fn next_event(&mut self) -> Result<Option<Event>> {
798        if self.phase == Phase::NotStarted {
799            self.phase = Phase::Running;
800            return Ok(Some(Event::StartDocument {
801                id: None,
802                language: None,
803                metadata: None,
804            }));
805        }
806
807        if self.phase == Phase::Finished && self.queue.is_empty() {
808            return Ok(None);
809        }
810
811        while self.queue.is_empty() && self.phase != Phase::Finished {
812            self.process_next_pulldown_event();
813        }
814
815        Ok(self.queue.pop_front())
816    }
817}
818
819fn markdown_start_tag(tag: Tag<'_>) -> Option<MarkdownStartTag> {
820    match tag {
821        Tag::BlockQuote(_) => Some(MarkdownStartTag::BlockQuote),
822        Tag::CodeBlock(kind) => Some(MarkdownStartTag::CodeBlock {
823            syntax: code_block_syntax(kind),
824        }),
825        Tag::Emphasis => Some(MarkdownStartTag::Emphasis),
826        Tag::Heading { level, .. } => Some(MarkdownStartTag::Heading { level }),
827        Tag::Image {
828            dest_url, title, ..
829        } => Some(MarkdownStartTag::Image {
830            dest_url: dest_url.into_string(),
831            title: cow_to_optional_string(title),
832        }),
833        Tag::Item => Some(MarkdownStartTag::Item),
834        Tag::Link {
835            dest_url, title, ..
836        } => Some(MarkdownStartTag::Link {
837            dest_url: dest_url.into_string(),
838            title: cow_to_optional_string(title),
839        }),
840        Tag::List(start_opt) => Some(MarkdownStartTag::List(start_opt)),
841        Tag::Paragraph => Some(MarkdownStartTag::Paragraph),
842        Tag::Strikethrough => Some(MarkdownStartTag::Strikethrough),
843        Tag::Strong => Some(MarkdownStartTag::Strong),
844        Tag::Table(_) => Some(MarkdownStartTag::Table),
845        Tag::TableCell => Some(MarkdownStartTag::TableCell),
846        Tag::TableHead => Some(MarkdownStartTag::TableHead),
847        Tag::TableRow => Some(MarkdownStartTag::TableRow),
848        Tag::DefinitionList
849        | Tag::DefinitionListDefinition
850        | Tag::DefinitionListTitle
851        | Tag::FootnoteDefinition(_)
852        | Tag::HtmlBlock
853        | Tag::MetadataBlock(_)
854        | Tag::Subscript
855        | Tag::Superscript => None,
856    }
857}
858
859fn code_block_syntax(kind: CodeBlockKind<'_>) -> Option<String> {
860    match kind {
861        CodeBlockKind::Fenced(lang) if !lang.is_empty() => Some(lang.into_string()),
862        CodeBlockKind::Fenced(_) | CodeBlockKind::Indented => None,
863    }
864}
865
866fn cow_to_optional_string(value: CowStr<'_>) -> Option<String> {
867    if value.is_empty() {
868        None
869    } else {
870        Some(value.into_string())
871    }
872}
873
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrows the reader's queued events in front-to-back order.
    fn queued(reader: &MarkdownReader) -> Vec<&Event> {
        reader.queue.iter().collect()
    }

    /// Inline code arriving with no open block must be wrapped in a paragraph
    /// and a code text style.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_code(String::from("code"));

        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::StartTextStyle {
                kind: TextStyleKind::Code,
                id: None,
            },
            Event::Text {
                content: String::from("code"),
            },
            Event::EndTextStyle,
        ];

        // Comparing whole sequences checks both the length and the order.
        assert_eq!(queued(&reader), expected.iter().collect::<Vec<_>>());
    }

    /// Plain text arriving with no open block must open a paragraph first.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_text(String::from("hello"));

        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::Text {
                content: String::from("hello"),
            },
        ];

        assert_eq!(queued(&reader), expected.iter().collect::<Vec<_>>());
    }
}
928
#[cfg(test)]
mod send_static_assertions {
    /// Compiles only when `T` is `Send + 'static`; does nothing at run time.
    fn require_send_static<T>()
    where
        T: Send + 'static,
    {
    }

    /// Fails to compile if `MarkdownReader` stops being `Send + 'static`.
    #[test]
    fn markdown_reader_is_send_static() {
        require_send_static::<crate::MarkdownReader>();
    }
}