extern crate alloc;
// Self-referential storage for the reader: `ParserCell` owns the Markdown source `String`
// and, alongside it, the `MarkdownParser` that is built from that string (see the
// `ParserCell::new` call in `MarkdownReader::from_owned_string`).
// `cfg_attr(all(), ...)` always applies because an empty `all()` predicate is true.
// NOTE(review): the `clippy::mem_forget` allowance is presumably required by code generated
// by `self_cell!` — confirm before removing it.
#[cfg_attr(all(), allow(clippy::mem_forget))]
mod parser_cell {
use self_cell::self_cell;
use super::MarkdownParser;
self_cell!(
pub(super) struct ParserCell {
// The owned Markdown text.
owner: String,
// The parser constructed from `owner`; declared covariant over its lifetime.
#[covariant]
dependent: MarkdownParser,
}
);
}
use alloc::collections::VecDeque;
use std::io::{Read, Seek};
pub use docspec_core::EventSource;
use docspec_core::{Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyleKind};
use parser_cell::ParserCell;
use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
/// Newtype wrapper around pulldown-cmark's `Parser`, used as the dependent type stored in
/// `ParserCell`.
struct MarkdownParser<'a>(Parser<'a>);
/// Tracks which kind of block-level container the reader currently considers open.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
/// A `StartParagraph` was emitted by the reader itself because text or inline code arrived
/// while no block was open; `handle_end_item` emits the matching `EndParagraph`.
AutoParagraph,
/// A block start event has been emitted (set by `push_event_start`, by
/// `flush_pending_paragraph_start`, and by `handle_item_start`).
Explicit,
/// No block is open.
None,
/// The parser reported a paragraph start, but `StartParagraph` has not been emitted yet;
/// it is emitted lazily by `flush_pending_paragraph_start`.
PendingExplicit,
}
/// Lifecycle of the reader as observed through `next_event`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
/// The underlying parser is exhausted and `EndDocument` has been queued.
Finished,
/// Nothing has been returned yet; the next call returns `StartDocument`.
NotStarted,
/// `StartDocument` has been returned and parser events are being translated.
Running,
}
/// Per-list bookkeeping; one entry is pushed on `MarkdownReader::list_stack` for each list
/// start and popped on the matching list end.
struct ListContext {
/// `true` after an item start event was emitted and before its end event is emitted.
item_open: bool,
/// `true` when the list start carried a start number (see `handle_list_start`).
ordered: bool,
/// Start number of an ordered list; taken (and therefore only reported) by the first item
/// start emitted for this list.
pending_start: Option<u64>,
}
/// State collected between an image start tag and its end tag.
struct ImageBuffer {
/// Alt text accumulated from text, inline code and line breaks seen inside the image.
alt_buf: String,
/// Image title, `None` when the Markdown title was empty.
title: Option<String>,
/// Destination URL of the image.
url: String,
}
/// Owned, reduced projection of `pulldown_cmark::Event`, produced by
/// `MarkdownReader::next_pulldown_event`.
enum MarkdownPulldownEvent {
/// Inline code span with its content.
Code(String),
/// End of a tag.
End(TagEnd),
/// Hard line break.
HardBreak,
/// Any parser event or start tag this reader does not translate.
Ignored,
/// Thematic break (horizontal rule).
Rule,
/// Soft line break.
SoftBreak,
/// Start of a supported tag.
Start(MarkdownStartTag),
/// Text run.
Text(String),
}
/// Owned subset of pulldown-cmark start tags that the reader translates; built by
/// `markdown_start_tag`.
enum MarkdownStartTag {
/// Block quote start.
BlockQuote,
/// Code block start.
CodeBlock {
/// Info string of a fenced block, `None` when empty or when the block is indented.
syntax: Option<String>,
},
/// Emphasis start; opened as `TextStyleKind::Italic`.
Emphasis,
/// Heading start.
Heading {
/// Heading level as reported by the parser.
level: HeadingLevel,
},
/// Image start.
Image {
/// Destination URL of the image.
dest_url: String,
/// Image title, `None` when empty.
title: Option<String>,
},
/// List item start.
Item,
/// Link start.
Link {
/// Destination URL of the link.
dest_url: String,
/// Link title, `None` when empty.
title: Option<String>,
},
/// List start; `Some(n)` marks an ordered list starting at `n`.
List(Option<u64>),
/// Paragraph start.
Paragraph,
/// Strikethrough start; opened as `TextStyleKind::Strikethrough`.
Strikethrough,
/// Strong emphasis start; opened as `TextStyleKind::Bold`.
Strong,
/// Table start.
Table,
/// Table cell start.
TableCell,
/// Table head start.
TableHead,
/// Table row start.
TableRow,
}
/// State of the link currently being read; its `StartLink` event is deferred until content
/// arrives or the link ends.
struct LinkBuffer {
/// Destination URL of the link.
href: String,
/// `true` once `StartLink` has been queued for this link.
started: bool,
/// Link title, `None` when empty.
title: Option<String>,
}
/// Event source that parses Markdown with pulldown-cmark and yields `docspec_core` events
/// through [`EventSource::next_event`].
pub struct MarkdownReader {
/// Which kind of block is currently open.
block_state: BlockState,
/// Owned source text together with the parser reading from it.
cell: ParserCell,
/// `Some` while inside a code block; collects the block's text until the block ends.
code_block_buffer: Option<String>,
/// `Some` while inside an image; collects alt text until the image ends.
image: Option<ImageBuffer>,
/// `true` between a code block start and its end; style opens/closes are ignored meanwhile.
in_preformatted: bool,
/// `true` while inside the table head, where cells are emitted as table headers.
in_table_head: bool,
/// `Some` while inside a link.
link: Option<LinkBuffer>,
/// One entry per currently open list, innermost last.
list_stack: alloc::vec::Vec<ListContext>,
/// Styles whose `StartTextStyle` has been queued and not yet ended, innermost last.
open_styles: alloc::vec::Vec<TextStyleKind>,
/// Styles requested by the parser whose `StartTextStyle` has not been queued yet.
pending_open_styles: alloc::vec::Vec<TextStyleKind>,
/// Lifecycle phase of the reader.
phase: Phase,
/// Translated events waiting to be returned from `next_event`.
queue: VecDeque<Event>,
}
impl MarkdownReader {
/// Ends the innermost list's current item, if that list has an open item.
///
/// Closes every open text style first, queues the ordered/unordered item end event, marks
/// the item as closed and resets the block state. Does nothing when there is no list or the
/// innermost list has no open item.
fn close_current_item_if_open(&mut self) {
    let ordered = match self.list_stack.last() {
        Some(innermost) if innermost.item_open => innermost.ordered,
        _ => return,
    };
    self.close_all_open_styles();
    let end_event = if ordered {
        Event::EndOrderedListItem
    } else {
        Event::EndUnorderedListItem
    };
    self.queue.push_back(end_event);
    if let Some(innermost) = self.list_stack.last_mut() {
        innermost.item_open = false;
    }
    self.block_state = BlockState::None;
}
/// Discards all not-yet-emitted styles and queues one `EndTextStyle` for every style that
/// is currently open, leaving both style stacks empty.
fn close_all_open_styles(&mut self) {
    self.pending_open_styles.clear();
    for _closed in self.open_styles.drain(..) {
        self.queue.push_back(Event::EndTextStyle);
    }
}
/// Closes the most recently opened style of the given `kind`.
///
/// * Inside preformatted content the call is ignored.
/// * If the style is still pending (never emitted), it is simply dropped.
/// * Otherwise the style and every style opened after it are ended, and the styles that
///   were opened after it are put back on the pending list so they reopen with the next
///   text.
fn close_style(&mut self, kind: &TextStyleKind) {
    if self.in_preformatted {
        return;
    }
    if let Some(index) = self
        .pending_open_styles
        .iter()
        .rposition(|candidate| candidate == kind)
    {
        self.pending_open_styles.remove(index);
        return;
    }
    let Some(index) = self
        .open_styles
        .iter()
        .rposition(|candidate| candidate == kind)
    else {
        return;
    };
    // `closed` holds the target style followed by everything nested inside it.
    let closed = self.open_styles.split_off(index);
    for _ in &closed {
        self.queue.push_back(Event::EndTextStyle);
    }
    // Skip the target itself; only the styles above it are scheduled to reopen.
    self.pending_open_styles.extend(closed.into_iter().skip(1));
}
/// Emits a `StartTextStyle` for every pending style, in the order they were requested, and
/// moves each one onto the open-style stack.
fn flush_pending_styles(&mut self) {
    for style in self.pending_open_styles.drain(..) {
        let start = Event::StartTextStyle {
            kind: style.clone(),
            id: None,
        };
        self.queue.push_back(start);
        self.open_styles.push(style);
    }
}
/// Records a style to be opened with the next text; ignored inside preformatted content.
fn open_style(&mut self, kind: TextStyleKind) {
    if self.in_preformatted {
        return;
    }
    self.pending_open_styles.push(kind);
}
/// Emits the deferred paragraph start (if any) and then, when a link is active whose
/// `StartLink` has not been queued yet, queues it and marks the link as started.
fn emit_pending_link_start(&mut self) {
    self.flush_pending_paragraph_start();
    let Some(pending) = self.link.as_mut().filter(|link| !link.started) else {
        return;
    };
    pending.started = true;
    self.queue.push_back(Event::StartLink {
        href: pending.href.clone(),
        id: None,
        title: pending.title.clone(),
    });
}
/// Emits the `StartParagraph` that was deferred when the parser opened a paragraph, and
/// switches the block state to `Explicit`. No-op in any other block state.
fn flush_pending_paragraph_start(&mut self) {
    if self.block_state != BlockState::PendingExplicit {
        return;
    }
    self.block_state = BlockState::Explicit;
    self.queue.push_back(Event::StartParagraph {
        alignment: None,
        id: None,
    });
}
/// Builds a reader that owns `source` and parses it with tables and strikethrough enabled.
///
/// The reader starts in `Phase::NotStarted` with no open block and empty buffers.
fn from_owned_string(source: String) -> Self {
    let cell = ParserCell::new(source, |text| {
        let enabled = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
        MarkdownParser(Parser::new_ext(text, enabled))
    });
    Self {
        cell,
        phase: Phase::NotStarted,
        block_state: BlockState::None,
        queue: VecDeque::new(),
        list_stack: Vec::new(),
        open_styles: Vec::new(),
        pending_open_styles: Vec::new(),
        code_block_buffer: None,
        image: None,
        link: None,
        in_preformatted: false,
        in_table_head: false,
    }
}
/// Creates a reader by consuming all of `reader` as UTF-8 Markdown text.
///
/// # Errors
///
/// Returns an error when reading from `reader` fails, which includes input that is not
/// valid UTF-8.
#[inline]
pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
    let mut markdown = String::new();
    reader.read_to_string(&mut markdown)?;
    let parsed = Self::from_owned_string(markdown);
    Ok(parsed)
}
/// Creates a reader over a copy of the given Markdown text.
#[inline]
#[must_use]
#[expect(
    clippy::should_implement_trait,
    reason = "constructor name is required for reader API consistency"
)]
pub fn from_str(input: &str) -> Self {
    let owned = String::from(input);
    Self::from_owned_string(owned)
}
/// Handles an inline code span.
///
/// Inside an image the code text is appended to the alt text. Otherwise it is emitted as a
/// `Code`-styled text run, after opening an automatic paragraph when no block is open,
/// starting a pending link and flushing pending styles.
fn handle_code(&mut self, content: String) {
    if let Some(img) = &mut self.image {
        img.alt_buf.push_str(&content);
        return;
    }
    // Open the automatic paragraph *before* emitting a pending link start. Otherwise the
    // link would begin outside the paragraph but end inside it (misnested events), e.g.
    // for a link that follows a code block inside a tight list item.
    if self.block_state == BlockState::None {
        self.queue.push_back(Event::StartParagraph {
            alignment: None,
            id: None,
        });
        self.block_state = BlockState::AutoParagraph;
    }
    self.emit_pending_link_start();
    self.flush_pending_styles();
    self.queue.push_back(Event::StartTextStyle {
        kind: TextStyleKind::Code,
        id: None,
    });
    self.queue.push_back(Event::Text { content });
    self.queue.push_back(Event::EndTextStyle);
}
/// Finishes a code block: emits the buffered text (minus one trailing newline, and only
/// when non-empty), leaves preformatted mode and queues `EndPreformatted`.
fn handle_end_code_block(&mut self) {
    if let Some(mut content) = self.code_block_buffer.take() {
        // Drop exactly one trailing line feed, if present.
        if content.ends_with('\n') {
            content.pop();
        }
        if !content.is_empty() {
            self.queue.push_back(Event::Text { content });
        }
    }
    self.in_preformatted = false;
    self.push_event_end(Event::EndPreformatted);
}
fn handle_end_image(&mut self) {
let Some(img) = self.image.take() else { return };
self.flush_pending_paragraph_start();
let trimmed = img.alt_buf.trim();
let alt = if trimmed.is_empty() {
None
} else {
Some(trimmed.to_owned())
};
let decorative = alt.is_none();
self.queue.push_back(Event::Image {
source: ImageSource::Uri { uri: img.url },
alt,
title: img.title,
decorative,
id: None,
});
}
/// Ends a list item, first closing an automatically opened paragraph (and its styles) if
/// one is still open, then resetting the block state.
fn handle_end_item(&mut self) {
    if matches!(self.block_state, BlockState::AutoParagraph) {
        self.close_all_open_styles();
        self.queue.push_back(Event::EndParagraph);
    }
    self.close_current_item_if_open();
    self.block_state = BlockState::None;
}
fn handle_end_link(&mut self) {
let Some(link) = self.link.take() else { return };
if link.started {
self.queue.push_back(Event::EndLink);
} else {
self.flush_pending_paragraph_start();
self.queue.push_back(Event::StartLink {
href: link.href,
id: None,
title: link.title,
});
self.queue.push_back(Event::EndLink);
}
}
/// Ends the innermost list: closes its item if still open, drops its context and resets
/// the block state.
fn handle_end_list(&mut self) {
    self.close_current_item_if_open();
    let _finished = self.list_stack.pop();
    self.block_state = BlockState::None;
}
/// Ends a table cell, using the header end event while inside the table head.
fn handle_end_table_cell(&mut self) {
    let end_event = if self.in_table_head {
        Event::EndTableHeader
    } else {
        Event::EndTableCell
    };
    self.push_event_end(end_event);
}
/// Leaves the table head and ends the row that was opened for it.
fn handle_end_table_head(&mut self) {
    self.in_table_head = false;
    self.push_event_end(Event::EndTableRow);
}
/// Dispatches a parser end tag to the matching handler; unsupported tags are ignored.
fn handle_end_tag(&mut self, tag_end: TagEnd) {
    match tag_end {
        // Inline styles.
        TagEnd::Emphasis => self.close_style(&TextStyleKind::Italic),
        TagEnd::Strong => self.close_style(&TextStyleKind::Bold),
        TagEnd::Strikethrough => self.close_style(&TextStyleKind::Strikethrough),
        // Inline containers.
        TagEnd::Image => self.handle_end_image(),
        TagEnd::Link => self.handle_end_link(),
        // A paragraph whose start was never emitted produced no content: drop it silently.
        TagEnd::Paragraph if self.block_state == BlockState::PendingExplicit => {
            self.close_all_open_styles();
            self.block_state = BlockState::None;
        }
        TagEnd::Paragraph => self.push_event_end(Event::EndParagraph),
        // Other blocks.
        TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
        TagEnd::CodeBlock => self.handle_end_code_block(),
        TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
        // Lists.
        TagEnd::Item => self.handle_end_item(),
        TagEnd::List(_) => self.handle_end_list(),
        // Tables.
        TagEnd::Table => self.push_event_end(Event::EndTable),
        TagEnd::TableHead => self.handle_end_table_head(),
        TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
        TagEnd::TableCell => self.handle_end_table_cell(),
        // Not translated by this reader.
        TagEnd::DefinitionList
        | TagEnd::DefinitionListDefinition
        | TagEnd::DefinitionListTitle
        | TagEnd::FootnoteDefinition
        | TagEnd::HtmlBlock
        | TagEnd::MetadataBlock(_)
        | TagEnd::Subscript
        | TagEnd::Superscript => {}
    }
}
/// Emits the start event for an item of the innermost list.
///
/// The nesting level is the zero-based depth of the list stack (saturating at `u32::MAX`).
/// Ordered items carry the list's start number on the first item only. Does nothing when no
/// list is open.
fn handle_item_start(&mut self) {
    let level = u32::try_from(self.list_stack.len().saturating_sub(1)).unwrap_or(u32::MAX);
    let Some(innermost) = self.list_stack.last_mut() else {
        return;
    };
    let start_event = if innermost.ordered {
        Event::StartOrderedListItem {
            start: innermost.pending_start.take(),
            style_type: ListStyleType::Decimal,
            level,
            id: None,
        }
    } else {
        Event::StartUnorderedListItem {
            style_type: ListStyleType::Disc,
            level,
            id: None,
        }
    };
    innermost.item_open = true;
    self.queue.push_back(start_event);
    self.block_state = BlockState::Explicit;
}
/// Pushes a context for a newly opened list; the list is ordered when a start number is
/// given.
fn handle_list_start(&mut self, start_opt: Option<u64>) {
    let context = ListContext {
        item_open: false,
        ordered: start_opt.is_some(),
        pending_start: start_opt,
    };
    self.list_stack.push(context);
}
/// Starts a code block: enters preformatted mode, begins buffering its text and queues
/// `StartPreformatted` with the optional syntax name.
fn handle_start_code_block(&mut self, syntax: Option<String>) {
    self.in_preformatted = true;
    self.code_block_buffer = Some(String::new());
    self.push_event_start(Event::StartPreformatted { id: None, syntax });
}
/// Queues a `StartHeading` with the numeric level (1–6) of the parser's heading level.
fn handle_start_heading(&mut self, level: HeadingLevel) {
    self.push_event_start(Event::StartHeading {
        level: match level {
            HeadingLevel::H1 => 1,
            HeadingLevel::H2 => 2,
            HeadingLevel::H3 => 3,
            HeadingLevel::H4 => 4,
            HeadingLevel::H5 => 5,
            HeadingLevel::H6 => 6,
        },
        id: None,
    });
}
fn handle_start_image(&mut self, dest_url: String, title: Option<String>) {
self.flush_pending_paragraph_start();
if let Some(link) = self.link.take() {
if link.started {
self.queue.push_back(Event::EndLink);
} else {
self.queue.push_back(Event::StartLink {
href: link.href,
id: None,
title: link.title,
});
self.queue.push_back(Event::EndLink);
}
}
self.image = Some(ImageBuffer {
alt_buf: String::new(),
title,
url: dest_url,
});
}
/// Records a newly opened link; its `StartLink` event is deferred until content arrives or
/// the link ends.
fn handle_start_link(&mut self, dest_url: String, title: Option<String>) {
    let pending = LinkBuffer {
        href: dest_url,
        started: false,
        title,
    };
    self.link = Some(pending);
}
/// Starts a table cell; inside the table head it is emitted as a column-scoped header
/// cell, otherwise as a regular cell.
fn handle_start_table_cell(&mut self) {
    let start_event = if self.in_table_head {
        Event::StartTableHeader {
            scope: Some(TableHeaderScope::Column),
            abbr: None,
            colspan: None,
            rowspan: None,
            id: None,
        }
    } else {
        Event::StartTableCell {
            colspan: None,
            rowspan: None,
            id: None,
        }
    };
    self.push_event_start(start_event);
}
/// Enters the table head and opens the row that carries its header cells.
fn handle_start_table_head(&mut self) {
    self.push_event_start(Event::StartTableRow { id: None });
    self.in_table_head = true;
}
/// Dispatches a supported start tag to the matching handler.
fn handle_start_tag(&mut self, tag: MarkdownStartTag) {
    match tag {
        // Inline styles are only recorded; they are emitted with the next text.
        MarkdownStartTag::Emphasis => self.open_style(TextStyleKind::Italic),
        MarkdownStartTag::Strong => self.open_style(TextStyleKind::Bold),
        MarkdownStartTag::Strikethrough => self.open_style(TextStyleKind::Strikethrough),
        // Inline containers.
        MarkdownStartTag::Image { dest_url, title } => self.handle_start_image(dest_url, title),
        MarkdownStartTag::Link { dest_url, title } => self.handle_start_link(dest_url, title),
        // Paragraph starts are deferred until the paragraph produces content.
        MarkdownStartTag::Paragraph => {
            self.block_state = BlockState::PendingExplicit;
        }
        // Other blocks.
        MarkdownStartTag::BlockQuote => {
            self.push_event_start(Event::StartBlockQuote { id: None });
        }
        MarkdownStartTag::CodeBlock { syntax } => self.handle_start_code_block(syntax),
        MarkdownStartTag::Heading { level } => self.handle_start_heading(level),
        // Lists.
        MarkdownStartTag::List(start_opt) => self.handle_list_start(start_opt),
        MarkdownStartTag::Item => self.handle_item_start(),
        // Tables.
        MarkdownStartTag::Table => self.push_event_start(Event::StartTable { id: None }),
        MarkdownStartTag::TableHead => self.handle_start_table_head(),
        MarkdownStartTag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
        MarkdownStartTag::TableCell => self.handle_start_table_cell(),
    }
}
/// Handles a text run.
///
/// Inside an image the text is appended to the alt text; inside a code block it is
/// appended to the code buffer. Otherwise it is emitted as a `Text` event, after opening an
/// automatic paragraph when no block is open, starting a pending link and flushing pending
/// styles.
fn handle_text(&mut self, content: String) {
    if let Some(img) = &mut self.image {
        img.alt_buf.push_str(&content);
        return;
    }
    if let Some(buf) = &mut self.code_block_buffer {
        buf.push_str(&content);
        return;
    }
    // Open the automatic paragraph *before* emitting a pending link start. Otherwise the
    // link would begin outside the paragraph but end inside it (misnested events), e.g.
    // for a link that follows a code block inside a tight list item.
    if self.block_state == BlockState::None {
        self.queue.push_back(Event::StartParagraph {
            alignment: None,
            id: None,
        });
        self.block_state = BlockState::AutoParagraph;
    }
    self.emit_pending_link_start();
    self.flush_pending_styles();
    self.queue.push_back(Event::Text { content });
}
fn next_pulldown_event(&mut self) -> Option<MarkdownPulldownEvent> {
self.cell.with_dependent_mut(|_, dep| {
dep.0.next().map(|event| match event {
pulldown_cmark::Event::Start(tag) => markdown_start_tag(tag)
.map_or(MarkdownPulldownEvent::Ignored, MarkdownPulldownEvent::Start),
pulldown_cmark::Event::End(tag_end) => MarkdownPulldownEvent::End(tag_end),
pulldown_cmark::Event::Text(text) => {
MarkdownPulldownEvent::Text(text.into_string())
}
pulldown_cmark::Event::Code(code) => {
MarkdownPulldownEvent::Code(code.into_string())
}
pulldown_cmark::Event::HardBreak => MarkdownPulldownEvent::HardBreak,
pulldown_cmark::Event::SoftBreak => MarkdownPulldownEvent::SoftBreak,
pulldown_cmark::Event::Rule => MarkdownPulldownEvent::Rule,
pulldown_cmark::Event::DisplayMath(_)
| pulldown_cmark::Event::FootnoteReference(_)
| pulldown_cmark::Event::Html(_)
| pulldown_cmark::Event::InlineHtml(_)
| pulldown_cmark::Event::InlineMath(_)
| pulldown_cmark::Event::TaskListMarker(_) => MarkdownPulldownEvent::Ignored,
})
})
}
/// Translates one parser event into zero or more queued output events.
///
/// When the parser is exhausted, `EndDocument` is queued once and the reader moves to
/// `Phase::Finished`.
fn process_next_pulldown_event(&mut self) {
    let Some(pm_event) = self.next_pulldown_event() else {
        if self.phase != Phase::Finished {
            self.phase = Phase::Finished;
            self.queue.push_back(Event::EndDocument);
        }
        return;
    };
    match pm_event {
        MarkdownPulldownEvent::Ignored => {}
        MarkdownPulldownEvent::Rule => {
            self.queue.push_back(Event::ThematicBreak { id: None });
        }
        MarkdownPulldownEvent::Text(text) => self.handle_text(text),
        MarkdownPulldownEvent::Code(code) => self.handle_code(code),
        MarkdownPulldownEvent::Start(tag) => self.handle_start_tag(tag),
        MarkdownPulldownEvent::End(tag_end) => self.handle_end_tag(tag_end),
        line_break @ (MarkdownPulldownEvent::HardBreak | MarkdownPulldownEvent::SoftBreak) => {
            if let Some(img) = &mut self.image {
                // Breaks inside an image collapse to a space in the alt text.
                img.alt_buf.push(' ');
            } else if self.block_state != BlockState::PendingExplicit {
                // A break before the paragraph produced any content is dropped; otherwise
                // it is forwarded after starting a pending link.
                self.emit_pending_link_start();
                let forwarded = if matches!(line_break, MarkdownPulldownEvent::HardBreak) {
                    Event::LineBreak
                } else {
                    Event::SoftBreak
                };
                self.queue.push_back(forwarded);
            }
        }
    }
}
/// Queues `event` and records `state` as the new block state.
fn push_event(&mut self, event: Event, state: BlockState) {
    self.block_state = state;
    self.queue.push_back(event);
}
/// Queues a block end event: closes all text styles first, then queues `event` and marks
/// that no block is open.
fn push_event_end(&mut self, event: Event) {
    self.close_all_open_styles();
    self.queue.push_back(event);
    self.block_state = BlockState::None;
}
fn push_event_start(&mut self, event: Event) {
self.push_event(event, BlockState::Explicit);
}
}
impl EventSource for MarkdownReader {
    /// Returns the next translated event.
    ///
    /// The first call yields `StartDocument`. Later calls translate parser events until at
    /// least one output event is queued (or the parser is exhausted) and return the oldest
    /// queued event. Once everything, including `EndDocument`, has been returned, the result
    /// is `Ok(None)`.
    #[inline]
    fn next_event(&mut self) -> Result<Option<Event>> {
        match self.phase {
            Phase::NotStarted => {
                self.phase = Phase::Running;
                Ok(Some(Event::StartDocument {
                    id: None,
                    language: None,
                    metadata: None,
                }))
            }
            Phase::Running | Phase::Finished => {
                // When finished with an empty queue the loop is skipped and `pop_front`
                // yields `None`.
                while self.queue.is_empty() && self.phase != Phase::Finished {
                    self.process_next_pulldown_event();
                }
                Ok(self.queue.pop_front())
            }
        }
    }
}
/// Converts a parser start tag into the owned subset this reader handles.
///
/// Returns `None` for tags that are not translated. Empty link and image titles become
/// `None`.
fn markdown_start_tag(tag: Tag<'_>) -> Option<MarkdownStartTag> {
    let supported = match tag {
        Tag::DefinitionList
        | Tag::DefinitionListDefinition
        | Tag::DefinitionListTitle
        | Tag::FootnoteDefinition(_)
        | Tag::HtmlBlock
        | Tag::MetadataBlock(_)
        | Tag::Subscript
        | Tag::Superscript => return None,
        Tag::Paragraph => MarkdownStartTag::Paragraph,
        Tag::Heading { level, .. } => MarkdownStartTag::Heading { level },
        Tag::BlockQuote(_) => MarkdownStartTag::BlockQuote,
        Tag::CodeBlock(kind) => MarkdownStartTag::CodeBlock {
            syntax: code_block_syntax(kind),
        },
        Tag::List(start_opt) => MarkdownStartTag::List(start_opt),
        Tag::Item => MarkdownStartTag::Item,
        Tag::Table(_) => MarkdownStartTag::Table,
        Tag::TableHead => MarkdownStartTag::TableHead,
        Tag::TableRow => MarkdownStartTag::TableRow,
        Tag::TableCell => MarkdownStartTag::TableCell,
        Tag::Emphasis => MarkdownStartTag::Emphasis,
        Tag::Strong => MarkdownStartTag::Strong,
        Tag::Strikethrough => MarkdownStartTag::Strikethrough,
        Tag::Link {
            dest_url, title, ..
        } => MarkdownStartTag::Link {
            dest_url: dest_url.into_string(),
            title: cow_to_optional_string(title),
        },
        Tag::Image {
            dest_url, title, ..
        } => MarkdownStartTag::Image {
            dest_url: dest_url.into_string(),
            title: cow_to_optional_string(title),
        },
    };
    Some(supported)
}
/// Extracts the syntax name of a code block: the info string of a fenced block when it is
/// non-empty, otherwise `None` (including for indented blocks).
fn code_block_syntax(kind: CodeBlockKind<'_>) -> Option<String> {
    match kind {
        CodeBlockKind::Indented => None,
        CodeBlockKind::Fenced(info) => {
            if info.is_empty() {
                None
            } else {
                Some(info.into_string())
            }
        }
    }
}
fn cow_to_optional_string(value: CowStr<'_>) -> Option<String> {
if value.is_empty() {
None
} else {
Some(value.into_string())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Snapshot of the reader's queued events, in order, for whole-queue comparisons.
    fn queued(reader: &MarkdownReader) -> Vec<&Event> {
        reader.queue.iter().collect()
    }

    /// Inline code arriving with no open block must be wrapped in an automatic paragraph.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_code(String::from("code"));
        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::StartTextStyle {
                kind: TextStyleKind::Code,
                id: None,
            },
            Event::Text {
                content: String::from("code"),
            },
            Event::EndTextStyle,
        ];
        assert_eq!(queued(&reader), expected.iter().collect::<Vec<_>>());
    }

    /// Text arriving with no open block must be wrapped in an automatic paragraph.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::from_str("");
        reader.handle_text(String::from("hello"));
        let expected = [
            Event::StartParagraph {
                alignment: None,
                id: None,
            },
            Event::Text {
                content: String::from("hello"),
            },
        ];
        assert_eq!(queued(&reader), expected.iter().collect::<Vec<_>>());
    }
}
#[cfg(test)]
mod send_static_assertions {
    /// Compiles only when `T` is both `Send` and `'static`.
    fn assert_send_static<T>()
    where
        T: Send + 'static,
    {
    }

    /// Compile-time guarantee that the reader can be moved across threads.
    #[test]
    fn markdown_reader_is_send_static() {
        assert_send_static::<crate::MarkdownReader>();
    }
}