extern crate alloc;
use alloc::collections::VecDeque;
pub use docspec_core::EventSource;
use docspec_core::{Depth, Event, ImageSource, ListStyleType, Result, TableHeaderScope, TextStyle};
use pulldown_cmark::{CodeBlockKind, CowStr, HeadingLevel, Options, Parser, Tag, TagEnd};
/// Tracks which kind of block, if any, is currently open for inline content.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockState {
/// The reader emitted a `StartParagraph` on its own because text or inline
/// code arrived while no block was open. `handle_end_item` emits the
/// matching `EndParagraph`.
AutoParagraph,
/// A block start event has been emitted, so inline content is pushed to the
/// queue without opening another paragraph.
Explicit,
/// No block is open; the next text or inline code opens an automatic
/// paragraph.
None,
/// The parser reported a paragraph start, but `StartParagraph` is deferred
/// until the first content arrives. If the paragraph ends while still
/// pending, no paragraph events are emitted for it.
PendingExplicit,
}
/// Lifecycle of the reader as seen by `next_event`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
/// The parser is exhausted and `EndDocument` has been queued.
Finished,
/// `StartDocument` has not been returned yet.
NotStarted,
/// `StartDocument` has been returned and parser events are being converted.
Running,
}
/// Per-list bookkeeping kept on `MarkdownReader::list_stack`, one entry per
/// currently open (possibly nested) list.
struct ListContext {
/// `true` while an item start event has been emitted without its end event.
item_open: bool,
/// `true` when the parser supplied a start number for the list.
ordered: bool,
/// Start number of an ordered list; taken by the first item that is
/// emitted, so later items see `None`.
pending_start: Option<u64>,
}
/// Data collected between an image's start and end tags; the `Image` event is
/// emitted only once the end tag is seen.
struct ImageBuffer {
/// Alt text accumulated from text, inline code and line breaks (breaks are
/// stored as a single space) that occur inside the image.
alt_buf: String,
/// Image title, or `None` when the Markdown title was empty.
title: Option<String>,
/// Image destination as given in the Markdown source.
url: String,
}
/// State of the link currently being read. `StartLink` is deferred until the
/// link's first content arrives (or until the link ends).
struct LinkBuffer {
/// Link destination to put into the `StartLink` event.
href: String,
/// `true` once `StartLink` has been pushed to the queue for this link.
started: bool,
/// Link title to put into the `StartLink` event, or `None` when the
/// Markdown title was empty.
title: Option<String>,
}
/// Pull-based reader that parses Markdown with `pulldown_cmark` and exposes
/// the document as a stream of docspec [`Event`]s through [`EventSource`].
pub struct MarkdownReader<'a> {
/// Which kind of block is currently open for inline content.
block_state: BlockState,
/// Nesting depth of strong emphasis; positive means text is bold.
bold_depth: Depth,
/// Text collected for the code block being read, `None` outside code blocks.
code_block_buffer: Option<String>,
/// Image being read, `None` outside images.
image: Option<ImageBuffer>,
/// `true` while the cells being read belong to the table header.
in_table_head: bool,
/// Nesting depth of emphasis; positive means text is italic.
italic_depth: Depth,
/// Link being read, `None` outside links.
link: Option<LinkBuffer>,
/// Stack of open lists, innermost last.
list_stack: alloc::vec::Vec<ListContext>,
/// Underlying Markdown parser.
parser: Parser<'a>,
/// Lifecycle phase used by `next_event`.
phase: Phase,
/// Converted events waiting to be returned by `next_event`.
queue: VecDeque<Event>,
/// Nesting depth of strikethrough; positive means text is struck through.
strikethrough_depth: Depth,
}
impl<'a> MarkdownReader<'a> {
/// Emits the end event for the innermost list's open item, if there is one.
///
/// Does nothing when no list is active or the innermost list has no open
/// item. Otherwise the item is marked closed and the block state is reset to
/// [`BlockState::None`].
fn close_current_item_if_open(&mut self) {
    let Some(list) = self.list_stack.last_mut() else {
        return;
    };
    if !list.item_open {
        return;
    }
    list.item_open = false;
    // The end event has to match the kind of list the item belongs to.
    let end_event = if list.ordered {
        Event::EndOrderedListItem
    } else {
        Event::EndUnorderedListItem
    };
    self.queue.push_back(end_event);
    self.block_state = BlockState::None;
}
/// Builds the text style implied by the currently open inline formatting.
///
/// Bold, italic and strikethrough are each applied when the corresponding
/// depth counter is positive.
fn current_text_style(&self) -> TextStyle {
    let plain = TextStyle::default();
    let with_bold = if self.bold_depth.is_positive() {
        plain.bold()
    } else {
        plain
    };
    let with_italic = if self.italic_depth.is_positive() {
        with_bold.italic()
    } else {
        with_bold
    };
    if self.strikethrough_depth.is_positive() {
        with_italic.strikethrough()
    } else {
        with_italic
    }
}
/// Emits the deferred `StartLink` for the link being read, if it has not been
/// emitted yet.
///
/// A pending paragraph start is always flushed first, whether or not a link
/// is active, so that the paragraph opens before any inline content.
fn emit_pending_link_start(&mut self) {
    self.flush_pending_paragraph_start();
    let Some(link) = self.link.as_mut() else {
        return;
    };
    if link.started {
        return;
    }
    link.started = true;
    // Once `started` is set, the buffered destination and title are never
    // read again (the end-of-link paths only look at them for links that have
    // not started), so move them into the event instead of cloning.
    self.queue.push_back(Event::StartLink {
        href: core::mem::take(&mut link.href),
        id: None,
        title: link.title.take(),
    });
}
/// Emits the deferred `StartParagraph` if a paragraph start is pending.
///
/// Afterwards the block state is [`BlockState::Explicit`]. Any other block
/// state is left untouched and nothing is emitted.
fn flush_pending_paragraph_start(&mut self) {
    if self.block_state != BlockState::PendingExplicit {
        return;
    }
    self.block_state = BlockState::Explicit;
    self.queue.push_back(Event::StartParagraph {
        alignment: None,
        id: None,
    });
}
/// Handles an inline code span.
///
/// Inside an image the code is appended to the alt text. Otherwise it is
/// emitted as a `Text` event carrying the current style plus the code flag,
/// after any pending link or paragraph start; if no block is open, an
/// automatic paragraph is opened first.
fn handle_code(&mut self, content: String) {
    if let Some(pending_image) = self.image.as_mut() {
        pending_image.alt_buf.push_str(&content);
        return;
    }
    self.emit_pending_link_start();
    if matches!(self.block_state, BlockState::None) {
        self.block_state = BlockState::AutoParagraph;
        self.queue.push_back(Event::StartParagraph {
            alignment: None,
            id: None,
        });
    }
    let style = self.current_text_style().code();
    self.queue.push_back(Event::Text { content, style });
}
/// Finishes a code block: emits its buffered text, then `EndPreformatted`.
///
/// A single trailing newline is removed from the buffered text, and no
/// `Text` event is emitted when nothing remains.
fn handle_end_code_block(&mut self) {
    if let Some(mut content) = self.code_block_buffer.take() {
        // Drop exactly one trailing line feed, if present.
        if content.ends_with('\n') {
            content.pop();
        }
        if !content.is_empty() {
            self.queue.push_back(Event::Text {
                content,
                style: TextStyle::default(),
            });
        }
    }
    self.push_event_end(Event::EndPreformatted);
}
fn handle_end_image(&mut self) {
let Some(img) = self.image.take() else { return };
self.flush_pending_paragraph_start();
let trimmed = img.alt_buf.trim();
let alt = if trimmed.is_empty() {
None
} else {
Some(trimmed.to_owned())
};
let decorative = alt.is_none();
self.queue.push_back(Event::Image {
source: ImageSource::Uri { uri: img.url },
alt,
title: img.title,
decorative,
id: None,
});
}
/// Handles the end of a list item.
///
/// Closes an automatic paragraph the reader opened inside the item, emits the
/// item's end event if the item is still open, and resets the block state.
fn handle_end_item(&mut self) {
// Automatic paragraphs have no end tag in the parser stream, so they are
// closed here.
if self.block_state == BlockState::AutoParagraph {
self.queue.push_back(Event::EndParagraph);
}
self.close_current_item_if_open();
self.block_state = BlockState::None;
}
fn handle_end_link(&mut self) {
let Some(link) = self.link.take() else { return };
if link.started {
self.queue.push_back(Event::EndLink);
} else {
self.flush_pending_paragraph_start();
self.queue.push_back(Event::StartLink {
href: link.href,
id: None,
title: link.title,
});
self.queue.push_back(Event::EndLink);
}
}
/// Handles the end of a list: closes an item that is still open, removes the
/// list from the stack and resets the block state.
fn handle_end_list(&mut self) {
// Must run before the pop so the end event matches this list's kind.
self.close_current_item_if_open();
self.list_stack.pop();
self.block_state = BlockState::None;
}
/// Emits the end event for a table cell: `EndTableHeader` while reading the
/// table head, `EndTableCell` otherwise.
fn handle_end_table_cell(&mut self) {
    let end_event = if self.in_table_head {
        Event::EndTableHeader
    } else {
        Event::EndTableCell
    };
    self.push_event_end(end_event);
}
/// Handles the end of the table head: closes the row that was opened for the
/// header cells and leaves header mode.
fn handle_end_table_head(&mut self) {
self.push_event_end(Event::EndTableRow);
self.in_table_head = false;
}
/// Dispatches a parser end tag to the matching handler or end event.
///
/// Inline formatting tags only decrement their depth counter. Tags for
/// constructs this reader does not convert are ignored.
fn handle_end_tag(&mut self, tag_end: TagEnd) {
    match tag_end {
        TagEnd::BlockQuote(_) => self.push_event_end(Event::EndBlockQuote),
        TagEnd::CodeBlock => self.handle_end_code_block(),
        TagEnd::Emphasis => self.italic_depth.dec(),
        TagEnd::Heading(_) => self.push_event_end(Event::EndHeading),
        TagEnd::Image => self.handle_end_image(),
        TagEnd::Item => self.handle_end_item(),
        TagEnd::Link => self.handle_end_link(),
        TagEnd::List(_) => self.handle_end_list(),
        // The paragraph never produced content, so its start was never
        // emitted and there is nothing to close.
        TagEnd::Paragraph if self.block_state == BlockState::PendingExplicit => {
            self.block_state = BlockState::None;
        }
        TagEnd::Paragraph => self.push_event_end(Event::EndParagraph),
        TagEnd::Strikethrough => self.strikethrough_depth.dec(),
        TagEnd::Strong => self.bold_depth.dec(),
        TagEnd::Table => self.push_event_end(Event::EndTable),
        TagEnd::TableCell => self.handle_end_table_cell(),
        TagEnd::TableHead => self.handle_end_table_head(),
        TagEnd::TableRow => self.push_event_end(Event::EndTableRow),
        TagEnd::DefinitionList
        | TagEnd::DefinitionListDefinition
        | TagEnd::DefinitionListTitle
        | TagEnd::FootnoteDefinition
        | TagEnd::HtmlBlock
        | TagEnd::MetadataBlock(_)
        | TagEnd::Subscript
        | TagEnd::Superscript => {}
    }
}
/// Emits the start event for a list item of the innermost list.
///
/// The item's level is the zero-based nesting depth of its list, saturating
/// at `u32::MAX`. For ordered lists the list's start number is handed to the
/// first item only. Does nothing when no list is active.
fn handle_item_start(&mut self) {
    let nesting = self.list_stack.len().saturating_sub(1);
    let level = u32::try_from(nesting).unwrap_or(u32::MAX);
    let Some(list) = self.list_stack.last_mut() else {
        return;
    };
    let start_event = if list.ordered {
        Event::StartOrderedListItem {
            start: list.pending_start.take(),
            style_type: ListStyleType::Decimal,
            level,
            id: None,
        }
    } else {
        Event::StartUnorderedListItem {
            style_type: ListStyleType::Disc,
            level,
            id: None,
        }
    };
    list.item_open = true;
    self.queue.push_back(start_event);
    self.block_state = BlockState::Explicit;
}
fn handle_list_start(&mut self, start_opt: Option<u64>) {
self.list_stack.push(ListContext {
item_open: false,
ordered: start_opt.is_some(),
pending_start: start_opt,
});
}
/// Starts a code block: begins buffering its text and emits
/// `StartPreformatted`.
///
/// The syntax is the info string of a fenced block when it is non-empty;
/// indented blocks and fences without an info string carry no syntax.
fn handle_start_code_block(&mut self, kind: CodeBlockKind<'a>) {
    let syntax = match kind {
        CodeBlockKind::Fenced(info) => (!info.is_empty()).then(|| info.into_string()),
        CodeBlockKind::Indented => None,
    };
    self.code_block_buffer = Some(String::new());
    self.push_event_start(Event::StartPreformatted { id: None, syntax });
}
/// Emits `StartHeading` with the numeric level (1 to 6) of the Markdown
/// heading.
fn handle_start_heading(&mut self, level: HeadingLevel) {
    self.push_event_start(Event::StartHeading {
        level: match level {
            HeadingLevel::H1 => 1,
            HeadingLevel::H2 => 2,
            HeadingLevel::H3 => 3,
            HeadingLevel::H4 => 4,
            HeadingLevel::H5 => 5,
            HeadingLevel::H6 => 6,
        },
        id: None,
    });
}
/// Starts buffering an image.
///
/// A pending paragraph start is flushed first. A link that is being read is
/// closed before the image (a link without content is emitted as an empty
/// `StartLink`/`EndLink` pair), so the image event is not nested inside the
/// link. An empty title is stored as `None`.
fn handle_start_image(&mut self, dest_url: CowStr<'a>, title: CowStr<'a>) {
    self.flush_pending_paragraph_start();
    // Closing the link here is exactly what the end-of-link handler does;
    // its own paragraph flush is a no-op because the flush above already ran.
    self.handle_end_link();
    let title = (!title.is_empty()).then(|| title.into_string());
    self.image = Some(ImageBuffer {
        alt_buf: String::new(),
        title,
        url: dest_url.into_string(),
    });
}
/// Starts buffering a link; its `StartLink` event is deferred until the
/// link's first content arrives or the link ends.
///
/// Links that occur inside an image description are ignored: their text is
/// collected into the image's alt text, so they must not produce link events.
/// An empty title is stored as `None`.
fn handle_start_link(&mut self, dest_url: CowStr<'a>, title: CowStr<'a>) {
    // Inside `![alt [text](url)](img)` the link only contributes plain text to
    // the alt string. Buffering it would make the end-of-link handler emit a
    // spurious empty `StartLink`/`EndLink` pair ahead of the image.
    if self.image.is_some() {
        return;
    }
    self.link = Some(LinkBuffer {
        href: dest_url.into_string(),
        started: false,
        title: if title.is_empty() {
            None
        } else {
            Some(title.into_string())
        },
    });
}
/// Emits the start event for a table cell.
///
/// Cells of the table head become column-scoped `StartTableHeader` events;
/// all other cells become `StartTableCell`. Spans are never set.
fn handle_start_table_cell(&mut self) {
    let start_event = if self.in_table_head {
        Event::StartTableHeader {
            scope: Some(TableHeaderScope::Column),
            abbr: None,
            colspan: None,
            rowspan: None,
            id: None,
        }
    } else {
        Event::StartTableCell {
            colspan: None,
            rowspan: None,
            id: None,
        }
    };
    self.push_event_start(start_event);
}
/// Handles the start of the table head: enters header mode and opens a row
/// for the header cells.
fn handle_start_table_head(&mut self) {
self.in_table_head = true;
self.push_event_start(Event::StartTableRow { id: None });
}
/// Dispatches a parser start tag to the matching handler or start event.
///
/// Inline formatting tags only increment their depth counter. Tags for
/// constructs this reader does not convert are ignored.
fn handle_start_tag(&mut self, tag: Tag<'a>) {
match tag {
Tag::BlockQuote(_) => self.push_event_start(Event::StartBlockQuote { id: None }),
Tag::CodeBlock(kind) => self.handle_start_code_block(kind),
Tag::Emphasis => self.italic_depth.inc(),
Tag::Heading { level, .. } => self.handle_start_heading(level),
Tag::Image {
dest_url, title, ..
} => self.handle_start_image(dest_url, title),
Tag::Item => self.handle_item_start(),
Tag::Link {
dest_url, title, ..
} => self.handle_start_link(dest_url, title),
Tag::List(start_opt) => self.handle_list_start(start_opt),
// The paragraph start event is deferred until the first content arrives.
Tag::Paragraph => self.block_state = BlockState::PendingExplicit,
Tag::Strikethrough => self.strikethrough_depth.inc(),
Tag::Strong => self.bold_depth.inc(),
Tag::Table(_) => self.push_event_start(Event::StartTable { id: None }),
Tag::TableCell => self.handle_start_table_cell(),
Tag::TableHead => self.handle_start_table_head(),
Tag::TableRow => self.push_event_start(Event::StartTableRow { id: None }),
// Not converted: no events are produced for these tags.
Tag::DefinitionList
| Tag::DefinitionListDefinition
| Tag::DefinitionListTitle
| Tag::FootnoteDefinition(_)
| Tag::HtmlBlock
| Tag::MetadataBlock(_)
| Tag::Subscript
| Tag::Superscript => {}
}
}
/// Handles a run of text.
///
/// Inside an image the text is appended to the alt text; inside a code block
/// it is appended to the code buffer. Otherwise it is emitted as a `Text`
/// event with the current style, after any pending link or paragraph start;
/// if no block is open, an automatic paragraph is opened first.
fn handle_text(&mut self, content: String) {
    if let Some(pending_image) = self.image.as_mut() {
        pending_image.alt_buf.push_str(&content);
        return;
    }
    if let Some(code) = self.code_block_buffer.as_mut() {
        code.push_str(&content);
        return;
    }
    self.emit_pending_link_start();
    if matches!(self.block_state, BlockState::None) {
        self.block_state = BlockState::AutoParagraph;
        self.queue.push_back(Event::StartParagraph {
            alignment: None,
            id: None,
        });
    }
    let style = self.current_text_style();
    self.queue.push_back(Event::Text { content, style });
}
/// Creates a reader over `markdown`.
///
/// The parser is configured with the table and strikethrough extensions.
/// Nothing is parsed until events are requested through
/// [`EventSource::next_event`].
#[inline]
#[must_use]
pub fn new(markdown: &'a str) -> Self {
    Self {
        block_state: BlockState::None,
        bold_depth: Depth::default(),
        code_block_buffer: None,
        image: None,
        in_table_head: false,
        italic_depth: Depth::default(),
        link: None,
        list_stack: Vec::new(),
        parser: Parser::new_ext(
            markdown,
            Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH,
        ),
        phase: Phase::NotStarted,
        queue: VecDeque::new(),
        strikethrough_depth: Depth::default(),
    }
}
/// Pulls one event from the parser and converts it, possibly queueing zero
/// or more output events.
///
/// When the parser is exhausted, `EndDocument` is queued once and the reader
/// enters [`Phase::Finished`].
fn process_next_pulldown_event(&mut self) {
    let Some(parsed) = self.parser.next() else {
        if self.phase != Phase::Finished {
            self.queue.push_back(Event::EndDocument);
            self.phase = Phase::Finished;
        }
        return;
    };
    match parsed {
        pulldown_cmark::Event::Start(tag) => self.handle_start_tag(tag),
        pulldown_cmark::Event::End(tag_end) => self.handle_end_tag(tag_end),
        pulldown_cmark::Event::Text(text) => self.handle_text(text.into_string()),
        pulldown_cmark::Event::Code(code) => self.handle_code(code.into_string()),
        line_break @ (pulldown_cmark::Event::HardBreak | pulldown_cmark::Event::SoftBreak) => {
            if let Some(pending_image) = self.image.as_mut() {
                // Breaks inside an image description become a space in the
                // alt text.
                pending_image.alt_buf.push(' ');
            } else if self.block_state != BlockState::PendingExplicit {
                // A break before any paragraph content is dropped; otherwise
                // it is forwarded as the matching break event.
                self.emit_pending_link_start();
                let break_event = if matches!(line_break, pulldown_cmark::Event::HardBreak) {
                    Event::LineBreak
                } else {
                    Event::SoftBreak
                };
                self.queue.push_back(break_event);
            }
        }
        pulldown_cmark::Event::Rule => {
            self.queue.push_back(Event::ThematicBreak { id: None });
        }
        // Not converted: no events are produced for these.
        pulldown_cmark::Event::DisplayMath(_)
        | pulldown_cmark::Event::FootnoteReference(_)
        | pulldown_cmark::Event::Html(_)
        | pulldown_cmark::Event::InlineHtml(_)
        | pulldown_cmark::Event::InlineMath(_)
        | pulldown_cmark::Event::TaskListMarker(_) => {}
    }
}
/// Queues `event` and sets the block state to `state`.
fn push_event(&mut self, event: Event, state: BlockState) {
self.queue.push_back(event);
self.block_state = state;
}
/// Queues a block end event and marks that no block is open
/// ([`BlockState::None`]).
fn push_event_end(&mut self, event: Event) {
self.push_event(event, BlockState::None);
}
fn push_event_start(&mut self, event: Event) {
self.push_event(event, BlockState::Explicit);
}
}
impl EventSource for MarkdownReader<'_> {
    /// Returns the next document event, or `Ok(None)` once the stream is
    /// exhausted.
    ///
    /// The first call returns `StartDocument`. Later calls drain the queue of
    /// converted events, pulling from the parser whenever the queue is empty.
    /// This implementation never returns an error.
    #[inline]
    fn next_event(&mut self) -> Result<Option<Event>> {
        if self.phase == Phase::NotStarted {
            self.phase = Phase::Running;
            return Ok(Some(Event::StartDocument {
                id: None,
                language: None,
                metadata: None,
            }));
        }
        loop {
            if let Some(event) = self.queue.pop_front() {
                return Ok(Some(event));
            }
            if self.phase == Phase::Finished {
                return Ok(None);
            }
            // A single parser event may produce no output, so keep pulling
            // until something is queued or the parser runs dry.
            self.process_next_pulldown_event();
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The paragraph start event the reader emits on its own for bare inline
    /// content.
    fn auto_paragraph_start() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    /// Inline code arriving while no block is open must be wrapped in an
    /// automatically opened paragraph.
    #[test]
    fn handle_code_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::new("");
        reader.handle_code(String::from("code"));
        assert_eq!(
            reader.queue,
            [
                auto_paragraph_start(),
                Event::Text {
                    content: String::from("code"),
                    style: TextStyle::default().code(),
                },
            ]
        );
    }

    /// Text arriving while no block is open must be wrapped in an
    /// automatically opened paragraph.
    #[test]
    fn handle_text_without_open_block_auto_opens_paragraph() {
        let mut reader = MarkdownReader::new("");
        reader.handle_text(String::from("hello"));
        assert_eq!(
            reader.queue,
            [
                auto_paragraph_start(),
                Event::Text {
                    content: String::from("hello"),
                    style: TextStyle::default(),
                },
            ]
        );
    }
}