use devup_editor_core::{Block, BlockId, Document, IdGenerator, Mark, TextSpan};
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use serde_json::{Map, Value};
/// Parses Markdown source into a [`Document`].
///
/// The source is tokenised by `pulldown_cmark` with the task-list and
/// strikethrough extensions enabled. Every event is fed to a [`DocBuilder`],
/// which pushes one root block per recognised Markdown block onto the
/// returned document. Block ids are drawn from `id_gen`.
pub fn parse_markdown(src: &str, id_gen: &mut dyn IdGenerator) -> Document {
    let mut doc = Document::new();
    {
        // The builder mutably borrows `doc`; keep it in its own scope so the
        // document can be returned afterwards.
        let mut builder = DocBuilder::new(&mut doc, id_gen);
        let options = Options::ENABLE_TASKLISTS | Options::ENABLE_STRIKETHROUGH;
        Parser::new_ext(src, options).for_each(|event| builder.handle(event));
        builder.finish();
    }
    doc
}
/// Inline formatting state and the text spans collected for the block that is
/// currently being built.
#[derive(Default)]
struct InlineState {
/// Count of currently open `Strong` tags; the "bold" mark applies while > 0.
bold: u32,
/// Count of currently open `Emphasis` tags; the "italic" mark applies while > 0.
italic: u32,
/// Raised around the text of an inline `Code` event; the "code" mark applies while > 0.
code: u32,
/// Count of currently open `Strikethrough` tags; the "strike" mark applies while > 0.
strike: u32,
/// Stack of mark groups pushed by inline HTML handling and popped again on
/// a closing tag; every group on the stack is applied to new text.
html_marks: Vec<Vec<Mark>>,
/// Spans accumulated so far; moved out by `take` when a block is flushed.
spans: Vec<TextSpan>,
}
impl InlineState {
    /// Returns the marks that apply to text pushed right now.
    ///
    /// Built-in marks come first, in the fixed order bold, italic, code,
    /// strike, followed by every mark on the HTML stack from bottom to top.
    fn current_marks(&self) -> Vec<Mark> {
        let toggles = [
            (self.bold, "bold"),
            (self.italic, "italic"),
            (self.code, "code"),
            (self.strike, "strike"),
        ];
        let mut active: Vec<Mark> = toggles
            .iter()
            .filter(|&&(depth, _)| depth > 0)
            .map(|&(_, name)| Mark::new(name))
            .collect();
        active.extend(self.html_marks.iter().flatten().cloned());
        active
    }

    /// Appends `text` under the current marks.
    ///
    /// Empty text is ignored. Text whose marks equal those of the most recent
    /// span is appended to that span; otherwise a new span is started.
    fn push_text(&mut self, text: &str) {
        if text.is_empty() {
            return;
        }
        let marks = self.current_marks();
        match self.spans.last_mut() {
            Some(tail) if tail.marks == marks => tail.text.push_str(text),
            _ => self.spans.push(TextSpan {
                text: text.to_string(),
                marks,
            }),
        }
    }

    /// Moves the collected spans out, leaving an empty list behind.
    fn take(&mut self) -> Vec<TextSpan> {
        let mut collected = Vec::new();
        std::mem::swap(&mut collected, &mut self.spans);
        collected
    }
}
/// The kind of block the builder is currently collecting inline content for.
enum CurrentBlock {
/// No block is open.
None,
/// A paragraph; only entered when no other block is open.
Paragraph,
/// A heading; its level is read from the end tag when the block is flushed.
Heading,
/// A code block.
Code {
/// Info string of a fenced block, or `None` when it is empty or the block
/// is not fenced.
language: Option<String>,
},
/// A block quote.
Quote,
/// A list item.
ListItem {
/// Whether the innermost enclosing list was opened with a start number.
ordered: bool,
/// List nesting depth minus one, so top-level items have indent 0.
indent: usize,
/// `Some(checked)` once a task-list marker was seen for this item.
todo: Option<bool>,
},
}
/// Consumes parser events and appends the resulting root blocks to a document.
struct DocBuilder<'a> {
/// Document that receives the finished blocks.
doc: &'a mut Document,
/// Source of ids for newly created blocks.
id_gen: &'a mut dyn IdGenerator,
/// Inline marks and spans of the block being built.
inline: InlineState,
/// Kind of the block being built.
current: CurrentBlock,
/// Number of lists currently open.
list_depth: usize,
/// One entry per open list: `true` when that list was opened with a start number.
list_ordered_stack: Vec<bool>,
}
impl<'a> DocBuilder<'a> {
    /// Creates a builder that appends blocks to `doc` and draws block ids
    /// from `id_gen`. No block is open initially.
    fn new(doc: &'a mut Document, id_gen: &'a mut dyn IdGenerator) -> Self {
        Self {
            doc,
            id_gen,
            inline: InlineState::default(),
            current: CurrentBlock::None,
            list_depth: 0,
            list_ordered_stack: Vec::new(),
        }
    }

    /// Processes a single parser event.
    ///
    /// Start/end tags open and close blocks or toggle inline marks, text is
    /// appended to the current spans, soft and hard breaks both become a
    /// `"\n"`, and raw HTML (block or inline) is inspected for `<span>`
    /// styling. All other events are ignored.
    fn handle(&mut self, event: Event<'_>) {
        match event {
            Event::Start(tag) => self.start(tag),
            Event::End(tag) => self.end(tag),
            Event::Text(t) => self.inline.push_text(&t),
            Event::Html(html) | Event::InlineHtml(html) => self.handle_inline_html(&html),
            Event::Code(t) => {
                // Inline code arrives as one event, so the "code" mark is
                // only raised for the duration of this push.
                self.inline.code += 1;
                self.inline.push_text(&t);
                self.inline.code -= 1;
            }
            Event::SoftBreak | Event::HardBreak => self.inline.push_text("\n"),
            Event::TaskListMarker(checked) => {
                if let CurrentBlock::ListItem { todo, .. } = &mut self.current {
                    *todo = Some(checked);
                }
            }
            _ => {}
        }
    }

    /// Handles an opening tag: selects the current block kind, tracks list
    /// nesting, or raises an inline mark counter.
    fn start(&mut self, tag: Tag<'_>) {
        match tag {
            Tag::Paragraph => {
                // Paragraphs nested in another open block (quote, list item)
                // contribute their text to that block instead.
                if matches!(self.current, CurrentBlock::None) {
                    self.current = CurrentBlock::Paragraph;
                }
            }
            Tag::Heading { .. } => {
                self.current = CurrentBlock::Heading;
            }
            Tag::CodeBlock(kind) => {
                let language = match kind {
                    pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
                        Some(lang.into_string())
                    }
                    _ => None,
                };
                self.current = CurrentBlock::Code { language };
            }
            Tag::BlockQuote(_) => {
                self.current = CurrentBlock::Quote;
            }
            Tag::List(start) => {
                // A list that opens while an item is still pending is nested
                // inside that item. Emit the parent item first; otherwise the
                // child's `Item` tag would overwrite it and the parent's text
                // would be merged into the first child block.
                self.flush_list_item();
                self.list_depth += 1;
                self.list_ordered_stack.push(start.is_some());
            }
            Tag::Item => {
                let ordered = *self.list_ordered_stack.last().unwrap_or(&false);
                self.current = CurrentBlock::ListItem {
                    ordered,
                    indent: self.list_depth.saturating_sub(1),
                    todo: None,
                };
            }
            Tag::Strong => self.inline.bold += 1,
            Tag::Emphasis => self.inline.italic += 1,
            Tag::Strikethrough => self.inline.strike += 1,
            _ => {}
        }
    }

    /// Handles a closing tag: flushes the block it terminates, unwinds list
    /// nesting, or lowers an inline mark counter.
    fn end(&mut self, tag: TagEnd) {
        match tag {
            TagEnd::Paragraph => {
                if matches!(self.current, CurrentBlock::Paragraph) {
                    self.flush_block("paragraph", Map::new());
                }
            }
            TagEnd::Heading(level) => {
                if matches!(self.current, CurrentBlock::Heading) {
                    let mut props = Map::new();
                    props.insert("level".into(), Value::from(u64::from(heading_level(level))));
                    self.flush_block("heading", props);
                }
            }
            TagEnd::CodeBlock => {
                if let CurrentBlock::Code { language } = &mut self.current {
                    let mut props = Map::new();
                    // The block is reset right after, so the language can be
                    // moved out instead of cloned.
                    if let Some(lang) = language.take() {
                        props.insert("language".into(), Value::String(lang));
                    }
                    self.flush_block("code", props);
                }
            }
            TagEnd::BlockQuote(_) => {
                self.flush_block("quote", Map::new());
            }
            TagEnd::List(_) => {
                self.list_depth = self.list_depth.saturating_sub(1);
                self.list_ordered_stack.pop();
            }
            // No-op when the item was already emitted because a nested list
            // opened inside it.
            TagEnd::Item => self.flush_list_item(),
            TagEnd::Strong => self.inline.bold = self.inline.bold.saturating_sub(1),
            TagEnd::Emphasis => self.inline.italic = self.inline.italic.saturating_sub(1),
            TagEnd::Strikethrough => self.inline.strike = self.inline.strike.saturating_sub(1),
            _ => {}
        }
    }

    /// Emits the pending list item, if any, as a "todo" or "list" block.
    ///
    /// Items that carried a task-list marker become "todo" blocks with a
    /// `checked` prop; all others become "list" blocks with a `style` prop of
    /// "ordered" or "unordered". An `indent` prop is added for nested items
    /// only. Does nothing when the current block is not a list item.
    fn flush_list_item(&mut self) {
        let CurrentBlock::ListItem {
            ordered,
            indent,
            todo,
        } = self.current
        else {
            return;
        };
        let mut props = Map::new();
        let block_type = match todo {
            Some(checked) => {
                props.insert("checked".into(), Value::Bool(checked));
                "todo"
            }
            None => {
                let style = if ordered { "ordered" } else { "unordered" };
                props.insert("style".into(), Value::String(style.into()));
                "list"
            }
        };
        if indent > 0 {
            props.insert(
                "indent".into(),
                Value::from(i64::try_from(indent).unwrap_or(i64::MAX)),
            );
        }
        self.flush_block(block_type, props);
    }

    /// Pushes a new root block of type `ty` holding the collected spans and
    /// `props`, then marks that no block is open.
    fn flush_block(&mut self, ty: &str, props: Map<String, Value>) {
        let spans = self.inline.take();
        let id = BlockId::new(self.id_gen.next_id().to_string());
        let mut block = Block::new(id, ty);
        block.content = spans;
        block.props = props;
        self.doc.push_root_block(block);
        self.current = CurrentBlock::None;
    }

    /// Flushes a block that is still open at end of input as a paragraph.
    fn finish(&mut self) {
        if !matches!(self.current, CurrentBlock::None) {
            self.flush_block("paragraph", Map::new());
        }
    }

    /// Applies the effect of a raw HTML fragment to the inline mark stack:
    /// pushes a mark group, pops the top group, or leaves the stack untouched.
    fn handle_inline_html(&mut self, html: &str) {
        match parse_html_span_marks(html) {
            HtmlSpanMarks::Push(marks) => self.inline.html_marks.push(marks),
            HtmlSpanMarks::Pop => {
                self.inline.html_marks.pop();
            }
            HtmlSpanMarks::Ignore => {}
        }
    }
}
/// Effect of a raw HTML fragment on the stack of HTML-derived inline marks.
enum HtmlSpanMarks {
/// An opening `<span>`: push this group of marks onto the stack.
Push(Vec<Mark>),
/// A closing `</span>`: pop the most recently pushed group.
Pop,
/// Any other fragment: leave the stack unchanged.
Ignore,
}
/// Classifies a raw HTML fragment by its effect on the inline mark stack.
///
/// * A fragment starting with `</span` (case-insensitive) yields
///   [`HtmlSpanMarks::Pop`].
/// * A fragment starting with `<span` yields [`HtmlSpanMarks::Push`]. The
///   marks are derived from its `style` attribute: `color` produces a
///   "color" mark and `background-color` produces a "highlight" mark, each
///   carrying the value under `attrs.style`. When a property is declared
///   more than once the last non-empty value wins.
/// * Anything else yields [`HtmlSpanMarks::Ignore`].
///
/// An opening span without any recognised declaration still yields `Push`,
/// with an empty mark list. Every `</span>` pops one entry, so each opening
/// span must push one; otherwise the closing tag of an unstyled inner span
/// would pop the marks of an enclosing styled span too early.
fn parse_html_span_marks(html: &str) -> HtmlSpanMarks {
    let trimmed = html.trim();
    let lower = trimmed.to_lowercase();
    if lower.starts_with("</span") {
        return HtmlSpanMarks::Pop;
    }
    if !lower.starts_with("<span") {
        return HtmlSpanMarks::Ignore;
    }

    let mut color: Option<String> = None;
    let mut background: Option<String> = None;
    if let Some(style_attr) = extract_html_attr(trimmed, "style") {
        for decl in style_attr.split(';') {
            let Some((raw_key, raw_value)) = decl.split_once(':') else {
                continue;
            };
            let value = raw_value.trim();
            if value.is_empty() {
                continue;
            }
            match raw_key.trim().to_ascii_lowercase().as_str() {
                "color" => color = Some(value.to_string()),
                "background-color" => background = Some(value.to_string()),
                _ => {}
            }
        }
    }

    // Builds a mark of type `mark_ty` whose attrs are `{ "style": { css_key: value } }`.
    let style_mark = |mark_ty: &str, css_key: &str, value: String| {
        let mut style = Map::new();
        style.insert(css_key.into(), Value::String(value));
        let mut attrs = Map::new();
        attrs.insert("style".into(), Value::Object(style));
        Mark::with_attrs(mark_ty, attrs)
    };

    let mut marks = Vec::new();
    if let Some(value) = color {
        marks.push(style_mark("color", "color", value));
    }
    if let Some(value) = background {
        marks.push(style_mark("highlight", "backgroundColor", value));
    }
    HtmlSpanMarks::Push(marks)
}
/// Extracts the quoted value of attribute `attr_name` from an HTML tag.
///
/// The attribute name is matched case-insensitively (ASCII) and must be
/// immediately followed by `=` and a single- or double-quoted value. A match
/// only counts when it starts at an attribute-name boundary, i.e. at the
/// start of `tag` or right after whitespace, a quote or `/`. This prevents
/// `style=` from matching inside `data-style=`.
///
/// Returns `None` when no such attribute exists, when its value is not
/// quoted, or when the closing quote is missing.
fn extract_html_attr(tag: &str, attr_name: &str) -> Option<String> {
    // ASCII-only lowercasing leaves byte offsets unchanged, so positions
    // found in `lower` can be used to slice `tag`.
    let lower = tag.to_ascii_lowercase();
    let needle = format!("{}=", attr_name.to_ascii_lowercase());
    let mut search_from = 0;
    loop {
        let name_start = search_from + lower[search_from..].find(&needle)?;
        let value_start = name_start + needle.len();
        let at_name_boundary = match lower[..name_start].chars().next_back() {
            None => true,
            Some(prev) => prev.is_ascii_whitespace() || matches!(prev, '"' | '\'' | '/'),
        };
        if at_name_boundary {
            let rest = &tag[value_start..];
            let quote = rest.chars().next()?;
            if quote != '"' && quote != '\'' {
                return None;
            }
            // Both accepted quote characters are one byte wide.
            let rest = &rest[1..];
            let end = rest.find(quote)?;
            return Some(rest[..end].to_string());
        }
        // The match was the tail of a longer attribute name; keep looking.
        search_from = value_start;
    }
}
fn heading_level(level: HeadingLevel) -> u8 {
match level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use devup_editor_core::SequentialIdGenerator;

    /// Parses `md` using a `SequentialIdGenerator` constructed with "test";
    /// the tests below look blocks up under ids such as "test-1".
    fn parse(md: &str) -> Document {
        let mut ids = SequentialIdGenerator::new("test");
        parse_markdown(md, &mut ids)
    }

    #[test]
    fn parse_paragraph() {
        let doc = parse("hello world");
        assert_eq!(doc.root_block_count(), 1);

        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();
        assert_eq!(block.ty, "paragraph");
        assert_eq!(block.plain_text(), "hello world");
    }

    #[test]
    fn parse_heading() {
        let doc = parse("# Title");
        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();
        assert_eq!(block.ty, "heading");

        let level = block.props.get("level").and_then(Value::as_u64);
        assert_eq!(level, Some(1));
    }

    #[test]
    fn parse_bold_italic() {
        let doc = parse("**bold** and *italic*");
        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();

        // True when at least one span of the block carries the named mark.
        let has_mark = |name: &str| {
            block
                .content
                .iter()
                .any(|span| span.marks.iter().any(|mark| mark.ty == name))
        };
        assert!(has_mark("bold"));
        assert!(has_mark("italic"));
    }

    #[test]
    fn parse_inline_html_color_and_highlight_marks() {
        let doc = parse("<span style=\"color:#ff0000;background-color:#fff000\">가나</span>");
        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();
        assert_eq!(block.content.len(), 1);

        let marks = &block.content[0].marks;
        for expected in ["color", "highlight"] {
            assert!(marks.iter().any(|mark| mark.ty == expected));
        }
    }

    #[test]
    fn parse_todo() {
        let doc = parse("- [x] done\n- [ ] todo");
        assert_eq!(doc.root_block_count(), 2);

        let first_id = BlockId::new("test-1");
        let first = doc.get_block(&first_id).unwrap();
        assert_eq!(first.ty, "todo");

        let checked = first.props.get("checked").and_then(Value::as_bool);
        assert_eq!(checked, Some(true));
    }

    #[test]
    fn parse_nested_todo_preserves_indent() {
        let doc = parse("- [ ] parent\n - [x] child");

        // Locate the checked todo block that sits at indent 1.
        let block = doc
            .root_block_ids()
            .iter()
            .filter_map(|id| doc.get_block(id))
            .find(|candidate| {
                let indent = candidate.props.get("indent").and_then(Value::as_i64);
                let checked = candidate.props.get("checked").and_then(Value::as_bool);
                candidate.ty == "todo" && indent == Some(1) && checked == Some(true)
            })
            .unwrap();

        assert_eq!(block.ty, "todo");
        let indent = block.props.get("indent").and_then(Value::as_i64);
        assert_eq!(indent, Some(1));
        let checked = block.props.get("checked").and_then(Value::as_bool);
        assert_eq!(checked, Some(true));
    }

    #[test]
    fn parse_fenced_code_block() {
        let doc = parse("```rust\nfn main() {}\n```");
        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();
        assert_eq!(block.ty, "code");

        let language = block.props.get("language").and_then(Value::as_str);
        assert_eq!(language, Some("rust"));
        assert!(block.plain_text().contains("fn main"));
    }

    #[test]
    fn parse_code_block_without_language() {
        let doc = parse("```\nplain code\n```");
        let first_id = BlockId::new("test-1");
        let block = doc.get_block(&first_id).unwrap();
        assert_eq!(block.ty, "code");
        assert!(!block.props.contains_key("language"));
    }
}