use pulldown_cmark::{Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, Tag, TagEnd};
use serde_json::{Map, Value, json};
use std::collections::HashMap;
/// Converts a pulldown-cmark event stream into a list of top-level JSON AST nodes.
///
/// The events are consumed in order by an internal [`AstBuilder`]; the returned
/// vector holds every node that ended up outside of any enclosing container.
pub fn build_ast(events: Vec<Event<'_>>) -> Vec<Value> {
    let builder = AstBuilder::new(events);
    builder.build()
}
/// One container that has been opened but not yet closed while building the AST.
struct Frame {
/// JSON object under construction; always carries the node type under the `"t"` key.
node: Map<String, Value>,
/// Child nodes collected so far, in document order.
inline: Vec<Value>,
/// Verbatim text buffer; filled by `push_inline` for `fenced` and `html_block` frames.
text: String,
}
impl Frame {
fn new(t: &str) -> Self {
let mut node = Map::new();
node.insert("t".into(), Value::String(t.to_owned()));
Self {
node,
inline: Vec::new(),
text: String::new(),
}
}
fn with(mut self, key: &str, val: Value) -> Self {
self.node.insert(key.into(), val);
self
}
}
/// Stack-based state machine that turns a pulldown-cmark event stream into JSON nodes.
struct AstBuilder<'a> {
/// Remaining input events, consumed front to back by `build`.
events: std::vec::IntoIter<Event<'a>>,
/// Finished nodes that are not nested inside any open frame.
blocks: Vec<Value>,
/// Currently open containers, innermost last.
stack: Vec<Frame>,
/// True between `Tag::TableHead` and `TagEnd::TableHead`; routes cells to `table_headers`.
in_table_head: bool,
/// Header cells of the table being built.
table_headers: Vec<Value>,
/// Completed body rows of the table being built.
table_rows: Vec<Value>,
/// Cells of the body row currently being read.
current_row: Vec<Value>,
}
impl<'a> AstBuilder<'a> {
fn new(events: Vec<Event<'a>>) -> Self {
Self {
events: events.into_iter(),
blocks: Vec::new(),
stack: Vec::new(),
in_table_head: false,
table_headers: Vec::new(),
table_rows: Vec::new(),
current_row: Vec::new(),
}
}
/// Feeds every remaining event through `handle` and returns the finished
/// top-level nodes. Frames that are still open when the stream ends are not
/// part of the result.
fn build(mut self) -> Vec<Value> {
    loop {
        match self.events.next() {
            Some(event) => self.handle(event),
            None => return self.blocks,
        }
    }
}
/// Dispatches one parser event.
///
/// `Start`/`End` open and close frames; every other event becomes a leaf node
/// that is attached through `push_inline` or `push_block`.
///
/// Text that arrives while the innermost frame is a verbatim container
/// (`fenced` or `html_block`) is appended to that frame's text buffer as-is.
/// It must not go through `scan_inline_widgets`: a `:[label]{name}` sequence
/// inside a code block is source text, and turning it into a widget node
/// would leave only `label` in the block's `raw` output.
fn handle(&mut self, event: Event<'_>) {
    match event {
        Event::Start(tag) => self.open(tag),
        Event::End(end) => self.close(end),
        Event::Rule => self.push_block(json!({"t": "hr"})),
        Event::TaskListMarker(checked) => {
            self.push_inline(json!({"t": "task_marker", "checked": checked}));
        }
        Event::Text(t) => {
            let s: &str = t.as_ref();
            if let Some(frame) = self.stack.last_mut() {
                let kind = frame.node.get("t").and_then(|v| v.as_str());
                if matches!(kind, Some("fenced") | Some("html_block")) {
                    // Verbatim container: keep the bytes exactly as written.
                    frame.text.push_str(s);
                    return;
                }
            }
            for node in scan_inline_widgets(s) {
                self.push_inline(node);
            }
        }
        Event::Code(t) => {
            self.push_inline(json!({"t": "code_span", "text": t.as_ref()}));
        }
        Event::InlineMath(t) => {
            self.push_inline(json!({"t": "math_inline", "src": t.as_ref()}));
        }
        Event::DisplayMath(t) => {
            self.push_block(json!({"t": "math_block", "src": t.as_ref()}));
        }
        Event::Html(t) | Event::InlineHtml(t) => {
            let s = t.as_ref();
            if self.stack.is_empty() {
                self.push_block(json!({"t": "raw_html", "html": s}));
            } else {
                self.push_inline(json!({"t": "raw_html", "html": s}));
            }
        }
        Event::FootnoteReference(label) => {
            self.push_inline(json!({"t": "footnote_ref", "label": label.as_ref()}));
        }
        Event::SoftBreak => {
            self.push_inline(json!({"t": "soft_break"}));
        }
        Event::HardBreak => {
            self.push_inline(json!({"t": "hard_break"}));
        }
    }
}
fn open(&mut self, tag: Tag<'_>) {
match tag {
Tag::Paragraph => self.stack.push(Frame::new("paragraph")),
Tag::Heading {
level,
id,
classes,
attrs,
} => {
let mut f = Frame::new("heading");
f.node.insert("level".into(), json!(heading_level(level)));
if let Some(id) = id {
f.node.insert("id".into(), json!(id.as_ref()));
}
if !classes.is_empty() {
let cls: Vec<Value> = classes.iter().map(|c| json!(c.as_ref())).collect();
f.node.insert("classes".into(), Value::Array(cls));
}
if !attrs.is_empty() {
let mut m = Map::new();
for (k, v) in &attrs {
m.insert(k.as_ref().to_owned(), json!(v.as_deref()));
}
f.node.insert("attrs".into(), Value::Object(m));
}
self.stack.push(f);
}
Tag::BlockQuote(kind) => {
let mut f = Frame::new("blockquote");
if let Some(k) = kind {
f.node.insert("kind".into(), json!(blockquote_kind(k)));
}
self.stack.push(f);
}
Tag::CodeBlock(CodeBlockKind::Fenced(info)) => {
let (name, attrs) = parse_fence_info(info.as_ref());
let mut f = Frame::new("fenced");
f.node.insert("name".into(), json!(name));
if !attrs.is_empty() {
f.node.insert(
"attrs".into(),
Value::Object(attrs.into_iter().map(|(k, v)| (k, json!(v))).collect()),
);
}
self.stack.push(f);
}
Tag::CodeBlock(CodeBlockKind::Indented) => {
let mut f = Frame::new("fenced");
f.node.insert("name".into(), json!(""));
self.stack.push(f);
}
Tag::HtmlBlock => self.stack.push(Frame::new("html_block")),
Tag::List(start) => {
let mut f = Frame::new("list");
if let Some(n) = start {
f.node.insert("ordered".into(), json!(true));
f.node.insert("start".into(), json!(n));
} else {
f.node.insert("ordered".into(), json!(false));
}
self.stack.push(f);
}
Tag::Item => self.stack.push(Frame::new("list_item")),
Tag::FootnoteDefinition(label) => {
let f = Frame::new("footnote_def").with("label", json!(label.as_ref()));
self.stack.push(f);
}
Tag::Table(alignments) => {
let aligns: Vec<Value> = alignments.iter().map(|a| json!(col_align(*a))).collect();
let mut f = Frame::new("table");
f.node.insert("align".into(), Value::Array(aligns));
self.table_headers.clear();
self.table_rows.clear();
self.stack.push(f);
}
Tag::TableHead => {
self.in_table_head = true;
}
Tag::TableRow => {
self.current_row.clear();
}
Tag::TableCell => self.stack.push(Frame::new("_cell")),
Tag::DefinitionList => self.stack.push(Frame::new("definition_list")),
Tag::DefinitionListTitle => self.stack.push(Frame::new("_def_title")),
Tag::DefinitionListDefinition => self.stack.push(Frame::new("_def_body")),
Tag::Emphasis => self.stack.push(Frame::new("em")),
Tag::Strong => self.stack.push(Frame::new("strong")),
Tag::Strikethrough => self.stack.push(Frame::new("del")),
Tag::Superscript => self.stack.push(Frame::new("sup")),
Tag::Subscript => self.stack.push(Frame::new("sub")),
Tag::Link {
dest_url, title, ..
} => {
let f = Frame::new("link")
.with("href", json!(dest_url.as_ref()))
.with("title", json!(title.as_ref()));
self.stack.push(f);
}
Tag::Image {
dest_url, title, ..
} => {
let f = Frame::new("image")
.with("src", json!(dest_url.as_ref()))
.with("title", json!(title.as_ref()));
self.stack.push(f);
}
Tag::MetadataBlock(_) => {} }
}
fn close(&mut self, end: TagEnd) {
match end {
TagEnd::Emphasis
| TagEnd::Strong
| TagEnd::Strikethrough
| TagEnd::Superscript
| TagEnd::Subscript => {
if let Some(mut f) = self.stack.pop() {
f.node.insert(
"children".into(),
Value::Array(f.inline.drain(..).collect()),
);
let node = Value::Object(f.node);
self.push_inline(node);
}
}
TagEnd::Link | TagEnd::Image => {
if let Some(mut f) = self.stack.pop() {
f.node.insert(
"children".into(),
Value::Array(f.inline.drain(..).collect()),
);
let node = Value::Object(f.node);
self.push_inline(node);
}
}
TagEnd::Paragraph
| TagEnd::Heading(_)
| TagEnd::BlockQuote(_)
| TagEnd::Item
| TagEnd::FootnoteDefinition => {
if let Some(mut f) = self.stack.pop() {
let children = coalesce_and_scan_widgets(f.inline.drain(..).collect());
f.node.insert("children".into(), Value::Array(children));
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::CodeBlock => {
if let Some(mut f) = self.stack.pop() {
f.node.insert("raw".into(), json!(f.text.trim_end()));
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::HtmlBlock => {
if let Some(mut f) = self.stack.pop() {
f.node.insert("html".into(), json!(f.text));
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::List(_) => {
if let Some(mut f) = self.stack.pop() {
f.node
.insert("items".into(), Value::Array(f.inline.drain(..).collect()));
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::TableCell => {
if let Some(mut f) = self.stack.pop() {
f.node.insert(
"children".into(),
Value::Array(f.inline.drain(..).collect()),
);
let cell = Value::Object(f.node);
if self.in_table_head {
self.table_headers.push(cell);
} else {
self.current_row.push(cell);
}
}
}
TagEnd::TableHead => {
self.in_table_head = false;
}
TagEnd::TableRow => {
if !self.current_row.is_empty() {
let row = Value::Array(self.current_row.drain(..).collect());
self.table_rows.push(row);
}
}
TagEnd::Table => {
if let Some(mut f) = self.stack.pop() {
f.node.insert(
"headers".into(),
Value::Array(self.table_headers.drain(..).collect()),
);
f.node.insert(
"rows".into(),
Value::Array(self.table_rows.drain(..).collect()),
);
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::DefinitionListTitle | TagEnd::DefinitionListDefinition => {
if let Some(mut f) = self.stack.pop() {
f.node.insert(
"children".into(),
Value::Array(f.inline.drain(..).collect()),
);
let node = Value::Object(f.node);
self.push_inline(node);
}
}
TagEnd::DefinitionList => {
if let Some(mut f) = self.stack.pop() {
f.node
.insert("items".into(), Value::Array(f.inline.drain(..).collect()));
let node = Value::Object(f.node);
self.flush_block(node);
}
}
TagEnd::MetadataBlock(_) => {} }
}
/// Attaches a leaf or finished inline node to the innermost open frame, or to
/// the top-level list when no frame is open.
///
/// Verbatim frames (`fenced`, `html_block`) do not keep child nodes; their
/// content is read back from the frame's text buffer when the frame closes.
/// For those frames the node is folded into the buffer instead:
/// - a string `"text"` field is appended as-is,
/// - a `raw_html` node contributes its `"html"` field,
/// - soft and hard breaks become a newline.
///
/// Without the `raw_html` case every HTML block would close with an empty
/// `"html"` value, because the `Html` events are delivered as `raw_html`
/// nodes that have no `"text"` field.
fn push_inline(&mut self, node: Value) {
    let Some(frame) = self.stack.last_mut() else {
        self.blocks.push(node);
        return;
    };

    let verbatim = matches!(
        frame.node.get("t").and_then(Value::as_str),
        Some("fenced") | Some("html_block")
    );
    if verbatim {
        let kind = node.get("t").and_then(Value::as_str);
        if let Some(text) = node.get("text").and_then(Value::as_str) {
            frame.text.push_str(text);
            return;
        }
        if kind == Some("raw_html") {
            if let Some(html) = node.get("html").and_then(Value::as_str) {
                frame.text.push_str(html);
                return;
            }
        }
        if matches!(kind, Some("soft_break") | Some("hard_break")) {
            frame.text.push('\n');
            return;
        }
    }

    // Anything else is kept as a regular child node.
    frame.inline.push(node);
}
/// Appends a finished node to the children of the innermost open frame, or to
/// the top-level list when the stack is empty. Unlike `push_inline`, the node
/// is never folded into a verbatim text buffer.
fn flush_block(&mut self, node: Value) {
    let sink = match self.stack.last_mut() {
        Some(parent) => &mut parent.inline,
        None => &mut self.blocks,
    };
    sink.push(node);
}
/// Emits a leaf block node (rule, display math, top-level raw HTML) at the
/// current position in the tree.
///
/// The node is attached to the innermost open frame when there is one, so a
/// rule inside a blockquote or list item — or display math inside a
/// paragraph — stays inside its container. Pushing straight to the top-level
/// list would hoist such nodes out of their parent and place them before it,
/// because the parent is only emitted once it closes.
fn push_block(&mut self, node: Value) {
    self.flush_block(node);
}
}
fn heading_level(level: HeadingLevel) -> u8 {
match level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
}
}
fn blockquote_kind(k: BlockQuoteKind) -> &'static str {
match k {
BlockQuoteKind::Note => "note",
BlockQuoteKind::Tip => "tip",
BlockQuoteKind::Important => "important",
BlockQuoteKind::Warning => "warning",
BlockQuoteKind::Caution => "caution",
}
}
/// Returns the lowercase label used for one column in a table node's `"align"` array.
fn col_align(a: Alignment) -> &'static str {
    let label = match a {
        Alignment::None => "none",
        Alignment::Left => "left",
        Alignment::Center => "center",
        Alignment::Right => "right",
    };
    label
}
/// Splits an info string such as `rust linenos title="My file"` into a name
/// and an attribute map.
///
/// The first token is the name (empty when there are no tokens). Each later
/// token becomes an attribute: `key=value` maps `key` to the string `value`
/// (split at the first `=`), and a bare token maps to `true`. When a key is
/// repeated, the last occurrence wins.
pub fn parse_fence_info(info: &str) -> (String, HashMap<String, Value>) {
    let mut rest = tokenize_attrs(info).into_iter();
    let name = match rest.next() {
        Some(first) => first,
        None => String::new(),
    };
    let attrs = rest
        .map(|tok| match tok.split_once('=') {
            Some((key, value)) => (key.to_owned(), json!(value)),
            None => (tok, json!(true)),
        })
        .collect();
    (name, attrs)
}
/// Parses the inside of a `{...}` attribute block into a map.
///
/// Every token is an attribute: `key=value` maps `key` to the string `value`
/// (split at the first `=`), and a bare token maps to `true`. When a key is
/// repeated, the last occurrence wins.
fn parse_attr_block(s: &str) -> HashMap<String, Value> {
    tokenize_attrs(s)
        .into_iter()
        .map(|tok| {
            let pair = tok
                .split_once('=')
                .map(|(key, value)| (key.to_owned(), json!(value)));
            pair.unwrap_or_else(|| (tok, json!(true)))
        })
        .collect()
}
/// Splits `s` into tokens on spaces and tabs, honoring double quotes.
///
/// A `"` toggles quoted mode and is itself dropped from the output; while in
/// quoted mode, spaces and tabs are kept as part of the current token. Empty
/// tokens are never produced, and an unterminated quote simply runs to the
/// end of the input.
fn tokenize_attrs(s: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut word = String::new();
    let mut quoted = false;

    for c in s.chars() {
        if c == '"' {
            quoted = !quoted;
            continue;
        }
        let separator = !quoted && matches!(c, ' ' | '\t');
        if !separator {
            word.push(c);
            continue;
        }
        // Unquoted whitespace ends the current token, if there is one.
        if !word.is_empty() {
            out.push(std::mem::take(&mut word));
        }
    }

    if !word.is_empty() {
        out.push(word);
    }
    out
}
/// Tries to read an inline widget of the form `:[text]{name attrs...}` from
/// the very start of `s`.
///
/// The label runs up to the first `]{` and the attribute section up to the
/// first `}` after that. Returns the widget node together with the number of
/// bytes of `s` it occupies, or `None` when the syntax does not match or the
/// attribute section yields no name.
fn parse_inline_widget(s: &str) -> Option<(Value, usize)> {
    const OPEN: &str = ":[";
    const MID: &str = "]{";

    let body = s.strip_prefix(OPEN)?;
    let (label, tail) = body.split_once(MID)?;
    let (spec, _) = tail.split_once('}')?;

    let (name, attrs) = parse_fence_info(spec);
    if name.is_empty() {
        return None;
    }

    // Opening marker + label + middle marker + attribute section + closing brace.
    let consumed = OPEN.len() + label.len() + MID.len() + spec.len() + 1;
    let widget = json!({
        "t": "widget",
        "name": name,
        "text": label,
        "attrs": attrs
    });
    Some((widget, consumed))
}
/// Splits `s` into an alternating sequence of `text` nodes and `widget` nodes.
///
/// Every `:[` is tried as the start of an inline widget. When it parses, the
/// plain text before it is emitted as a `text` node, followed by the widget.
/// When it does not parse, the `:[` is ordinary text and simply stays part of
/// the surrounding plain run, so each input byte appears in the output
/// exactly once (a failed candidate must not re-emit the text preceding it).
///
/// An empty input yields an empty vector; input without any widget yields a
/// single `text` node.
fn scan_inline_widgets(s: &str) -> Vec<Value> {
    let mut result = Vec::new();
    // Start of the plain text that has been seen but not yet emitted.
    let mut plain_start = 0;
    // Position from which the next `:[` is searched.
    let mut cursor = 0;

    while let Some(offset) = s[cursor..].find(":[") {
        let at = cursor + offset;
        match parse_inline_widget(&s[at..]) {
            Some((widget, consumed)) => {
                if at > plain_start {
                    result.push(json!({"t": "text", "text": &s[plain_start..at]}));
                }
                result.push(widget);
                cursor = at + consumed;
                plain_start = cursor;
            }
            None => {
                // Not a widget: step over the two-byte marker and keep it as text.
                cursor = at + 2;
            }
        }
    }

    if plain_start < s.len() {
        result.push(json!({"t": "text", "text": &s[plain_start..]}));
    }
    result
}
/// Post-processes the children of a text-bearing block in two passes.
///
/// 1. Runs of adjacent `text` nodes are concatenated and re-scanned with
///    `scan_inline_widgets`, so a widget whose source was delivered in
///    several text fragments is still recognized. All other nodes pass
///    through unchanged and end the current run.
/// 2. A `link` or `image` node that is immediately followed by a `text` node
///    starting with `{...}` absorbs that block as attributes (merged into any
///    existing `"attrs"` object). The rest of the text node, if any, is kept
///    as a `text` node after the link.
///
/// Only genuine `text` nodes are considered in step 2: other node types such
/// as code spans or widgets also carry a `"text"` field and must not be
/// consumed as attribute blocks. The remainder is kept even when it is only
/// whitespace, so the spacing between the link and the next inline survives.
/// An attribute block that yields no attributes (e.g. `{}`) is left as text.
fn coalesce_and_scan_widgets(children: Vec<Value>) -> Vec<Value> {
    // Pass 1: merge adjacent text and re-scan it for widgets.
    let mut merged: Vec<Value> = Vec::with_capacity(children.len());
    let mut pending = String::new();
    for child in children {
        if child.get("t").and_then(Value::as_str) == Some("text") {
            if let Some(s) = child.get("text").and_then(Value::as_str) {
                pending.push_str(s);
            }
            continue;
        }
        if !pending.is_empty() {
            merged.extend(scan_inline_widgets(&pending));
            pending.clear();
        }
        merged.push(child);
    }
    if !pending.is_empty() {
        merged.extend(scan_inline_widgets(&pending));
    }

    // Pass 2: attach trailing `{...}` attribute blocks to links and images.
    let mut result: Vec<Value> = Vec::with_capacity(merged.len());
    let mut iter = merged.into_iter().peekable();
    while let Some(mut node) = iter.next() {
        let link_like = matches!(
            node.get("t").and_then(Value::as_str),
            Some("link") | Some("image")
        );
        if link_like {
            let trailing = iter.peek().and_then(|next| {
                if next.get("t").and_then(Value::as_str) != Some("text") {
                    return None;
                }
                let text = next.get("text")?.as_str()?;
                let (block, rest) = text.strip_prefix('{')?.split_once('}')?;
                Some((parse_attr_block(block), rest.to_owned()))
            });
            if let Some((attrs, rest)) = trailing {
                if !attrs.is_empty() {
                    if let Value::Object(obj) = &mut node {
                        match obj.get_mut("attrs") {
                            Some(Value::Object(existing)) => existing.extend(attrs),
                            _ => {
                                obj.insert(
                                    "attrs".into(),
                                    Value::Object(attrs.into_iter().collect()),
                                );
                            }
                        }
                    }
                    // The text node was absorbed; drop it from the stream.
                    iter.next();
                    result.push(node);
                    if !rest.is_empty() {
                        result.push(json!({"t": "text", "text": rest}));
                    }
                    continue;
                }
            }
        }
        result.push(node);
    }
    result
}