use crate::error::Error;
use pulldown_cmark::{CodeBlockKind, Event, Options as CmarkOptions, Parser as CmarkParser};
use serde_yaml;
use std::collections::HashMap;
/// Feature switches that control how [`parse`] interprets a Markdown document.
///
/// Comparable with `==` so callers and tests can check which configuration is in effect.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Enables the GitHub-flavoured extensions: tables, strikethrough and task lists.
    pub gfm: bool,
    /// Enables the Markdown parser's smart punctuation option.
    pub smart_punctuation: bool,
    /// Looks for a leading `---` delimited YAML block and parses it as frontmatter.
    pub frontmatter: bool,
    /// Recognises `::name{attrs}` component markers.
    pub custom_components: bool,
}
impl Default for ParseOptions {
fn default() -> Self {
Self {
gfm: true,
smart_punctuation: true,
frontmatter: true,
custom_components: true,
}
}
}
/// Result of parsing one Markdown document: its block tree plus optional frontmatter.
#[derive(Debug)]
pub struct ParsedDocument {
/// Top-level block nodes in document order.
pub ast: Vec<Node>,
/// Key/value pairs from the YAML frontmatter; `None` when no frontmatter was
/// extracted or when the extracted mapping was empty.
pub frontmatter: Option<HashMap<String, serde_yaml::Value>>,
}
/// A block-level element of the parsed document tree.
#[derive(Debug, Clone)]
pub enum Node {
/// A heading.
Heading {
/// Heading level as a number (the Markdown parser's level cast to `u8`).
level: u8,
/// Plain text of the heading.
content: String,
/// Anchor id derived from `content` (lowercased, non-alphanumerics turned into `-`).
id: String,
},
/// A paragraph made of inline nodes.
Paragraph(Vec<InlineNode>),
/// A block quote and the blocks it contains.
BlockQuote(Vec<Node>),
/// A code block.
CodeBlock {
/// Info string of a fenced block; `None` when it is empty or the block is not fenced.
language: Option<String>,
/// Text of the block.
content: String,
/// Extra attributes; the parser in this file always leaves this map empty.
attributes: HashMap<String, String>,
},
/// A list.
List {
/// `true` when the list has a start number (ordered list).
ordered: bool,
/// One entry per list item; each item is a sequence of blocks.
items: Vec<Vec<Node>>,
},
/// A thematic break (horizontal rule).
ThematicBreak,
/// A custom component opened by a `::name{attrs}` marker.
Component {
/// Component name taken from the marker.
name: String,
/// Attributes parsed from the `{...}` part of the marker.
attributes: HashMap<String, String>,
/// Blocks collected between the opening and closing markers.
children: Vec<Node>,
},
/// Raw HTML passed through unchanged.
Html(String),
/// A table.
Table {
/// Header cells, one inline sequence per column.
headers: Vec<Vec<InlineNode>>,
/// Body rows; each row is a list of cells, each cell an inline sequence.
rows: Vec<Vec<Vec<InlineNode>>>,
/// Column alignments as reported by the Markdown parser.
alignments: Vec<Alignment>,
},
}
impl Node {
    /// Returns the component name, or an empty string for every other node kind.
    pub fn name(&self) -> &str {
        if let Node::Component { name, .. } = self {
            name.as_str()
        } else {
            ""
        }
    }

    /// Returns a copy of the component attributes, or an empty map for every other node kind.
    pub fn attributes(&self) -> HashMap<String, String> {
        if let Node::Component { attributes, .. } = self {
            return attributes.clone();
        }
        HashMap::new()
    }

    /// Returns a copy of the component children, or an empty list for every other node kind.
    pub fn children(&self) -> Vec<Node> {
        if let Node::Component { children, .. } = self {
            return children.clone();
        }
        Vec::new()
    }
}
/// An inline element that appears inside paragraphs, headings, links and table cells.
///
/// Comparable with `==` so inline trees can be checked structurally in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineNode {
    /// Plain text.
    Text(String),
    /// Emphasised content.
    Emphasis(Vec<InlineNode>),
    /// Strongly emphasised content.
    Strong(Vec<InlineNode>),
    /// Struck-through content.
    Strikethrough(Vec<InlineNode>),
    /// A hyperlink.
    Link {
        /// Inline content shown as the link text.
        text: Vec<InlineNode>,
        /// Link destination.
        url: String,
        /// Optional link title.
        title: Option<String>,
    },
    /// An image.
    Image {
        /// Alternative text.
        alt: String,
        /// Image source.
        url: String,
        /// Optional image title.
        title: Option<String>,
    },
    /// An inline code span.
    Code(String),
    /// A line break.
    LineBreak,
    /// Raw inline HTML.
    Html(String),
}
/// Horizontal alignment of a table column.
///
/// Comparable with `==` so table layouts can be checked in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was specified for the column.
    None,
    /// Left-aligned column.
    Left,
    /// Centred column.
    Center,
    /// Right-aligned column.
    Right,
}
/// Parses a Markdown document into a [`ParsedDocument`].
///
/// When `options.frontmatter` is set and the document starts with a frontmatter block,
/// that block is parsed as YAML and removed from the text handed to the Markdown parser.
/// The remaining text is parsed with the extensions selected in `options` and converted
/// into the block tree.
///
/// # Errors
///
/// Returns an error when the frontmatter is not valid YAML or when event processing fails.
pub fn parse(markdown: &str, options: &ParseOptions) -> Result<ParsedDocument, Error> {
    let mut frontmatter = None;
    // Borrow the body instead of copying the document: both the frontmatter and the
    // remaining text are slices of `markdown`.
    let mut body = markdown;
    if options.frontmatter {
        if let Some((yaml, rest)) = extract_frontmatter(markdown) {
            frontmatter = parse_yaml_frontmatter(yaml)?;
            body = rest;
        }
    }

    let mut cmark_options = CmarkOptions::empty();
    if options.gfm {
        cmark_options.insert(CmarkOptions::ENABLE_TABLES);
        cmark_options.insert(CmarkOptions::ENABLE_STRIKETHROUGH);
        cmark_options.insert(CmarkOptions::ENABLE_TASKLISTS);
    }
    if options.smart_punctuation {
        cmark_options.insert(CmarkOptions::ENABLE_SMART_PUNCTUATION);
    }

    let parser = CmarkParser::new_ext(body, cmark_options);
    let ast = process_events(parser, options)?;
    Ok(ParsedDocument { ast, frontmatter })
}
/// Splits a leading frontmatter block off `content`.
///
/// The block must open with a line containing only `---` (trailing whitespace allowed)
/// at the very start of the input and end at the next line containing only `---`.
/// Returns `(yaml, rest)`, where `yaml` is the text between the two delimiter lines
/// (including its line terminators) and `rest` is everything after the closing
/// delimiter line. Returns `None` when either delimiter is missing.
fn extract_frontmatter(content: &str) -> Option<(&str, &str)> {
    let after_marker = content.strip_prefix("---")?;
    let first_newline = after_marker.find('\n')?;
    // Reject openers such as `-----` or `---title`: the delimiter must stand alone.
    if !after_marker[..first_newline].trim().is_empty() {
        return None;
    }

    let yaml_start = "---".len() + first_newline + 1;
    let mut line_start = yaml_start;
    // Walk whole lines so every slice boundary sits on a line boundary; a fixed byte
    // offset past the closer could land inside a multi-byte character.
    for line in content[yaml_start..].split_inclusive('\n') {
        let line_end = line_start + line.len();
        if line.trim_end() == "---" {
            return Some((&content[yaml_start..line_start], &content[line_end..]));
        }
        line_start = line_end;
    }
    None
}
/// Parses frontmatter text as a YAML mapping.
///
/// Returns `Ok(None)` when the text is blank or the mapping has no entries.
///
/// # Errors
///
/// Returns an error when `yaml` is not valid YAML or is not a mapping with string keys.
fn parse_yaml_frontmatter(yaml: &str) -> Result<Option<HashMap<String, serde_yaml::Value>>, Error> {
    // An empty `---`/`---` block carries no data; skip the YAML parser, which may
    // reject an empty document when asked for a mapping.
    if yaml.trim().is_empty() {
        return Ok(None);
    }
    let frontmatter: HashMap<String, serde_yaml::Value> = serde_yaml::from_str(yaml)?;
    Ok(Some(frontmatter).filter(|entries| !entries.is_empty()))
}
/// A block-level container that has been opened but not closed yet.
enum OpenBlock {
    BlockQuote(Vec<Node>),
    List {
        ordered: bool,
        items: Vec<Vec<Node>>,
    },
    Component {
        name: String,
        attributes: HashMap<String, String>,
        children: Vec<Node>,
    },
}

impl OpenBlock {
    fn is_block_quote(&self) -> bool {
        matches!(self, OpenBlock::BlockQuote(_))
    }

    fn is_list(&self) -> bool {
        matches!(self, OpenBlock::List { .. })
    }

    fn is_component(&self) -> bool {
        matches!(self, OpenBlock::Component { .. })
    }

    /// Converts the finished container into its tree node.
    fn into_node(self) -> Node {
        match self {
            OpenBlock::BlockQuote(children) => Node::BlockQuote(children),
            OpenBlock::List { ordered, items } => Node::List { ordered, items },
            OpenBlock::Component {
                name,
                attributes,
                children,
            } => Node::Component {
                name,
                attributes,
                children,
            },
        }
    }
}

/// An inline span that has been opened but not closed yet.
enum OpenSpan {
    Emphasis,
    Strong,
    Strikethrough,
    Link { url: String, title: Option<String> },
}

impl OpenSpan {
    /// Wraps the inline content collected while the span was open.
    fn wrap(self, children: Vec<InlineNode>) -> InlineNode {
        match self {
            OpenSpan::Emphasis => InlineNode::Emphasis(children),
            OpenSpan::Strong => InlineNode::Strong(children),
            OpenSpan::Strikethrough => InlineNode::Strikethrough(children),
            OpenSpan::Link { url, title } => InlineNode::Link {
                text: children,
                url,
                title,
            },
        }
    }
}

/// Accumulates the block tree while events are processed.
///
/// Open containers live on a single stack, so a finished block is always attached to
/// the container that was opened most recently.
#[derive(Default)]
struct TreeBuilder {
    root: Vec<Node>,
    open_blocks: Vec<OpenBlock>,
    /// Inline nodes of the innermost open span, or of the current block when no span is open.
    inlines: Vec<InlineNode>,
    /// Open spans, each paired with the inline buffer that was active before it opened.
    open_spans: Vec<(OpenSpan, Vec<InlineNode>)>,
    /// `true` while inside a paragraph, heading or table cell.
    in_inline_container: bool,
}

impl TreeBuilder {
    /// Appends `node` to the innermost open container, or to the root when none is open.
    fn attach(&mut self, node: Node) {
        match self.open_blocks.last_mut() {
            Some(OpenBlock::BlockQuote(children)) => children.push(node),
            Some(OpenBlock::Component { children, .. }) => children.push(node),
            Some(OpenBlock::List { items, .. }) => match items.last_mut() {
                Some(item) => item.push(node),
                // Not expected from a well-formed stream; keep the node rather than drop it.
                None => items.push(vec![node]),
            },
            None => self.root.push(node),
        }
    }

    /// Closes the innermost open container and attaches it to its parent.
    fn close_innermost(&mut self) {
        if let Some(block) = self.open_blocks.pop() {
            let node = block.into_node();
            self.attach(node);
        }
    }

    /// Closes containers until one accepted by `is_target` is innermost.
    ///
    /// Returns `false`, leaving the stack untouched, when no such container is open.
    fn unwind_to(&mut self, is_target: fn(&OpenBlock) -> bool) -> bool {
        if !self.open_blocks.iter().any(is_target) {
            return false;
        }
        while let Some(top) = self.open_blocks.last() {
            if is_target(top) {
                break;
            }
            self.close_innermost();
        }
        true
    }

    fn open_span(&mut self, span: OpenSpan) {
        let outer = std::mem::take(&mut self.inlines);
        self.open_spans.push((span, outer));
    }

    fn close_span(&mut self) {
        if let Some((span, outer)) = self.open_spans.pop() {
            let inner = std::mem::replace(&mut self.inlines, outer);
            self.inlines.push(span.wrap(inner));
        }
    }

    /// Takes the collected inline content, closing any span that is still open.
    fn take_inlines(&mut self) -> Vec<InlineNode> {
        while !self.open_spans.is_empty() {
            self.close_span();
        }
        std::mem::take(&mut self.inlines)
    }

    /// Emits inline content that is not wrapped in a paragraph (tight list items) as a paragraph.
    fn flush_loose_inlines(&mut self) {
        let content = self.take_inlines();
        if !content.is_empty() {
            self.attach(Node::Paragraph(content));
        }
    }

    /// Stores raw HTML inline when inline content is being collected, otherwise as a block.
    fn push_html(&mut self, html: String) {
        let inline_context =
            self.in_inline_container || !self.inlines.is_empty() || !self.open_spans.is_empty();
        if inline_context {
            self.inlines.push(InlineNode::Html(html));
        } else {
            self.attach(Node::Html(html));
        }
    }

    /// Closes everything that is still open and returns the top-level nodes.
    fn finish(mut self) -> Vec<Node> {
        self.flush_loose_inlines();
        while !self.open_blocks.is_empty() {
            self.close_innermost();
        }
        self.root
    }
}

/// Appends the plain text of `inlines` to `out`, descending into nested spans.
fn append_plain_text(inlines: &[InlineNode], out: &mut String) {
    for inline in inlines {
        match inline {
            InlineNode::Text(text) | InlineNode::Code(text) => out.push_str(text),
            InlineNode::Emphasis(children)
            | InlineNode::Strong(children)
            | InlineNode::Strikethrough(children) => append_plain_text(children, out),
            InlineNode::Link { text, .. } => append_plain_text(text, out),
            InlineNode::Image { alt, .. } => out.push_str(alt),
            InlineNode::LineBreak => out.push(' '),
            InlineNode::Html(_) => {}
        }
    }
}

/// Builds a heading anchor: lowercase, with each run of non-alphanumeric characters
/// replaced by a single `-` and no leading or trailing `-`.
fn heading_slug(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());
    for ch in text.to_lowercase().chars() {
        if ch.is_alphanumeric() {
            slug.push(ch);
        } else if !slug.is_empty() && !slug.ends_with('-') {
            slug.push('-');
        }
    }
    if slug.ends_with('-') {
        slug.pop();
    }
    slug
}

/// Converts a stream of Markdown events into the block tree.
///
/// Blocks are attached to the innermost open container (block quote, list item or
/// component). Inline spans (emphasis, strong, strikethrough, links) nest and keep all
/// of their inline content. Table cells, header cells and the text of tight list items
/// are captured. Components that are still open at the end of the input, or when their
/// enclosing block ends, are closed automatically instead of being dropped.
///
/// When `options.custom_components` is set, an HTML event that starts with `::` outside
/// a paragraph, heading or table cell is treated as a component marker: `::` or `:::`
/// closes the innermost container if it is a component (a stray closer is ignored);
/// any other marker with a name opens one. Raw HTML that arrives while inline content
/// is being collected becomes `InlineNode::Html`, otherwise `Node::Html`.
///
/// NOTE(review): inline HTML that is the very first content of a tight list item cannot
/// be told apart from block HTML here and is stored as `Node::Html`.
///
/// Footnote references and task-list markers are ignored. The function currently never
/// returns `Err`; the `Result` is kept for the caller's `?`.
fn process_events<'a, I>(events: I, options: &ParseOptions) -> Result<Vec<Node>, Error>
where
    I: Iterator<Item = Event<'a>>,
{
    use pulldown_cmark::Tag;

    let mut builder = TreeBuilder::default();
    let mut heading_level: Option<u8> = None;
    // Language and accumulated text of the code block being read, if any.
    let mut code_block: Option<(Option<String>, String)> = None;
    let mut table_headers: Vec<Vec<InlineNode>> = Vec::new();
    let mut table_rows: Vec<Vec<Vec<InlineNode>>> = Vec::new();
    let mut table_alignments: Vec<Alignment> = Vec::new();
    let mut table_row: Vec<Vec<InlineNode>> = Vec::new();
    let mut in_table_head = false;

    let mut events = events;
    while let Some(event) = events.next() {
        match event {
            Event::Start(Tag::Paragraph) => {
                builder.flush_loose_inlines();
                builder.in_inline_container = true;
            }
            Event::End(Tag::Paragraph) => {
                builder.in_inline_container = false;
                let content = builder.take_inlines();
                if !content.is_empty() {
                    builder.attach(Node::Paragraph(content));
                }
            }
            Event::Start(Tag::Heading(level, _, _)) => {
                builder.flush_loose_inlines();
                builder.in_inline_container = true;
                heading_level = Some(level as u8);
            }
            Event::End(Tag::Heading(..)) => {
                builder.in_inline_container = false;
                let inline_content = builder.take_inlines();
                if let Some(level) = heading_level.take() {
                    let mut content = String::new();
                    append_plain_text(&inline_content, &mut content);
                    let id = heading_slug(&content);
                    builder.attach(Node::Heading { level, content, id });
                }
            }
            Event::Start(Tag::BlockQuote) => {
                builder.flush_loose_inlines();
                builder.open_blocks.push(OpenBlock::BlockQuote(Vec::new()));
            }
            Event::End(Tag::BlockQuote) => {
                builder.flush_loose_inlines();
                if builder.unwind_to(OpenBlock::is_block_quote) {
                    builder.close_innermost();
                }
            }
            Event::Start(Tag::CodeBlock(kind)) => {
                builder.flush_loose_inlines();
                let language = match kind {
                    CodeBlockKind::Fenced(info) if !info.is_empty() => Some(info.to_string()),
                    _ => None,
                };
                code_block = Some((language, String::new()));
            }
            Event::End(Tag::CodeBlock(_)) => {
                if let Some((language, content)) = code_block.take() {
                    builder.attach(Node::CodeBlock {
                        language,
                        content,
                        attributes: HashMap::new(),
                    });
                }
            }
            Event::Start(Tag::List(first_item_number)) => {
                // Text of a tight item that precedes a nested list belongs to that item.
                builder.flush_loose_inlines();
                builder.open_blocks.push(OpenBlock::List {
                    ordered: first_item_number.is_some(),
                    items: Vec::new(),
                });
            }
            Event::End(Tag::List(_)) => {
                builder.flush_loose_inlines();
                if builder.unwind_to(OpenBlock::is_list) {
                    builder.close_innermost();
                }
            }
            Event::Start(Tag::Item) => {
                builder.flush_loose_inlines();
                builder.unwind_to(OpenBlock::is_list);
                if let Some(OpenBlock::List { items, .. }) = builder.open_blocks.last_mut() {
                    items.push(Vec::new());
                }
            }
            Event::End(Tag::Item) => {
                // Tight items carry their text without a paragraph wrapper.
                builder.flush_loose_inlines();
                // A component opened inside the item must not swallow the following items.
                builder.unwind_to(OpenBlock::is_list);
            }
            Event::Text(text) => {
                if let Some((_, content)) = code_block.as_mut() {
                    content.push_str(&text);
                } else {
                    builder.inlines.push(InlineNode::Text(text.to_string()));
                }
            }
            Event::Code(code) => {
                builder.inlines.push(InlineNode::Code(code.to_string()));
            }
            Event::Html(html) => {
                let trimmed = html.trim();
                let is_marker = options.custom_components
                    && !builder.in_inline_container
                    && trimmed.starts_with("::");
                if is_marker {
                    if trimmed == "::" || trimmed == ":::" {
                        builder.flush_loose_inlines();
                        let closes_component = builder
                            .open_blocks
                            .last()
                            .map_or(false, OpenBlock::is_component);
                        if closes_component {
                            builder.close_innermost();
                        }
                        continue;
                    }
                    if let Some(name) = parse_component_start(trimmed) {
                        builder.flush_loose_inlines();
                        let attributes =
                            extract_component_attributes(trimmed).unwrap_or_default();
                        builder.open_blocks.push(OpenBlock::Component {
                            name,
                            attributes,
                            children: Vec::new(),
                        });
                        continue;
                    }
                }
                builder.push_html(html.to_string());
            }
            Event::Start(Tag::Emphasis) => builder.open_span(OpenSpan::Emphasis),
            Event::Start(Tag::Strong) => builder.open_span(OpenSpan::Strong),
            Event::Start(Tag::Strikethrough) => builder.open_span(OpenSpan::Strikethrough),
            Event::Start(Tag::Link(_link_type, url, title)) => {
                let title = if title.is_empty() {
                    None
                } else {
                    Some(title.to_string())
                };
                builder.open_span(OpenSpan::Link {
                    url: url.to_string(),
                    title,
                });
            }
            Event::End(Tag::Emphasis)
            | Event::End(Tag::Strong)
            | Event::End(Tag::Strikethrough)
            | Event::End(Tag::Link(..)) => builder.close_span(),
            Event::Start(Tag::Image(_link_type, url, title)) => {
                let title = if title.is_empty() {
                    None
                } else {
                    Some(title.to_string())
                };
                // Read exactly up to the matching end tag so no following event is lost.
                let mut alt = String::new();
                let mut depth = 1usize;
                for inner in events.by_ref() {
                    match inner {
                        Event::Start(Tag::Image(..)) => depth += 1,
                        Event::End(Tag::Image(..)) => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        Event::Text(text) | Event::Code(text) => alt.push_str(&text),
                        Event::SoftBreak | Event::HardBreak => alt.push(' '),
                        _ => {}
                    }
                }
                builder.inlines.push(InlineNode::Image {
                    alt,
                    url: url.to_string(),
                    title,
                });
            }
            Event::SoftBreak | Event::HardBreak => {
                builder.inlines.push(InlineNode::LineBreak);
            }
            Event::Start(Tag::Table(alignments)) => {
                builder.flush_loose_inlines();
                table_headers.clear();
                table_rows.clear();
                table_row.clear();
                table_alignments = alignments
                    .iter()
                    .map(|alignment| match alignment {
                        pulldown_cmark::Alignment::None => Alignment::None,
                        pulldown_cmark::Alignment::Left => Alignment::Left,
                        pulldown_cmark::Alignment::Center => Alignment::Center,
                        pulldown_cmark::Alignment::Right => Alignment::Right,
                    })
                    .collect();
            }
            Event::End(Tag::Table(_)) => {
                builder.attach(Node::Table {
                    headers: std::mem::take(&mut table_headers),
                    rows: std::mem::take(&mut table_rows),
                    alignments: std::mem::take(&mut table_alignments),
                });
            }
            Event::Start(Tag::TableHead) => {
                in_table_head = true;
                table_row.clear();
            }
            Event::End(Tag::TableHead) => {
                in_table_head = false;
                // Header cells may arrive directly inside the head, without a row wrapper.
                if !table_row.is_empty() {
                    table_headers = std::mem::take(&mut table_row);
                }
            }
            Event::Start(Tag::TableRow) => {
                table_row.clear();
            }
            Event::End(Tag::TableRow) => {
                let row = std::mem::take(&mut table_row);
                if !row.is_empty() {
                    if in_table_head {
                        table_headers = row;
                    } else {
                        table_rows.push(row);
                    }
                }
            }
            Event::Start(Tag::TableCell) => {
                builder.take_inlines();
                builder.in_inline_container = true;
            }
            Event::End(Tag::TableCell) => {
                builder.in_inline_container = false;
                let cell = builder.take_inlines();
                table_row.push(cell);
            }
            Event::Rule => {
                builder.flush_loose_inlines();
                builder.attach(Node::ThematicBreak);
            }
            // Footnote references, task-list markers and any other events are ignored.
            _ => {}
        }
    }
    Ok(builder.finish())
}
/// Extracts the component name from an opening marker such as `::card{...}` or `:::tabs`.
///
/// The marker must start with at least two colons after trimming. The name is the text
/// between the leading colons and the first `{` (or the end of the marker), trimmed.
/// Returns `None` when the text is not a marker or carries no name, which is the case
/// for the bare closers `::` and `:::`.
fn parse_component_start(html: &str) -> Option<String> {
    let after_fence = html.trim().strip_prefix("::")?;
    // Strip every remaining fence colon so fences of any length never leak `:` into the name.
    let after_colons = after_fence.trim_start_matches(':');
    let name_end = after_colons.find('{').unwrap_or(after_colons.len());
    let name = after_colons[..name_end].trim();
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
/// Parses the `{key=value ...}` attribute list of a component marker.
///
/// Values may be bare (`n=3`) or wrapped in single or double quotes; quoted values may
/// contain whitespace. Tokens without `=` and entries with an empty key are ignored.
/// The list ends at the first `}` after the opening `{`, so a `}` inside a quoted value
/// is not supported.
///
/// Returns `None` when the marker has no `{` followed by a `}`.
fn extract_component_attributes(html: &str) -> Option<HashMap<String, String>> {
    let html = html.trim();
    let open = html.find('{')?;
    // Search for the closer after the opener; a `}` that comes first must not be used.
    let close = open + html[open..].find('}')?;

    let mut attributes = HashMap::new();
    let mut rest = html[open + 1..close].trim_start();
    while !rest.is_empty() {
        let key_end = rest
            .find(|c: char| c == '=' || c.is_whitespace())
            .unwrap_or(rest.len());
        let (key, tail) = rest.split_at(key_end);
        let after_eq = match tail.strip_prefix('=') {
            Some(after_eq) => after_eq,
            None => {
                // Bare token without a value.
                rest = tail.trim_start();
                continue;
            }
        };

        let quote = after_eq.chars().next().filter(|c| *c == '"' || *c == '\'');
        let closing_quote = quote.and_then(|q| after_eq[1..].find(q).map(|idx| idx + 1));
        let (value, remainder) = match closing_quote {
            Some(end) => (&after_eq[1..end], &after_eq[end + 1..]),
            None => {
                // Unquoted or unterminated value: runs to the next whitespace.
                let end = after_eq
                    .find(char::is_whitespace)
                    .unwrap_or(after_eq.len());
                let raw = &after_eq[..end];
                (
                    raw.trim_matches(|c: char| c == '"' || c == '\''),
                    &after_eq[end..],
                )
            }
        };

        if !key.is_empty() {
            attributes.insert(key.to_string(), value.to_string());
        }
        rest = remainder.trim_start();
    }
    Some(attributes)
}