use std::vec::IntoIter;
use pulldown_cmark::{Event, Options, Tag, TagEnd};
/// Inline formatting that can be attached to a [`TextNode`].
#[derive(Debug, Clone, PartialEq)]
pub enum Style {
    /// Inline code; assigned to text that arrives as a `pulldown_cmark` code event.
    Code,
    /// Emphasis. The parser in this file currently ignores emphasis tags, so it
    /// never assigns this style itself.
    Emphasis,
    /// Strikethrough. Not assigned by the parser in this file.
    Strikethrough,
    /// Strong emphasis. Not assigned by the parser in this file.
    Strong,
}
/// The flavour of a list item carried by `MarkdownNode::Item`.
#[derive(Debug, Clone, PartialEq)]
pub enum ItemKind {
    /// Assigned by the parser when a task list marker is reported as checked.
    HardChecked,
    /// A checked item. Not produced by the parser in this file.
    Checked,
    /// Assigned by the parser when a task list marker is reported as unchecked.
    Unchecked,
    /// An ordered item carrying a number. Not produced by the parser in this file.
    Ordered(u64),
    /// An unordered item. Not produced by the parser in this file.
    Unordered,
}
/// Heading depth; each variant's discriminant equals its numeric level (1 through 6).
#[derive(Debug, Clone, PartialEq)]
// The variant names are self-describing, so they carry no individual docs.
#[allow(missing_docs)]
pub enum HeadingLevel {
    H1 = 1,
    H2 = 2,
    H3 = 3,
    H4 = 4,
    H5 = 5,
    H6 = 6,
}
impl From<pulldown_cmark::HeadingLevel> for HeadingLevel {
fn from(value: pulldown_cmark::HeadingLevel) -> Self {
match value {
pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
}
}
}
/// The tagged kinds of block quote, mirroring `pulldown_cmark::BlockQuoteKind`.
#[derive(Debug, Clone, PartialEq)]
// The variant names are self-describing, so they carry no individual docs.
#[allow(missing_docs)]
pub enum BlockQuoteKind {
    Note,
    Tip,
    Important,
    Warning,
    Caution,
}
impl From<pulldown_cmark::BlockQuoteKind> for BlockQuoteKind {
fn from(value: pulldown_cmark::BlockQuoteKind) -> Self {
match value {
pulldown_cmark::BlockQuoteKind::Tip => BlockQuoteKind::Tip,
pulldown_cmark::BlockQuoteKind::Note => BlockQuoteKind::Note,
pulldown_cmark::BlockQuoteKind::Warning => BlockQuoteKind::Warning,
pulldown_cmark::BlockQuoteKind::Caution => BlockQuoteKind::Caution,
pulldown_cmark::BlockQuoteKind::Important => BlockQuoteKind::Important,
}
}
}
/// The kind of a list: ordered (carrying a number) or unordered.
#[derive(Debug, Clone, PartialEq)]
pub enum ListKind {
    /// An ordered list together with a `u64` value.
    Ordered(u64),
    /// An unordered list.
    Unordered,
}
/// A run of text that shares a single optional [`Style`].
///
/// The default value is an empty, unstyled run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct TextNode {
    /// The text of this run.
    pub content: String,
    /// Formatting applied to the run; `None` for plain text.
    pub style: Option<Style>,
}
impl From<&str> for TextNode {
    /// Copies `value` into an owned, unstyled text node.
    fn from(value: &str) -> Self {
        Self::from(String::from(value))
    }
}
impl From<String> for TextNode {
    /// Wraps `value` as the content of an unstyled text node without copying it.
    fn from(value: String) -> Self {
        Self {
            content: value,
            style: None,
        }
    }
}
impl TextNode {
    /// Builds a text node from its content and an optional style.
    pub fn new(content: String, style: Option<Style>) -> Self {
        Self { style, content }
    }
}
/// An ordered sequence of [`TextNode`]s; the default value is empty.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Text(Vec<TextNode>);
impl From<&str> for Text {
    /// Builds a `Text` holding exactly one unstyled node with `value` as content.
    fn from(value: &str) -> Self {
        Self::from(TextNode::from(value))
    }
}
impl From<String> for Text {
    /// Builds a `Text` holding exactly one unstyled node that owns `value`.
    fn from(value: String) -> Self {
        Self::from(TextNode::from(value))
    }
}
impl From<TextNode> for Text {
    /// Wraps a single node in a `Text`.
    ///
    /// The node is moved into the vector rather than cloned out of a temporary
    /// array, so its `String` content is not copied.
    fn from(value: TextNode) -> Self {
        Self(vec![value])
    }
}
impl From<Vec<TextNode>> for Text {
fn from(value: Vec<TextNode>) -> Self {
Self(value)
}
}
impl From<&[TextNode]> for Text {
    /// Clones every node of the slice into a new `Text`, preserving order.
    fn from(value: &[TextNode]) -> Self {
        Self(Vec::from(value))
    }
}
impl IntoIterator for Text {
type Item = TextNode;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl Text {
fn push(&mut self, node: TextNode) {
self.0.push(node);
}
}
/// Alias for [`std::ops::Range`]; used in this file with `usize` for the source
/// range attached to each node.
pub type Range<Idx> = std::ops::Range<Idx>;
/// A parsed Markdown block together with the source range it was created with.
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
    /// The block itself.
    pub markdown_node: MarkdownNode,
    /// The range handed to [`Node::new`]; the parser in this file passes the
    /// range reported alongside the `pulldown_cmark` event that created the node.
    pub source_range: Range<usize>,
}
impl Node {
    /// Pairs a block with its source range.
    pub fn new(markdown_node: MarkdownNode, source_range: Range<usize>) -> Self {
        Self {
            source_range,
            markdown_node,
        }
    }

    /// Appends `node` to the text of this block.
    ///
    /// Paragraphs, headings, code blocks and items receive the text directly.
    /// A block quote forwards it to its most recently added child and silently
    /// drops it when it has no children yet.
    pub(crate) fn push_text_node(&mut self, node: TextNode) {
        let text = match &mut self.markdown_node {
            MarkdownNode::BlockQuote { nodes, .. } => {
                if let Some(child) = nodes.last_mut() {
                    child.push_text_node(node);
                }
                return;
            }
            MarkdownNode::Paragraph { text, .. }
            | MarkdownNode::Heading { text, .. }
            | MarkdownNode::CodeBlock { text, .. }
            | MarkdownNode::Item { text, .. } => text,
        };

        text.push(node);
    }
}
/// The block-level Markdown constructs represented by this module.
#[derive(Debug, Clone, PartialEq)]
// Variants are documented below; the allow is kept for the undocumented fields.
#[allow(missing_docs)]
pub enum MarkdownNode {
    /// A heading with its level and inline text.
    Heading { level: HeadingLevel, text: Text },
    /// A paragraph of inline text.
    Paragraph { text: Text },
    /// A block quote with an optional kind and the blocks nested inside it.
    BlockQuote {
        kind: Option<BlockQuoteKind>,
        nodes: Vec<Node>,
    },
    /// A code block with an optional language and its text.
    CodeBlock { lang: Option<String>, text: Text },
    /// A list item; `kind` is `None` until something (such as a task list
    /// marker) assigns one.
    Item { kind: Option<ItemKind>, text: Text },
}
/// Returns `true` when `tag_end` is the closing tag for the kind of block held
/// by `node`.
fn matches_tag_end(node: &Node, tag_end: &TagEnd) -> bool {
    match &node.markdown_node {
        MarkdownNode::Paragraph { .. } => matches!(tag_end, TagEnd::Paragraph),
        MarkdownNode::Heading { .. } => matches!(tag_end, TagEnd::Heading(..)),
        MarkdownNode::BlockQuote { .. } => matches!(tag_end, TagEnd::BlockQuote(..)),
        MarkdownNode::CodeBlock { .. } => matches!(tag_end, TagEnd::CodeBlock),
        MarkdownNode::Item { .. } => matches!(tag_end, TagEnd::Item),
    }
}
/// Parses `text` as Markdown and returns the nodes produced by [`Parser::parse`].
pub fn from_str(text: &str) -> Vec<Node> {
    let parser = Parser::new(text);
    parser.parse()
}
/// Turns the `pulldown_cmark` event stream for one input string into [`Node`]s.
pub struct Parser<'a> {
    /// Nodes that have been completed so far, in the order they were finished.
    pub output: Vec<Node>,
    /// The wrapped `pulldown_cmark` stream of events paired with source ranges.
    inner: pulldown_cmark::TextMergeWithOffset<'a, pulldown_cmark::OffsetIter<'a>>,
    /// The block currently being built, if any.
    current_node: Option<Node>,
}
impl<'a> Iterator for Parser<'a> {
    type Item = (Event<'a>, Range<usize>);

    /// Yields the next raw `(event, range)` pair from the wrapped
    /// `pulldown_cmark` stream; no nodes are built by this call.
    fn next(&mut self) -> Option<Self::Item> {
        let upstream = &mut self.inner;
        upstream.next()
    }
}
impl<'a> Parser<'a> {
    /// Creates a parser over `text` with every `pulldown_cmark` option enabled.
    ///
    /// The event stream is taken through `into_offset_iter` and wrapped in
    /// `TextMergeWithOffset`, so every event comes with its range in `text`.
    pub fn new(text: &'a str) -> Self {
        let parser = pulldown_cmark::TextMergeWithOffset::new(
            pulldown_cmark::Parser::new_ext(text, Options::all()).into_offset_iter(),
        );

        Self {
            inner: parser,
            output: vec![],
            current_node: None,
        }
    }

    /// Registers a freshly opened block.
    ///
    /// While a block quote is the current node, the new block becomes its
    /// newest child; otherwise the new block replaces the current node.
    fn push_node(&mut self, node: Node) {
        if let Some(Node {
            markdown_node: MarkdownNode::BlockQuote { nodes, .. },
            ..
        }) = &mut self.current_node
        {
            nodes.push(node);
        } else {
            self.set_node(node);
        }
    }

    /// Appends inline text to the current node; the text is dropped when no
    /// node is open.
    fn push_text_node(&mut self, node: TextNode) {
        if let Some(current) = self.current_node.as_mut() {
            current.push_text_node(node);
        }
    }

    /// Makes `block` the current node, discarding whatever was current before.
    ///
    /// Takes the node by value so callers that already own it do not pay for a
    /// deep clone.
    fn set_node(&mut self, block: Node) {
        self.current_node = Some(block);
    }

    /// Handles a start tag by opening the corresponding block, if this module
    /// models it. All other tags are ignored.
    fn tag(&mut self, tag: Tag<'a>, range: Range<usize>) {
        let markdown_node = match tag {
            Tag::Paragraph => MarkdownNode::Paragraph {
                text: Text::default(),
            },
            Tag::Heading { level, .. } => MarkdownNode::Heading {
                level: level.into(),
                text: Text::default(),
            },
            Tag::BlockQuote(kind) => MarkdownNode::BlockQuote {
                kind: kind.map(Into::into),
                nodes: vec![],
            },
            Tag::CodeBlock(kind) => {
                // A fenced block carries an info string whose first word names
                // the language; an indented block, or a fence with an empty
                // info string, has no language.
                let lang = match &kind {
                    pulldown_cmark::CodeBlockKind::Fenced(info) => {
                        info.split_whitespace().next().map(str::to_owned)
                    }
                    pulldown_cmark::CodeBlockKind::Indented => None,
                };
                MarkdownNode::CodeBlock {
                    lang,
                    text: Text::default(),
                }
            }
            Tag::Item => MarkdownNode::Item {
                kind: None,
                text: Text::default(),
            },
            // Listed explicitly (no catch-all) so that a new upstream tag
            // forces a decision here.
            Tag::HtmlBlock
            | Tag::List(_)
            | Tag::FootnoteDefinition(_)
            | Tag::Table(_)
            | Tag::TableHead
            | Tag::TableRow
            | Tag::TableCell
            | Tag::Emphasis
            | Tag::Strong
            | Tag::Strikethrough
            | Tag::Link { .. }
            | Tag::Image { .. }
            | Tag::MetadataBlock(_)
            | Tag::DefinitionList
            | Tag::DefinitionListTitle
            | Tag::Subscript
            | Tag::Superscript
            | Tag::DefinitionListDefinition => return,
        };

        self.push_node(Node::new(markdown_node, range));
    }

    /// Handles an end tag: when it closes the current node, that node is moved
    /// to `output`; otherwise the current node is left untouched.
    fn tag_end(&mut self, tag_end: TagEnd) {
        // Inspect by reference first so a non-matching end tag (for example a
        // paragraph closing inside a block quote) never moves or copies the node.
        let closes_current =
            matches!(&self.current_node, Some(node) if matches_tag_end(node, &tag_end));

        if closes_current {
            self.output.extend(self.current_node.take());
        }
    }

    /// Dispatches a single `pulldown_cmark` event.
    fn handle_event(&mut self, event: Event<'a>, range: Range<usize>) {
        match event {
            Event::Start(tag) => self.tag(tag, range),
            Event::End(tag_end) => self.tag_end(tag_end),
            Event::Text(text) => self.push_text_node(TextNode::new(text.to_string(), None)),
            Event::Code(text) => {
                self.push_text_node(TextNode::new(text.to_string(), Some(Style::Code)))
            }
            Event::TaskListMarker(checked) => {
                let kind = if checked {
                    ItemKind::HardChecked
                } else {
                    ItemKind::Unchecked
                };
                // The marker replaces the current node with a fresh, empty item
                // whose source range is the marker's own range.
                self.set_node(Node::new(
                    MarkdownNode::Item {
                        kind: Some(kind),
                        text: Text::default(),
                    },
                    range,
                ));
            }
            Event::InlineMath(_)
            | Event::DisplayMath(_)
            | Event::Html(_)
            | Event::InlineHtml(_)
            | Event::SoftBreak
            | Event::HardBreak
            | Event::Rule
            | Event::FootnoteReference(_) => {}
        }
    }

    /// Consumes the parser, processes every event and returns the finished nodes.
    ///
    /// A node that is still open when the event stream ends is appended to the
    /// output as well.
    pub fn parse(mut self) -> Vec<Node> {
        // `for` cannot be used here: the loop body needs `&mut self` while the
        // iterator being advanced is `self` itself.
        while let Some((event, range)) = self.next() {
            self.handle_event(event, range);
        }

        if let Some(node) = self.current_node.take() {
            self.output.push(node);
        }

        self.output
    }
}
// Tests for `from_str`: each case pairs a Markdown input with the exact node
// list (including source ranges) the parser is expected to return.
#[cfg(test)]
mod tests {
use indoc::indoc;
use similar_asserts::assert_eq;
// Expected paragraph node whose text is the single unstyled run `str`.
fn p(str: &str, range: Range<usize>) -> Node {
Node::new(MarkdownNode::Paragraph { text: str.into() }, range)
}
// Expected block quote node without a kind, containing `nodes`.
fn blockquote(nodes: Vec<Node>, range: Range<usize>) -> Node {
Node::new(MarkdownNode::BlockQuote { kind: None, nodes }, range)
}
// Expected list item node with no item kind.
fn item(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::Item {
kind: None,
text: str.into(),
},
range,
)
}
// Expected list item node marked as an unchecked task.
fn task(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::Item {
kind: Some(ItemKind::Unchecked),
text: str.into(),
},
range,
)
}
// Expected list item node marked as a checked task (`ItemKind::HardChecked`).
fn completed_task(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::Item {
kind: Some(ItemKind::HardChecked),
text: str.into(),
},
range,
)
}
// Expected heading node of the given level; `h1`..`h6` below fix the level.
fn heading(level: HeadingLevel, str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::Heading {
level,
text: str.into(),
},
range,
)
}
fn h1(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H1, str, range)
}
fn h2(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H2, str, range)
}
fn h3(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H3, str, range)
}
fn h4(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H4, str, range)
}
fn h5(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H5, str, range)
}
fn h6(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H6, str, range)
}
use super::*;
// Table-driven check: every `(input, expected)` pair must round-trip through
// `from_str` to exactly the expected nodes.
#[test]
fn test_parse() {
let tests = [
(
indoc! {r#"# Heading 1
## Heading 2
### Heading 3
#### Heading 4
##### Heading 5
###### Heading 6
"#},
vec![
h1("Heading 1", 0..12),
h2("Heading 2", 13..26),
h3("Heading 3", 27..41),
h4("Heading 4", 42..57),
h5("Heading 5", 58..74),
h6("Heading 6", 75..92),
],
),
(
indoc! { r#"## Tasks
- [ ] Task
- [x] Completed task
- [?] Completed task
"#},
vec![
h2("Tasks", 0..9),
task("Task", 12..15),
completed_task("Completed task", 24..27),
// `[?]` is expected to come back as a plain paragraph, not a task.
p("[?] Completed task", 46..65),
],
),
(
indoc! {r#"## Quotes
You _can_ quote text by adding a `>` symbols before the text.
> Human beings face ever more complex and urgent problems, and their effectiveness in dealing with these problems is a matter that is critical to the stability and continued progress of society.
>
>- Doug Engelbart, 1961
"#},
vec![
h2("Quotes", 0..10),
Node::new(MarkdownNode::Paragraph {
text: vec![
TextNode::new("You ".into(), None),
// The emphasised word is expected as an unstyled run.
TextNode::new("can".into(),None),
TextNode::new(" quote text by adding a ".into(), None),
TextNode::new(">".into(), Some(Style::Code)),
TextNode::new(" symbols before the text.".into(), None),
]
.into(),
}, 11..73),
blockquote(vec![
p("Human beings face ever more complex and urgent problems, and their effectiveness in dealing with these problems is a matter that is critical to the stability and continued progress of society.", 76..269),
item("Doug Engelbart, 1961", 272..295)
], 74..295),
],
),
];
tests
.iter()
.for_each(|test| assert_eq!(from_str(test.0), test.1));
}
}