use std::{iter::Peekable, vec::IntoIter};
use pulldown_cmark::{Event, Options, Tag, TagEnd};
/// Inline styling that can be attached to a [`TextNode`].
#[derive(Clone, Debug, PartialEq)]
pub enum Style {
/// Inline code span; the parser assigns it to text coming from `Event::Code`.
Code,
}
/// Kind of a list item.
///
/// NOTE(review): nothing in this file references this type; it has the same
/// shape as [`ListKind`] — confirm whether external callers still need it.
#[derive(Clone, Debug, PartialEq)]
pub enum ItemKind {
/// Item of an ordered list, carrying a number.
Ordered(u64),
/// Item of an unordered (bulleted) list.
Unordered,
}
/// State of a task list item.
#[derive(Clone, Debug, PartialEq)]
pub enum TaskListItemKind {
/// Set when the parser receives `Event::TaskListMarker(true)`.
Checked,
/// Set when the parser receives `Event::TaskListMarker(false)`.
Unchecked,
/// Set when a text event starts with `[`, one arbitrary character, `]` and a
/// space (for example `[?] ` or `[-] `), i.e. a marker that was not reported
/// as a regular task list marker.
LooselyChecked,
}
/// Heading depth, from `H1` (top level) to `H6`.
///
/// Discriminants start at 1, so `H1 as u8 == 1` up to `H6 as u8 == 6`, and the
/// derived ordering follows the declaration order (`H1 < H2 < ... < H6`).
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
#[allow(missing_docs)]
pub enum HeadingLevel {
H1 = 1,
H2,
H3,
H4,
H5,
H6,
}
impl From<pulldown_cmark::HeadingLevel> for HeadingLevel {
fn from(value: pulldown_cmark::HeadingLevel) -> Self {
match value {
pulldown_cmark::HeadingLevel::H1 => HeadingLevel::H1,
pulldown_cmark::HeadingLevel::H2 => HeadingLevel::H2,
pulldown_cmark::HeadingLevel::H3 => HeadingLevel::H3,
pulldown_cmark::HeadingLevel::H4 => HeadingLevel::H4,
pulldown_cmark::HeadingLevel::H5 => HeadingLevel::H5,
pulldown_cmark::HeadingLevel::H6 => HeadingLevel::H6,
}
}
}
/// Kind of a tagged block quote.
///
/// Mirrors the variants of `pulldown_cmark::BlockQuoteKind`; see the `From`
/// conversion from that type.
#[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum BlockQuoteKind {
Note,
Tip,
Important,
Warning,
Caution,
}
impl From<pulldown_cmark::BlockQuoteKind> for BlockQuoteKind {
fn from(value: pulldown_cmark::BlockQuoteKind) -> Self {
match value {
pulldown_cmark::BlockQuoteKind::Tip => BlockQuoteKind::Tip,
pulldown_cmark::BlockQuoteKind::Note => BlockQuoteKind::Note,
pulldown_cmark::BlockQuoteKind::Warning => BlockQuoteKind::Warning,
pulldown_cmark::BlockQuoteKind::Caution => BlockQuoteKind::Caution,
pulldown_cmark::BlockQuoteKind::Important => BlockQuoteKind::Important,
}
}
}
/// Kind of a list node.
#[derive(Clone, Debug, PartialEq)]
pub enum ListKind {
/// Ordered list; carries the start number the parser received in
/// `Tag::List(Some(start))`.
Ordered(u64),
/// Unordered list; used when the parser received `Tag::List(None)`.
Unordered,
}
/// A run of text with an optional inline style.
#[derive(Clone, Debug, PartialEq, Default)]
pub struct TextNode {
/// The text itself.
pub content: String,
/// Inline style of the run, or `None` for plain text.
pub style: Option<Style>,
}
impl From<&str> for TextNode {
    /// Builds an unstyled node from a string slice.
    ///
    /// Delegates to the `From<String>` conversion, so the content goes through
    /// the same normalization as an owned string.
    fn from(value: &str) -> Self {
        Self::from(String::from(value))
    }
}
impl From<String> for TextNode {
    /// Builds an unstyled node from an owned string.
    ///
    /// Every tab character in `value` is replaced before it is stored as the
    /// node content.
    fn from(value: String) -> Self {
        let content = value.replace("\t", " ");
        Self {
            content,
            style: None,
        }
    }
}
impl TextNode {
pub fn new(content: String, style: Option<Style>) -> Self {
Self { content, style }
}
}
/// An ordered sequence of [`TextNode`]s, making up the text of a block.
#[derive(Clone, Debug, PartialEq, Default)]
pub struct Text(Vec<TextNode>);
impl From<&Text> for String {
    /// Concatenates the contents of all nodes, in order, ignoring styles.
    ///
    /// Borrows the nodes instead of cloning the whole `Text` first.
    fn from(value: &Text) -> Self {
        value
            .0
            .iter()
            .map(|node| node.content.as_str())
            .collect()
    }
}
impl From<Text> for String {
    /// Consumes the text and concatenates the contents of all nodes, in
    /// order, ignoring styles.
    fn from(value: Text) -> Self {
        let mut joined = String::new();
        for node in value {
            joined.push_str(&node.content);
        }
        joined
    }
}
impl From<&str> for Text {
    /// Builds a text holding a single unstyled node created from `value`.
    fn from(value: &str) -> Self {
        let node = TextNode::from(value);
        Self::from(node)
    }
}
impl From<String> for Text {
    /// Builds a text holding a single unstyled node created from `value`.
    fn from(value: String) -> Self {
        let node = TextNode::from(value);
        Self::from(node)
    }
}
impl From<TextNode> for Text {
    /// Wraps a single node into a text.
    ///
    /// The node is moved into the new vector rather than cloned out of a
    /// temporary array.
    fn from(value: TextNode) -> Self {
        Self(vec![value])
    }
}
impl From<Vec<TextNode>> for Text {
fn from(value: Vec<TextNode>) -> Self {
Self(value)
}
}
impl From<&[TextNode]> for Text {
    /// Clones every node of the slice into a new text.
    fn from(value: &[TextNode]) -> Self {
        Self(Vec::from(value))
    }
}
impl IntoIterator for Text {
type Item = TextNode;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl Text {
fn push(&mut self, node: TextNode) {
self.0.push(node);
}
}
/// Alias of [`std::ops::Range`]; used for the source range stored in [`Node`].
pub type Range<Idx> = std::ops::Range<Idx>;
/// A parsed Markdown block together with the part of the source it came from.
#[derive(Clone, Debug, PartialEq)]
pub struct Node {
/// The parsed block.
pub markdown_node: MarkdownNode,
/// Range in the source text, as reported by the `pulldown_cmark` offset
/// iterator for the event that started this block.
pub source_range: Range<usize>,
}
impl Node {
pub fn new(markdown_node: MarkdownNode, source_range: Range<usize>) -> Self {
Self {
markdown_node,
source_range,
}
}
pub(crate) fn push_text_node(&mut self, node: TextNode) {
match &mut self.markdown_node {
MarkdownNode::Paragraph { text, .. }
| MarkdownNode::Heading { text, .. }
| MarkdownNode::CodeBlock { text, .. }
| MarkdownNode::TaskListItem { text, .. }
| MarkdownNode::Item { text, .. } => text.push(node),
MarkdownNode::List { nodes, .. } | MarkdownNode::BlockQuote { nodes, .. } => {
if let Some(last_node) = nodes.last_mut() {
last_node.push_text_node(node);
}
}
}
}
}
/// A Markdown block produced by the parser.
///
/// `Heading`, `Paragraph`, `CodeBlock`, `Item` and `TaskListItem` carry their
/// own text; `BlockQuote` and `List` are containers of child nodes.
#[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum MarkdownNode {
/// Heading with its level and text.
Heading {
level: HeadingLevel,
text: Text,
},
/// Paragraph of text.
Paragraph {
text: Text,
},
/// Block quote; `kind` is `None` for a plain quote.
BlockQuote {
kind: Option<BlockQuoteKind>,
nodes: Vec<Node>,
},
/// Code block; `lang` is the language of the block, when known.
CodeBlock {
lang: Option<String>,
text: Text,
},
/// Ordered or unordered list; `nodes` are the nodes parsed inside the list.
List {
kind: ListKind,
nodes: Vec<Node>,
},
/// Plain list item.
Item {
text: Text,
},
/// List item that carries a task marker.
TaskListItem {
kind: TaskListItemKind,
text: Text,
},
}
/// Returns `true` when `tag_end` closes a block of the same type as `tag`.
///
/// Only the block types the parser builds nodes for are compared (paragraph,
/// heading, block quote, code block, list and item); any other tag yields
/// `false`. Payloads such as the heading level are ignored.
fn matches_tag_end(tag: &Tag, tag_end: &TagEnd) -> bool {
    match tag {
        Tag::Paragraph { .. } => matches!(tag_end, TagEnd::Paragraph),
        Tag::Heading { .. } => matches!(tag_end, TagEnd::Heading(..)),
        Tag::BlockQuote { .. } => matches!(tag_end, TagEnd::BlockQuote(..)),
        Tag::CodeBlock { .. } => matches!(tag_end, TagEnd::CodeBlock),
        Tag::List { .. } => matches!(tag_end, TagEnd::List(..)),
        Tag::Item { .. } => matches!(tag_end, TagEnd::Item),
        _ => false,
    }
}
/// Parses Markdown `text` and returns its top-level nodes.
///
/// Shorthand for building a [`Parser`] over `text` and calling
/// [`Parser::parse`] on it.
pub fn from_str(text: &str) -> Vec<Node> {
    let parser = Parser::new(text);
    parser.parse()
}
/// Markdown parser that wraps a `pulldown_cmark` offset iterator passed
/// through `TextMergeWithOffset`, yielding events paired with source ranges.
pub struct Parser<'a>(pulldown_cmark::TextMergeWithOffset<'a, pulldown_cmark::OffsetIter<'a>>);
impl<'a> Iterator for Parser<'a> {
type Item = (Event<'a>, Range<usize>);
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self {
let parser = pulldown_cmark::TextMergeWithOffset::new(
pulldown_cmark::Parser::new_ext(text, Options::all()).into_offset_iter(),
);
Self(parser)
}
fn parse_tag(
tag: Tag,
events: &mut Peekable<Parser<'a>>,
source_range: Range<usize>,
) -> Option<Node> {
match tag {
Tag::BlockQuote(kind) => Some(Node::new(
MarkdownNode::BlockQuote {
kind: kind.map(|kind| kind.into()),
nodes: Parser::parse_events(events, Some(tag)),
},
source_range,
)),
Tag::List(start) => Some(Node::new(
MarkdownNode::List {
kind: start.map(ListKind::Ordered).unwrap_or(ListKind::Unordered),
nodes: Parser::parse_events(events, Some(tag)),
},
source_range,
)),
Tag::Heading { level, .. } => Some(Node::new(
MarkdownNode::Heading {
level: level.into(),
text: Text::default(),
},
source_range,
)),
Tag::CodeBlock(_) => Some(Node::new(
MarkdownNode::CodeBlock {
lang: None,
text: Text::default(),
},
source_range,
)),
Tag::Paragraph => Some(Node::new(
MarkdownNode::Paragraph {
text: Text::default(),
},
source_range,
)),
Tag::Item => Some(Node::new(
MarkdownNode::Item {
text: Text::default(),
},
source_range,
)),
_ => None,
}
}
fn parse_events(events: &mut Peekable<Parser<'a>>, current_tag: Option<Tag>) -> Vec<Node> {
let mut nodes = Vec::new();
while let Some((event, range)) = events.peek().cloned() {
events.next();
match event {
Event::Start(tag) => {
if let Some(node) = Parser::parse_tag(tag, events, range) {
nodes.push(node);
}
}
Event::End(tag_end) => {
if let Some(ref tag) = current_tag {
if matches_tag_end(tag, &tag_end) {
return nodes;
}
}
}
Event::Text(text) => {
if let Some(node) = nodes.last_mut() {
let is_loosely_checked_task = text
.get(0..4)
.map(|str| str.as_bytes())
.map(|chars| matches!(chars, &[b'[', _, b']', b' ']))
.unwrap_or_default();
if is_loosely_checked_task {
let source_range = node.clone().source_range;
*node = Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::LooselyChecked,
text: Text::from(text.get(4..).unwrap_or_default()),
},
source_range,
);
} else {
node.push_text_node(text.to_string().into())
}
}
}
Event::Code(text) => {
if let Some(node) = nodes.last_mut() {
node.push_text_node(TextNode::new(text.to_string(), Some(Style::Code)))
}
}
Event::TaskListMarker(checked) => {
if let Some(node) = nodes.last_mut() {
let source_range = node.clone().source_range;
if checked {
*node = Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::Checked,
text: Text::default(),
},
source_range,
);
} else {
*node = Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::Unchecked,
text: Text::default(),
},
source_range,
);
}
}
}
_ => {}
}
}
nodes
}
pub fn parse(self) -> Vec<Node> {
Parser::parse_events(&mut self.peekable(), None)
}
}
// Unit tests comparing the output of `from_str` against hand-built node trees.
#[cfg(test)]
mod tests {
use indoc::indoc;
// Builds a paragraph node holding a single unstyled text node.
fn p(str: &str, range: Range<usize>) -> Node {
Node::new(MarkdownNode::Paragraph { text: str.into() }, range)
}
// Builds a block quote node without a kind.
fn blockquote(nodes: Vec<Node>, range: Range<usize>) -> Node {
Node::new(MarkdownNode::BlockQuote { kind: None, nodes }, range)
}
// Builds a list node of the given kind.
fn list(kind: ListKind, nodes: Vec<Node>, range: Range<usize>) -> Node {
Node::new(MarkdownNode::List { kind, nodes }, range)
}
// Builds a plain list item node.
fn item(str: &str, range: Range<usize>) -> Node {
Node::new(MarkdownNode::Item { text: str.into() }, range)
}
// Builds an unchecked task list item node.
fn unchecked_task(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::Unchecked,
text: str.into(),
},
range,
)
}
// Builds a checked task list item node.
fn checked_task(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::Checked,
text: str.into(),
},
range,
)
}
// Builds a loosely checked task list item node.
fn loosely_checked_task(str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::TaskListItem {
kind: TaskListItemKind::LooselyChecked,
text: str.into(),
},
range,
)
}
// Builds a heading node of the given level.
fn heading(level: HeadingLevel, str: &str, range: Range<usize>) -> Node {
Node::new(
MarkdownNode::Heading {
level,
text: str.into(),
},
range,
)
}
// Shorthands for the six heading levels.
fn h1(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H1, str, range)
}
fn h2(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H2, str, range)
}
fn h3(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H3, str, range)
}
fn h4(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H4, str, range)
}
fn h5(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H5, str, range)
}
fn h6(str: &str, range: Range<usize>) -> Node {
heading(HeadingLevel::H6, str, range)
}
use super::*;
// Each case pairs a Markdown input with the node tree `from_str` must return
// for it, including source ranges.
#[test]
fn test_parse() {
let tests = [
// Headings of every level.
(
indoc! {r#"# Heading 1
## Heading 2
### Heading 3
#### Heading 4
##### Heading 5
###### Heading 6
"#},
vec![
h1("Heading 1", 0..12),
h2("Heading 2", 13..26),
h3("Heading 3", 27..41),
h4("Heading 4", 42..57),
h5("Heading 5", 58..74),
h6("Heading 6", 75..92),
],
),
// Task list with unchecked, checked and loosely checked (`[?]`, `[-]`) items.
(
indoc! { r#"- [ ] Task
- [x] Completed task
- [?] Completed task
- [-] Completed task
"#},
vec![list(
ListKind::Unordered,
vec![
unchecked_task("Task", 0..11),
checked_task("Completed task", 11..32),
loosely_checked_task("Completed task", 32..53),
loosely_checked_task("Completed task", 53..74),
],
0..74,
)],
),
// Paragraph with emphasis and inline code, followed by a block quote that
// contains a paragraph, nested block quotes and a list.
(
indoc! {r#"You _can_ quote text by adding a `>` symbols before the text.
> Human beings face ever more complex and urgent problems, and their effectiveness in dealing with these problems is a matter that is critical to the stability and continued progress of society.
> > > Deep Quote
>
> - Doug Engelbart, 1961
"#},
vec![
Node::new(MarkdownNode::Paragraph {
text: vec![
TextNode::new("You ".into(), None),
// Emphasised text is expected as a plain node without a style.
TextNode::new("can".into(), None),
TextNode::new(" quote text by adding a ".into(), None),
TextNode::new(">".into(), Some(Style::Code)),
TextNode::new(" symbols before the text.".into(), None),
]
.into(),
}, 0..62),
blockquote(
vec![
p("Human beings face ever more complex and urgent problems, and their effectiveness in dealing with these problems is a matter that is critical to the stability and continued progress of society.", 64..257),
blockquote(
vec![blockquote(vec![p("Deep Quote", 263..274)], 261..274)],
259..274,
),
list(
ListKind::Unordered,
vec![item("Doug Engelbart, 1961", 278..301)],
278..301,
),
],
62..301,
),
],
),
];
tests
.iter()
.for_each(|test| assert_eq!(from_str(test.0), test.1));
}
}