use pulldown_cmark::{
Alignment as CmarkAlignment, CodeBlockKind, Event, Options, Parser, Tag, TagEnd,
};
use regex::Regex;
use std::sync::LazyLock;
use turbovault_core::{ContentBlock, InlineElement, ListItem, TableAlignment};
static WIKILINK_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap());
fn preprocess_wikilinks(markdown: &str) -> String {
WIKILINK_RE
.replace_all(markdown, |caps: ®ex::Captures| {
let target = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
let alias = caps.get(2).map(|m| m.as_str().trim());
let display_text = alias.unwrap_or(target);
format!("[{}](wikilink:{})", display_text, target)
})
.to_string()
}
static LINK_WITH_SPACES_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)<>]+\s[^)<>]*)\)").unwrap());
fn preprocess_links_with_spaces(markdown: &str) -> String {
LINK_WITH_SPACES_RE
.replace_all(markdown, |caps: ®ex::Captures| {
let text = &caps[1];
let url = &caps[2];
if url.contains(' ') {
format!("[{}](<{}>)", text, url)
} else {
caps[0].to_string()
}
})
.to_string()
}
/// Pulls `<details>…</details>` elements out of `markdown`.
///
/// Returns the markdown with each element replaced by a placeholder line
/// `[DETAILS_BLOCK_<index>]`, together with the extracted blocks in index
/// order. Each extracted block carries its summary text, the trimmed inner
/// content after `</summary>` (or the whole body when there is no summary),
/// and that content parsed into nested blocks.
///
/// An opening `<details` with no `>` or no later `</details>` is copied
/// through unchanged. The first `</details>` after the opening tag ends the
/// element.
fn extract_details_blocks(markdown: &str) -> (String, Vec<ContentBlock>) {
    const OPEN_TAG: &str = "<details";
    const CLOSE_TAG: &str = "</details>";
    const SUMMARY_CLOSE: &str = "</summary>";

    let mut extracted = Vec::new();
    let mut output = String::new();
    let mut cursor = 0;

    while cursor < markdown.len() {
        let rest = &markdown[cursor..];

        // Everything up to the next candidate opening tag is copied verbatim.
        let Some(open_offset) = rest.find(OPEN_TAG) else {
            output.push_str(rest);
            break;
        };
        output.push_str(&rest[..open_offset]);
        cursor += open_offset;

        // Locate the end of the opening tag and the matching closing tag.
        let span = markdown[cursor..].find('>').and_then(|tag_end| {
            let body_start = cursor + tag_end + 1;
            markdown[body_start..]
                .find(CLOSE_TAG)
                .map(|body_len| (body_start, body_start + body_len))
        });
        let Some((body_start, body_end)) = span else {
            // Not a complete element: emit the '<' literally and keep scanning.
            output.push('<');
            cursor += 1;
            continue;
        };

        let body = &markdown[body_start..body_end];
        let summary = extract_summary(body);
        let after_summary = match body.find(SUMMARY_CLOSE) {
            Some(pos) => &body[pos + SUMMARY_CLOSE.len()..],
            None => body,
        };
        let inner = after_summary.trim();
        let nested = if inner.is_empty() {
            Vec::new()
        } else {
            parse_blocks(inner)
        };

        let index = extracted.len();
        extracted.push(ContentBlock::Details {
            summary,
            content: inner.to_string(),
            blocks: nested,
        });
        output.push_str(&format!("\n[DETAILS_BLOCK_{}]\n", index));
        cursor = body_end + CLOSE_TAG.len();
    }

    (output, extracted)
}
/// Returns the trimmed text between the first `<summary…>` opening tag and
/// the next `</summary>` in `details_content`.
///
/// Yields an empty string when the opening tag, its closing `>`, or the
/// closing `</summary>` cannot be found.
fn extract_summary(details_content: &str) -> String {
    let Some(open_at) = details_content.find("<summary") else {
        return String::new();
    };
    let from_open = &details_content[open_at..];

    // Skip past the end of the opening tag (it may carry attributes).
    let Some(tag_close) = from_open.find('>') else {
        return String::new();
    };
    let inner = &from_open[tag_close + 1..];

    match inner.find("</summary>") {
        Some(end) => inner[..end].trim().to_string(),
        None => String::new(),
    }
}
/// Mutable state threaded through `process_event` while a stream of
/// pulldown-cmark events is folded into `ContentBlock`s.
struct BlockParserState {
/// Line number supplied by the caller; recorded as the start and end line of
/// emitted code blocks. Nothing in this file advances it after construction.
current_line: usize,
/// Text accumulated for the current paragraph, list item, or table cell.
/// Also reused as scratch storage for an image title between the image's
/// start and end events.
paragraph_buffer: String,
/// Inline elements accumulated alongside `paragraph_buffer`.
inline_buffer: Vec<InlineElement>,
/// Completed top-level items of the list currently being built.
list_items: Vec<ListItem>,
/// Whether the outermost list is ordered (set when list depth reaches 1).
list_ordered: bool,
/// Current nesting depth of list tags.
list_depth: usize,
/// Current nesting depth of list-item tags.
item_depth: usize,
/// Checkbox state from the most recent task-list marker, if any.
task_list_marker: Option<bool>,
/// Stack of enclosing items' task markers, saved while a nested item is open.
saved_task_markers: Vec<Option<bool>>,
/// Block-level children collected for the current top-level list item.
item_blocks: Vec<ContentBlock>,
/// Text of the code block currently being read.
code_buffer: String,
/// Info string of the current fenced code block, if non-empty.
code_language: Option<String>,
/// Value of `current_line` when the current code block started.
code_start_line: usize,
/// Text collected inside a blockquote; re-parsed into nested blocks on flush.
blockquote_buffer: String,
/// Header cells of the table currently being built.
table_headers: Vec<String>,
/// Column alignments of the table currently being built.
table_alignments: Vec<TableAlignment>,
/// Completed body rows of the table currently being built.
table_rows: Vec<Vec<String>>,
/// Cells of the table row currently being built.
current_row: Vec<String>,
/// Level of the heading currently being read, if any.
heading_level: Option<usize>,
/// Plain text of the heading currently being read.
heading_buffer: String,
/// Inline elements of the heading currently being read.
heading_inline: Vec<InlineElement>,
/// Set between paragraph start and the flush of that paragraph.
in_paragraph: bool,
/// Set while the outermost list is open.
in_list: bool,
/// Set while a code block is open.
in_code: bool,
/// Set while a blockquote is open.
in_blockquote: bool,
/// Set while a table is open.
in_table: bool,
/// Set while a heading is open.
in_heading: bool,
/// Set while inside strong emphasis.
in_strong: bool,
/// Set while inside emphasis.
in_emphasis: bool,
/// Set while inside strikethrough.
in_strikethrough: bool,
/// Set only for the duration of recording an inline code span.
in_code_inline: bool,
/// Set while a link is open.
in_link: bool,
/// Destination of the link or image currently being read.
link_url: String,
/// Visible text of the current link, or alt text of the current image.
link_text: String,
/// Set when an image starts while a link is open.
image_in_link: bool,
/// Set while an image is open.
in_image: bool,
/// The enclosing link's destination, saved when a nested image overwrites `link_url`.
saved_link_url: String,
/// Number of line breaks inserted into `paragraph_buffer` ahead of links in
/// nested list items; reset at each top-level item and reported as the
/// `line_offset` of links and images inside lists.
nested_line_offset: usize,
}
impl BlockParserState {
    /// Creates an empty parser state whose `current_line` is `start_line`.
    fn new(start_line: usize) -> Self {
        Self {
            current_line: start_line,
            paragraph_buffer: String::new(),
            inline_buffer: Vec::new(),
            list_items: Vec::new(),
            list_ordered: false,
            list_depth: 0,
            item_depth: 0,
            task_list_marker: None,
            saved_task_markers: Vec::new(),
            item_blocks: Vec::new(),
            code_buffer: String::new(),
            code_language: None,
            code_start_line: 0,
            blockquote_buffer: String::new(),
            table_headers: Vec::new(),
            table_alignments: Vec::new(),
            table_rows: Vec::new(),
            current_row: Vec::new(),
            heading_level: None,
            heading_buffer: String::new(),
            heading_inline: Vec::new(),
            in_paragraph: false,
            in_list: false,
            in_code: false,
            in_blockquote: false,
            in_table: false,
            in_heading: false,
            in_strong: false,
            in_emphasis: false,
            in_strikethrough: false,
            in_code_inline: false,
            in_link: false,
            link_url: String::new(),
            link_text: String::new(),
            image_in_link: false,
            in_image: false,
            saved_link_url: String::new(),
            nested_line_offset: 0,
        }
    }

    /// Flushes every construct that is still open once the event stream ends.
    fn finalize(&mut self, blocks: &mut Vec<ContentBlock>) {
        self.flush_paragraph(blocks);
        self.flush_list(blocks);
        self.flush_code(blocks);
        self.flush_blockquote(blocks);
        self.flush_table(blocks);
    }

    /// Closes the open paragraph, emitting a `Paragraph` block when it has text.
    ///
    /// The `in_paragraph` flag is always reset, even when nothing was
    /// buffered (for example because the text was routed to a blockquote),
    /// so a finished paragraph can never be mistaken for an open one.
    fn flush_paragraph(&mut self, blocks: &mut Vec<ContentBlock>) {
        if !self.in_paragraph {
            return;
        }
        self.in_paragraph = false;
        if self.paragraph_buffer.is_empty() {
            return;
        }
        blocks.push(ContentBlock::Paragraph {
            content: std::mem::take(&mut self.paragraph_buffer),
            inline: std::mem::take(&mut self.inline_buffer),
        });
    }

    /// Closes the open list, emitting a `List` block when it has items.
    fn flush_list(&mut self, blocks: &mut Vec<ContentBlock>) {
        if !self.in_list {
            return;
        }
        self.in_list = false;
        if self.list_items.is_empty() {
            return;
        }
        blocks.push(ContentBlock::List {
            ordered: self.list_ordered,
            items: std::mem::take(&mut self.list_items),
        });
    }

    /// Closes the open code block, emitting a `Code` block when it has content.
    ///
    /// The code state is reset even for an empty block; otherwise an empty
    /// fenced block would leave `in_code` set and every following text event
    /// would be captured as code.
    fn flush_code(&mut self, blocks: &mut Vec<ContentBlock>) {
        if !self.in_code {
            return;
        }
        self.in_code = false;
        let language = self.code_language.take();
        if self.code_buffer.is_empty() {
            return;
        }
        blocks.push(ContentBlock::Code {
            language,
            content: self.code_buffer.trim_end().to_string(),
            start_line: self.code_start_line,
            end_line: self.current_line,
        });
        self.code_buffer.clear();
    }

    /// Closes the open blockquote, emitting a `Blockquote` block (with its
    /// text re-parsed into nested blocks) when it has content.
    ///
    /// The flag is reset even for an empty quote so later text is not
    /// swallowed into the blockquote buffer.
    fn flush_blockquote(&mut self, blocks: &mut Vec<ContentBlock>) {
        if !self.in_blockquote {
            return;
        }
        self.in_blockquote = false;
        if self.blockquote_buffer.is_empty() {
            return;
        }
        let content = std::mem::take(&mut self.blockquote_buffer);
        let nested_blocks = parse_blocks(&content);
        blocks.push(ContentBlock::Blockquote {
            content,
            blocks: nested_blocks,
        });
    }

    /// Closes the open table, emitting a `Table` block when it has headers.
    ///
    /// Cell text is accumulated in the paragraph buffers, so those are
    /// cleared here as well to keep cell text out of the next paragraph.
    fn flush_table(&mut self, blocks: &mut Vec<ContentBlock>) {
        if !self.in_table {
            return;
        }
        self.in_table = false;
        self.current_row.clear();
        self.paragraph_buffer.clear();
        self.inline_buffer.clear();

        let headers = std::mem::take(&mut self.table_headers);
        let alignments = std::mem::take(&mut self.table_alignments);
        let rows = std::mem::take(&mut self.table_rows);
        if headers.is_empty() {
            return;
        }
        blocks.push(ContentBlock::Table {
            headers,
            alignments,
            rows,
        });
    }

    /// Appends `text` to the paragraph buffer and records a matching inline
    /// element. Empty text is ignored.
    ///
    /// Only one style is recorded per element; when several are active the
    /// precedence is code, strong, emphasis, strikethrough, plain text.
    fn add_inline_text(&mut self, text: &str) {
        if text.is_empty() {
            return;
        }
        let value = text.to_string();
        let element = if self.in_code_inline {
            InlineElement::Code { value }
        } else if self.in_strong {
            InlineElement::Strong { value }
        } else if self.in_emphasis {
            InlineElement::Emphasis { value }
        } else if self.in_strikethrough {
            InlineElement::Strikethrough { value }
        } else {
            InlineElement::Text { value }
        };
        self.inline_buffer.push(element);
        self.paragraph_buffer.push_str(text);
    }
}
/// Applies one pulldown-cmark [`Event`] to `state`, appending any block that
/// the event completes to `blocks`.
///
/// Top-level paragraphs, code blocks, lists, blockquotes, tables, headings,
/// images and rules are pushed to `blocks`. Paragraphs and code blocks that
/// end inside a list item are collected in `state.item_blocks` and attached
/// to the item when the top-level item closes. Events with no arm here are
/// ignored.
// The single large match mirrors the event enum; splitting it would scatter
// the state transitions, hence the lint exemption.
#[allow(clippy::too_many_lines)]
fn process_event(event: Event, state: &mut BlockParserState, blocks: &mut Vec<ContentBlock>) {
    match event {
        Event::Start(Tag::Paragraph) => {
            state.in_paragraph = true;
        }
        Event::End(TagEnd::Paragraph) => {
            if state.item_depth >= 1 && state.in_paragraph && !state.paragraph_buffer.is_empty() {
                // Paragraphs inside a list item belong to the item, not the document.
                state.item_blocks.push(ContentBlock::Paragraph {
                    content: state.paragraph_buffer.clone(),
                    inline: state.inline_buffer.clone(),
                });
                state.paragraph_buffer.clear();
                state.inline_buffer.clear();
                state.in_paragraph = false;
            } else {
                state.flush_paragraph(blocks);
            }
        }
        Event::Start(Tag::CodeBlock(kind)) => {
            state.in_code = true;
            state.code_start_line = state.current_line;
            state.code_language = match kind {
                CodeBlockKind::Fenced(lang) => {
                    if lang.is_empty() {
                        None
                    } else {
                        Some(lang.to_string())
                    }
                }
                CodeBlockKind::Indented => None,
            };
        }
        Event::End(TagEnd::CodeBlock) => {
            if state.item_depth >= 1 && state.in_code && !state.code_buffer.is_empty() {
                // Code blocks inside a list item are attached to the item.
                state.item_blocks.push(ContentBlock::Code {
                    language: state.code_language.clone(),
                    content: state.code_buffer.trim_end().to_string(),
                    start_line: state.code_start_line,
                    end_line: state.current_line,
                });
                state.code_buffer.clear();
                state.code_language = None;
                state.in_code = false;
            } else {
                state.flush_code(blocks);
            }
        }
        Event::Start(Tag::List(start_number)) => {
            state.list_depth += 1;
            if state.list_depth == 1 {
                // Only the outermost list decides ordered vs unordered.
                state.in_list = true;
                state.list_ordered = start_number.is_some();
            }
        }
        Event::End(TagEnd::List(_)) => {
            state.list_depth = state.list_depth.saturating_sub(1);
            if state.list_depth == 0 {
                state.flush_list(blocks);
            }
        }
        Event::Start(Tag::Item) => {
            state.item_depth += 1;
            if state.item_depth > 1 {
                // Keep the parent's task marker safe while the nested item is open.
                state.saved_task_markers.push(state.task_list_marker);
                state.task_list_marker = None;
            }
            if state.item_depth == 1 {
                state.paragraph_buffer.clear();
                state.inline_buffer.clear();
                state.item_blocks.clear();
                state.nested_line_offset = 0;
            }
        }
        Event::End(TagEnd::Item) => {
            if state.item_depth > 1
                && let Some(saved) = state.saved_task_markers.pop()
            {
                state.task_list_marker = saved;
            }
            if state.item_depth == 1 {
                // The item's own text is either still in the paragraph buffer
                // or was already captured as the first paragraph block.
                let (content, mut inline, remaining_blocks) = if !state.paragraph_buffer.is_empty()
                {
                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
                    (
                        state.paragraph_buffer.clone(),
                        state.inline_buffer.clone(),
                        all_blocks,
                    )
                } else if let Some(ContentBlock::Paragraph { content, inline }) =
                    state.item_blocks.first().cloned()
                {
                    let remaining: Vec<ContentBlock> = state.item_blocks.drain(1..).collect();
                    (content, inline, remaining)
                } else {
                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
                    (String::new(), Vec::new(), all_blocks)
                };
                // Surface inline elements of nested blocks on the item itself.
                collect_inline_elements(&remaining_blocks, &mut inline);
                state.list_items.push(ListItem {
                    checked: state.task_list_marker,
                    content,
                    inline,
                    blocks: remaining_blocks,
                });
                state.paragraph_buffer.clear();
                state.inline_buffer.clear();
                state.item_blocks.clear();
                state.task_list_marker = None;
            }
            state.item_depth = state.item_depth.saturating_sub(1);
        }
        Event::TaskListMarker(checked) => {
            state.task_list_marker = Some(checked);
        }
        Event::Start(Tag::BlockQuote(_)) => {
            state.in_blockquote = true;
        }
        Event::End(TagEnd::BlockQuote(_)) => {
            state.flush_blockquote(blocks);
        }
        Event::Start(Tag::Table(alignments)) => {
            state.in_table = true;
            state.table_alignments = alignments
                .iter()
                .map(|a| match a {
                    CmarkAlignment::Left => TableAlignment::Left,
                    CmarkAlignment::Center => TableAlignment::Center,
                    CmarkAlignment::Right => TableAlignment::Right,
                    CmarkAlignment::None => TableAlignment::None,
                })
                .collect();
        }
        Event::End(TagEnd::Table) => {
            state.flush_table(blocks);
        }
        Event::Start(Tag::TableHead) => {}
        Event::End(TagEnd::TableHead) => {
            state.table_headers = state.current_row.clone();
            state.current_row.clear();
        }
        Event::Start(Tag::TableRow) => {}
        Event::End(TagEnd::TableRow) => {
            state.table_rows.push(state.current_row.clone());
            state.current_row.clear();
        }
        Event::Start(Tag::TableCell) => {
            // Cell text is gathered in the paragraph buffer.
            state.paragraph_buffer.clear();
            state.inline_buffer.clear();
        }
        Event::End(TagEnd::TableCell) => {
            state.current_row.push(state.paragraph_buffer.clone());
            state.paragraph_buffer.clear();
            state.inline_buffer.clear();
        }
        Event::Start(Tag::Strong) => {
            state.in_strong = true;
        }
        Event::End(TagEnd::Strong) => {
            state.in_strong = false;
        }
        Event::Start(Tag::Emphasis) => {
            state.in_emphasis = true;
        }
        Event::End(TagEnd::Emphasis) => {
            state.in_emphasis = false;
        }
        Event::Start(Tag::Strikethrough) => {
            state.in_strikethrough = true;
        }
        Event::End(TagEnd::Strikethrough) => {
            state.in_strikethrough = false;
        }
        Event::Code(text) => {
            if state.in_heading && !state.in_blockquote {
                // Inline code in a heading is part of the heading. Routing it
                // through the paragraph buffer would drop it from the heading
                // and leak it into the next paragraph.
                state.heading_buffer.push_str(&text);
                state.heading_inline.push(InlineElement::Code {
                    value: text.to_string(),
                });
            } else {
                state.in_code_inline = true;
                state.add_inline_text(&text);
                state.in_code_inline = false;
            }
        }
        Event::Start(Tag::Link { dest_url, .. }) => {
            if state.in_list && state.item_depth > 1 {
                // Links in nested items start on their own indented line of the
                // parent item's text; count the inserted line breaks.
                if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n') {
                    state.paragraph_buffer.push('\n');
                    state.nested_line_offset += 1;
                }
                let indent = " ".repeat(state.item_depth - 1);
                state.paragraph_buffer.push_str(&indent);
                if let Some(checked) = state.task_list_marker {
                    let marker = if checked { "[x] " } else { "[ ] " };
                    state.paragraph_buffer.push_str(marker);
                    state.task_list_marker = None;
                }
            }
            state.in_link = true;
            state.link_url = dest_url.to_string();
            state.link_text.clear();
        }
        Event::End(TagEnd::Link) => {
            state.in_link = false;
            let line_offset = if state.in_list && state.item_depth >= 1 {
                Some(state.nested_line_offset)
            } else {
                None
            };
            if state.image_in_link {
                // A nested image overwrote `link_url`; use the saved link destination.
                state.inline_buffer.push(InlineElement::Link {
                    text: state.link_text.clone(),
                    url: state.saved_link_url.clone(),
                    title: None,
                    line_offset,
                });
                state
                    .paragraph_buffer
                    .push_str(&format!("[{}]({})", state.link_text, state.saved_link_url));
            } else {
                state.inline_buffer.push(InlineElement::Link {
                    text: state.link_text.clone(),
                    url: state.link_url.clone(),
                    title: None,
                    line_offset,
                });
                state
                    .paragraph_buffer
                    .push_str(&format!("[{}]({})", state.link_text, state.link_url));
            }
            state.link_text.clear();
            state.link_url.clear();
            state.saved_link_url.clear();
            state.image_in_link = false;
        }
        Event::Start(Tag::Image {
            dest_url, title, ..
        }) => {
            if state.in_link {
                state.image_in_link = true;
                state.saved_link_url = state.link_url.clone();
            }
            state.in_image = true;
            state.link_url = dest_url.to_string();
            state.link_text.clear();
            // The paragraph buffer doubles as scratch storage for the title
            // until the image ends.
            // NOTE(review): this replaces any paragraph text buffered before
            // the image (the inline elements are kept) — confirm this is intended.
            state.paragraph_buffer = title.to_string();
        }
        Event::End(TagEnd::Image) => {
            state.in_image = false;
            if !state.image_in_link {
                let title = if state.paragraph_buffer.is_empty() {
                    None
                } else {
                    Some(state.paragraph_buffer.clone())
                };
                let line_offset = if state.in_list && state.item_depth >= 1 {
                    Some(state.nested_line_offset)
                } else {
                    None
                };
                if state.in_paragraph {
                    state.paragraph_buffer.clear();
                    state.inline_buffer.push(InlineElement::Image {
                        alt: state.link_text.clone(),
                        src: state.link_url.clone(),
                        title,
                        line_offset,
                    });
                    // Mirror the link arm: keep a Markdown rendering of the
                    // image in the paragraph's text content.
                    state
                        .paragraph_buffer
                        .push_str(&format!("![{}]({})", state.link_text, state.link_url));
                } else {
                    state.flush_paragraph(blocks);
                    blocks.push(ContentBlock::Image {
                        alt: state.link_text.clone(),
                        src: state.link_url.clone(),
                        title,
                    });
                    state.paragraph_buffer.clear();
                }
                state.link_text.clear();
                state.link_url.clear();
            }
        }
        Event::Text(text) => {
            // Routing precedence: code block, blockquote, heading, link/image, paragraph.
            if state.in_code {
                state.code_buffer.push_str(&text);
            } else if state.in_blockquote {
                state.blockquote_buffer.push_str(&text);
            } else if state.in_heading {
                state.heading_buffer.push_str(&text);
                let element = if state.in_code_inline {
                    InlineElement::Code {
                        value: text.to_string(),
                    }
                } else if state.in_strong {
                    InlineElement::Strong {
                        value: text.to_string(),
                    }
                } else if state.in_emphasis {
                    InlineElement::Emphasis {
                        value: text.to_string(),
                    }
                } else {
                    InlineElement::Text {
                        value: text.to_string(),
                    }
                };
                state.heading_inline.push(element);
            } else if state.in_link || state.in_image {
                state.link_text.push_str(&text);
            } else {
                if state.in_list && state.item_depth > 1 {
                    // Nested item text goes on its own indented line of the
                    // parent item's content.
                    if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n')
                    {
                        state.paragraph_buffer.push('\n');
                    }
                    let indent = " ".repeat(state.item_depth - 1);
                    state.paragraph_buffer.push_str(&indent);
                    if let Some(checked) = state.task_list_marker {
                        let marker = if checked { "[x] " } else { "[ ] " };
                        state.paragraph_buffer.push_str(marker);
                        state.task_list_marker = None;
                    }
                }
                state.add_inline_text(&text);
            }
        }
        Event::SoftBreak => {
            if state.in_paragraph {
                state.paragraph_buffer.push(' ');
                state.inline_buffer.push(InlineElement::Text {
                    value: " ".to_string(),
                });
            }
        }
        Event::HardBreak => {
            if state.in_paragraph {
                state.paragraph_buffer.push('\n');
                state.inline_buffer.push(InlineElement::Text {
                    value: "\n".to_string(),
                });
            }
        }
        Event::Rule => {
            state.flush_paragraph(blocks);
            blocks.push(ContentBlock::HorizontalRule);
        }
        Event::Start(Tag::Heading { level, .. }) => {
            state.flush_paragraph(blocks);
            state.in_heading = true;
            state.heading_level = Some(level as usize);
            state.heading_buffer.clear();
            state.heading_inline.clear();
        }
        Event::End(TagEnd::Heading(_)) => {
            // Headings without text are dropped.
            if state.in_heading
                && !state.heading_buffer.is_empty()
                && let Some(level) = state.heading_level
            {
                let anchor = Some(slugify(&state.heading_buffer));
                blocks.push(ContentBlock::Heading {
                    level,
                    content: state.heading_buffer.clone(),
                    inline: state.heading_inline.clone(),
                    anchor,
                });
            }
            state.in_heading = false;
            state.heading_level = None;
            state.heading_buffer.clear();
            state.heading_inline.clear();
        }
        _ => {}
    }
}
/// Converts heading text into a URL-friendly anchor slug.
///
/// The text is lowercased; alphanumeric characters are kept, runs of
/// whitespace and hyphens collapse into a single `-`, and every other
/// character is dropped. The result never starts or ends with `-`.
pub fn slugify(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    // A separator is only written once the next kept character arrives, which
    // collapses runs and trims leading/trailing separators in one pass.
    let mut separator_pending = false;

    for ch in lowered.chars() {
        if ch.is_alphanumeric() {
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch.is_whitespace() || ch == '-' {
            separator_pending = true;
        }
    }

    slug
}
/// Appends to `output`, in document order, clones of the inline elements
/// found in `blocks`.
///
/// Paragraphs contribute their inline elements directly. Lists contribute
/// each item's inline elements followed by those of the item's nested blocks.
/// Blockquotes and details blocks are searched recursively. Other block
/// kinds contribute nothing.
fn collect_inline_elements(blocks: &[ContentBlock], output: &mut Vec<InlineElement>) {
    for block in blocks {
        match block {
            ContentBlock::Paragraph { inline, .. } => output.extend_from_slice(inline),
            ContentBlock::List { items, .. } => {
                for item in items {
                    output.extend_from_slice(&item.inline);
                    collect_inline_elements(&item.blocks, output);
                }
            }
            ContentBlock::Blockquote { blocks: nested, .. }
            | ContentBlock::Details { blocks: nested, .. } => {
                collect_inline_elements(nested, output);
            }
            _ => {}
        }
    }
}
/// Parses `markdown` into content blocks, using 0 as the starting line number.
///
/// Delegates to `parse_blocks_from_line`.
pub fn parse_blocks(markdown: &str) -> Vec<ContentBlock> {
parse_blocks_from_line(markdown, 0)
}
/// Parses `markdown` into content blocks, seeding the parser state with
/// `start_line`.
///
/// Steps, in order:
/// 1. wikilinks are rewritten as regular links,
/// 2. link destinations containing spaces are wrapped in angle brackets,
/// 3. `<details>` elements are replaced by placeholders,
/// 4. the result is parsed with tables, strikethrough and task lists enabled,
/// 5. paragraphs consisting solely of a placeholder are replaced by the
///    corresponding details block.
pub fn parse_blocks_from_line(markdown: &str, start_line: usize) -> Vec<ContentBlock> {
    let with_wikilinks = preprocess_wikilinks(markdown);
    let with_bracketed_urls = preprocess_links_with_spaces(&with_wikilinks);
    let (source, details) = extract_details_blocks(&with_bracketed_urls);

    let options =
        Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TASKLISTS;

    let mut parsed = Vec::new();
    let mut state = BlockParserState::new(start_line);
    for event in Parser::new_ext(&source, options) {
        process_event(event, &mut state, &mut parsed);
    }
    state.finalize(&mut parsed);

    parsed
        .into_iter()
        .map(|block| {
            // A placeholder paragraph carries the index of the details block it stands for.
            let placeholder_index = match &block {
                ContentBlock::Paragraph { content, .. } => content
                    .trim()
                    .strip_prefix("[DETAILS_BLOCK_")
                    .and_then(|rest| rest.strip_suffix(']'))
                    .and_then(|digits| digits.parse::<usize>().ok()),
                _ => None,
            };
            placeholder_index
                .and_then(|index| details.get(index).cloned())
                .unwrap_or(block)
        })
        .collect()
}
pub fn to_plain_text(markdown: &str) -> String {
let blocks = parse_blocks(markdown);
blocks
.iter()
.map(ContentBlock::to_plain_text)
.collect::<Vec<_>>()
.join("\n")
}
/// Unit tests for block parsing, slug generation and plain-text extraction.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_paragraph() {
        let markdown = "This is a simple paragraph.";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        assert!(matches!(blocks[0], ContentBlock::Paragraph { .. }));
        if let ContentBlock::Paragraph { content, .. } = &blocks[0] {
            assert_eq!(content, "This is a simple paragraph.");
        }
    }

    #[test]
    fn test_parse_heading() {
        let markdown = "# Hello World";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Heading {
            level,
            content,
            anchor,
            ..
        } = &blocks[0]
        {
            assert_eq!(*level, 1);
            assert_eq!(content, "Hello World");
            assert_eq!(anchor.as_deref(), Some("hello-world"));
        } else {
            panic!("Expected Heading block");
        }
    }

    #[test]
    fn test_parse_code_block() {
        let markdown = "```rust\nfn main() {}\n```";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Code {
            language, content, ..
        } = &blocks[0]
        {
            assert_eq!(language.as_deref(), Some("rust"));
            assert_eq!(content, "fn main() {}");
        } else {
            panic!("Expected Code block");
        }
    }

    #[test]
    fn test_parse_unordered_list() {
        let markdown = "- Item 1\n- Item 2\n- Item 3";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { ordered, items } = &blocks[0] {
            assert!(!ordered);
            assert_eq!(items.len(), 3);
            assert_eq!(items[0].content, "Item 1");
            assert_eq!(items[1].content, "Item 2");
            assert_eq!(items[2].content, "Item 3");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_ordered_list() {
        let markdown = "1. First\n2. Second\n3. Third";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { ordered, items } = &blocks[0] {
            assert!(ordered);
            assert_eq!(items.len(), 3);
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_task_list() {
        let markdown = "- [ ] Todo\n- [x] Done";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 2);
            assert_eq!(items[0].checked, Some(false));
            assert_eq!(items[0].content, "Todo");
            assert_eq!(items[1].checked, Some(true));
            assert_eq!(items[1].content, "Done");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_table() {
        let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Table { headers, rows, .. } = &blocks[0] {
            assert_eq!(headers.len(), 2);
            assert_eq!(headers[0], "A");
            assert_eq!(headers[1], "B");
            assert_eq!(rows.len(), 1);
            assert_eq!(rows[0][0], "1");
            assert_eq!(rows[0][1], "2");
        } else {
            panic!("Expected Table block");
        }
    }

    #[test]
    fn test_parse_blockquote() {
        let markdown = "> This is a quote";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Blockquote { content, .. } = &blocks[0] {
            assert!(content.contains("This is a quote"));
        } else {
            panic!("Expected Blockquote block");
        }
    }

    #[test]
    fn test_parse_horizontal_rule() {
        let markdown = "Before\n\n---\n\nAfter";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 3);
        assert!(matches!(blocks[1], ContentBlock::HorizontalRule));
    }

    #[test]
    fn test_parse_inline_formatting() {
        let markdown = "This has **bold** and *italic* and `code`.";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Strong { .. }))
            );
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Emphasis { .. }))
            );
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Code { .. }))
            );
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_parse_link() {
        let markdown = "See [example](https://example.com) for more.";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let link = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Link { .. }));
            assert!(link.is_some());
            if let Some(InlineElement::Link { text, url, .. }) = link {
                assert_eq!(text, "example");
                assert_eq!(url, "https://example.com");
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_wikilink_preprocessing() {
        let markdown = "See [[Note]] and [[Other|display]] for info.";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let links: Vec<_> = inline
                .iter()
                .filter(|e| matches!(e, InlineElement::Link { .. }))
                .collect();
            assert_eq!(links.len(), 2);
            if let InlineElement::Link { text, url, .. } = &links[0] {
                assert_eq!(text, "Note");
                assert_eq!(url, "wikilink:Note");
            }
            if let InlineElement::Link { text, url, .. } = &links[1] {
                assert_eq!(text, "display");
                assert_eq!(url, "wikilink:Other");
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_list_with_nested_code() {
        // The fence must be indented to the item's content column to be nested in it.
        let markdown = r#"1. First item
   ```rust
   code here
   ```
2. Second item"#;
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 2);
            assert!(!items[0].blocks.is_empty());
            assert!(matches!(items[0].blocks[0], ContentBlock::Code { .. }));
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_image() {
        let markdown = "![Alt text](image.png)";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let img = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Image { .. }));
            assert!(img.is_some(), "Should have inline image");
        } else {
            panic!("Expected Paragraph block with inline image");
        }
    }

    #[test]
    fn test_parse_block_image() {
        let markdown = "Some text\n\n![Image](image.png)";
        let blocks = parse_blocks(markdown);
        assert!(blocks.len() >= 2);
    }

    #[test]
    fn test_parse_details_block() {
        let markdown = r#"<details>
<summary>Click to expand</summary>
Inner content here.
</details>"#;
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Details {
            summary,
            blocks: inner,
            ..
        } = &blocks[0]
        {
            assert_eq!(summary, "Click to expand");
            assert!(!inner.is_empty());
        } else {
            panic!("Expected Details block");
        }
    }

    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("API Reference"), "api-reference");
        assert_eq!(slugify("1. Getting Started"), "1-getting-started");
        assert_eq!(slugify("What's New?"), "whats-new");
    }

    #[test]
    fn test_strikethrough() {
        let markdown = "This is ~~deleted~~ text.";
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Strikethrough { .. }))
            );
        }
    }

    #[test]
    fn test_indented_code_blocks_in_list_items() {
        // Fences are indented to each item's content column so they nest in the items.
        let markdown = r#"## Installation
1. Install from crates.io:
   ```bash
   cargo install treemd
   ```
2. Or build from source:
   ```bash
   git clone https://github.com/example/repo
   cd repo
   cargo install --path .
   ```"#;
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 2, "Expected 2 blocks (heading + list)");
        assert!(
            matches!(blocks[0], ContentBlock::Heading { level: 2, .. }),
            "First block should be H2"
        );
        if let ContentBlock::List { ordered, items } = &blocks[1] {
            assert!(ordered, "Should be an ordered list");
            assert_eq!(items.len(), 2, "Should have 2 list items");
            assert!(
                !items[0].blocks.is_empty(),
                "First item should have nested blocks"
            );
            assert!(
                matches!(items[0].blocks[0], ContentBlock::Code { .. }),
                "First item's nested block should be Code"
            );
            if let ContentBlock::Code {
                language, content, ..
            } = &items[0].blocks[0]
            {
                assert_eq!(language.as_deref(), Some("bash"));
                assert!(content.contains("cargo install treemd"));
            }
            assert!(
                !items[1].blocks.is_empty(),
                "Second item should have nested blocks"
            );
            assert!(
                matches!(items[1].blocks[0], ContentBlock::Code { .. }),
                "Second item's nested block should be Code"
            );
            if let ContentBlock::Code {
                language, content, ..
            } = &items[1].blocks[0]
            {
                assert_eq!(language.as_deref(), Some("bash"));
                assert!(content.contains("git clone"));
            }
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_to_plain_text_simple_paragraph() {
        let markdown = "This is a simple paragraph.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This is a simple paragraph.");
    }

    #[test]
    fn test_to_plain_text_with_link() {
        let markdown = "[Overview](#overview) and more text";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Overview and more text");
    }

    #[test]
    fn test_to_plain_text_with_bold_and_italic() {
        let markdown = "This has **bold** and *italic* text.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This has bold and italic text.");
    }

    #[test]
    fn test_to_plain_text_with_inline_code() {
        let markdown = "Use the `println!` macro.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Use the println! macro.");
    }

    #[test]
    fn test_to_plain_text_with_strikethrough() {
        let markdown = "This is ~~deleted~~ text.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This is deleted text.");
    }

    #[test]
    fn test_to_plain_text_wikilinks() {
        let markdown = "See [[Note]] and [[Other|display]] for info.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "See Note and display for info.");
    }

    #[test]
    fn test_to_plain_text_heading() {
        let markdown = "# Hello World";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Hello World");
    }

    #[test]
    fn test_to_plain_text_code_block() {
        let markdown = "```rust\nfn main() {}\n```";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "fn main() {}");
    }

    #[test]
    fn test_to_plain_text_list() {
        let markdown = "- Item 1\n- Item 2\n- Item 3";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Item 1\nItem 2\nItem 3");
    }

    #[test]
    fn test_to_plain_text_table() {
        let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
        let plain = to_plain_text(markdown);
        assert!(plain.contains("A\tB"));
        assert!(plain.contains("1\t2"));
    }

    #[test]
    fn test_to_plain_text_blockquote() {
        let markdown = "> This is a quote";
        let plain = to_plain_text(markdown);
        assert!(plain.contains("This is a quote"));
    }

    #[test]
    fn test_to_plain_text_image() {
        let markdown = "![Alt text](image.png)";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Alt text");
    }

    #[test]
    fn test_to_plain_text_horizontal_rule() {
        let markdown = "Before\n\n---\n\nAfter";
        let plain = to_plain_text(markdown);
        assert!(plain.contains("Before"));
        assert!(plain.contains("After"));
    }

    #[test]
    fn test_to_plain_text_complex_document() {
        let markdown = r#"# Document Title
This is a paragraph with **bold** and *italic* text.
- [Link One](#one)
- [Link Two](#two)
- [Link Three](#three)
See [[WikiNote]] for more info."#;
        let plain = to_plain_text(markdown);
        assert!(plain.contains("Document Title"));
        assert!(plain.contains("bold"));
        assert!(plain.contains("italic"));
        assert!(plain.contains("Link One"));
        assert!(plain.contains("Link Two"));
        assert!(plain.contains("WikiNote"));
        assert!(!plain.contains("#one"));
        assert!(!plain.contains("#two"));
    }

    #[test]
    fn test_to_plain_text_treemd_use_case() {
        let markdown = "[Overview](#overview)";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Overview");
        let o_count = plain.chars().filter(|c| *c == 'o' || *c == 'O').count();
        assert_eq!(
            o_count, 1,
            "Should only count 'o' in visible text, not hidden anchor"
        );
        assert!(!plain.contains("#overview"));
        // The lowercase anchor text must not appear; only the capitalised link text does.
        assert!(!plain.contains("overview"));
    }

    #[test]
    fn test_to_plain_text_nested_formatting() {
        let markdown = "**[bold link](url)** and *[italic link](url2)*";
        let plain = to_plain_text(markdown);
        assert!(plain.contains("bold link"));
        assert!(plain.contains("italic link"));
        assert!(!plain.contains("url"));
    }

    #[test]
    fn test_nested_list_item_inline_elements() {
        // Two-space indentation nests the last two items under the first.
        let markdown = r#"- [Features](#features)
  - [Interactive TUI](#interactive-tui)
  - [CLI Mode](#cli-mode)"#;
        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 1, "Should have 1 top-level item");
            let item = &items[0];
            let links: Vec<_> = item
                .inline
                .iter()
                .filter_map(|e| {
                    if let InlineElement::Link { text, url, .. } = e {
                        Some((text.as_str(), url.as_str()))
                    } else {
                        None
                    }
                })
                .collect();
            assert_eq!(links.len(), 3, "Should have 3 links total");
            assert!(
                links.iter().any(|(text, _)| *text == "Features"),
                "Should have Features link"
            );
            assert!(
                links.iter().any(|(text, _)| *text == "Interactive TUI"),
                "Should have Interactive TUI link"
            );
            assert!(
                links.iter().any(|(text, _)| *text == "CLI Mode"),
                "Should have CLI Mode link"
            );
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_deeply_nested_list_inline_elements() {
        let markdown = r#"- Level 1 [link1](url1)
  - Level 2 [link2](url2)
    - Level 3 [link3](url3)"#;
        let blocks = parse_blocks(markdown);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            let item = &items[0];
            let links: Vec<_> = item
                .inline
                .iter()
                .filter(|e| matches!(e, InlineElement::Link { .. }))
                .collect();
            assert_eq!(links.len(), 3, "Should collect all 3 nested links");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_inline_element_line_offset() {
        let markdown = r#"- [Features](#features)
  - [Interactive TUI](#interactive-tui)
  - [CLI Mode](#cli-mode)"#;
        let blocks = parse_blocks(markdown);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            let item = &items[0];
            let links: Vec<_> = item
                .inline
                .iter()
                .filter_map(|e| {
                    if let InlineElement::Link {
                        text, line_offset, ..
                    } = e
                    {
                        Some((text.as_str(), *line_offset))
                    } else {
                        None
                    }
                })
                .collect();
            assert_eq!(links.len(), 3);
            let features = links.iter().find(|(t, _)| *t == "Features").unwrap();
            assert_eq!(features.1, Some(0), "Features should be on line 0");
            let tui = links.iter().find(|(t, _)| *t == "Interactive TUI").unwrap();
            assert_eq!(tui.1, Some(1), "Interactive TUI should be on line 1");
            let cli = links.iter().find(|(t, _)| *t == "CLI Mode").unwrap();
            assert_eq!(cli.1, Some(2), "CLI Mode should be on line 2");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_line_offset_not_set_outside_lists() {
        let markdown = "See [example](url) for more.";
        let blocks = parse_blocks(markdown);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let link = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Link { .. }));
            if let Some(InlineElement::Link { line_offset, .. }) = link {
                assert_eq!(
                    *line_offset, None,
                    "line_offset should be None outside lists"
                );
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }
}