1use pulldown_cmark::{
14 Alignment as CmarkAlignment, CodeBlockKind, Event, Options, Parser, Tag, TagEnd,
15};
16use regex::Regex;
17use std::sync::LazyLock;
18use turbovault_core::{ContentBlock, InlineElement, ListItem, TableAlignment};
19
/// Matches wikilinks of the form `[[target]]` or `[[target|alias]]`.
///
/// Capture group 1 is the target: one or more characters other than `]` and
/// `|`. Capture group 2 is the alias and only participates when a `|`
/// follows the target; it is one or more characters other than `]`.
///
/// The regex is compiled on first use. The pattern is a fixed literal, so the
/// `unwrap` can only fail if the literal itself is edited into something
/// invalid.
static WIKILINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap());
27
28fn preprocess_wikilinks(markdown: &str) -> String {
31 WIKILINK_RE
32 .replace_all(markdown, |caps: ®ex::Captures| {
33 let target = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
34 let alias = caps.get(2).map(|m| m.as_str().trim());
35 let display_text = alias.unwrap_or(target);
36 format!("[{}](wikilink:{})", display_text, target)
37 })
38 .to_string()
39}
40
/// Matches inline links `[text](inner)` whose parenthesised part contains at
/// least one whitespace character.
///
/// Capture group 1 is the link text (one or more characters other than `]`).
/// Capture group 2 is everything between the parentheses; it may not contain
/// `)`, `<` or `>`, so destinations already written as `<...>` never match.
///
/// The regex is compiled on first use; the pattern is a fixed literal, so the
/// `unwrap` can only fail if the literal itself is edited into something
/// invalid.
static LINK_WITH_SPACES_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)<>]+\s[^)<>]*)\)").unwrap());
44
45fn preprocess_links_with_spaces(markdown: &str) -> String {
47 LINK_WITH_SPACES_RE
48 .replace_all(markdown, |caps: ®ex::Captures| {
49 let text = &caps[1];
50 let url = &caps[2];
51 if url.contains(' ') {
52 format!("[{}](<{}>)", text, url)
53 } else {
54 caps[0].to_string()
55 }
56 })
57 .to_string()
58}
59
60fn extract_details_blocks(markdown: &str) -> (String, Vec<ContentBlock>) {
66 let mut details_blocks = Vec::new();
67 let mut result = String::new();
68 let mut current_pos = 0;
69
70 while current_pos < markdown.len() {
71 if markdown[current_pos..].starts_with("<details")
72 && let Some(tag_end) = markdown[current_pos..].find('>')
73 && let details_start = current_pos + tag_end + 1
74 && let Some(details_end_pos) = markdown[details_start..].find("</details>")
75 {
76 let details_end = details_start + details_end_pos;
77 let details_content = &markdown[details_start..details_end];
78
79 let summary = extract_summary(details_content);
81
82 let content_start = if let Some(summary_end_pos) = details_content.find("</summary>") {
84 let summary_tag_end = summary_end_pos + "</summary>".len();
85 &details_content[summary_tag_end..]
86 } else {
87 details_content
88 };
89
90 let content_trimmed = content_start.trim();
91
92 let nested_blocks = if !content_trimmed.is_empty() {
94 parse_blocks(content_trimmed)
95 } else {
96 Vec::new()
97 };
98
99 details_blocks.push(ContentBlock::Details {
100 summary,
101 content: content_trimmed.to_string(),
102 blocks: nested_blocks,
103 });
104
105 result.push_str(&format!("\n[DETAILS_BLOCK_{}]\n", details_blocks.len() - 1));
106 current_pos = details_end + "</details>".len();
107 continue;
108 }
109
110 if let Some(ch) = markdown[current_pos..].chars().next() {
111 result.push(ch);
112 current_pos += ch.len_utf8();
113 } else {
114 break;
115 }
116 }
117
118 (result, details_blocks)
119}
120
/// Returns the trimmed text of the first `<summary …>…</summary>` element in
/// `details_content`, or an empty string when no complete element is present.
fn extract_summary(details_content: &str) -> String {
    /// Locates the text between the end of the opening tag and `</summary>`.
    fn summary_text(html: &str) -> Option<&str> {
        let tag_at = html.find("<summary")?;
        let text_from = tag_at + html[tag_at..].find('>')? + 1;
        let text_len = html[text_from..].find("</summary>")?;
        Some(html[text_from..text_from + text_len].trim())
    }

    match summary_text(details_content) {
        Some(text) => text.to_string(),
        None => String::new(),
    }
}
135
/// Scratch state shared by every call to `process_event` while one document's
/// event stream is being walked.
///
/// The buffers accumulate text for the construct currently being read and the
/// `in_*` flags record which constructs are open. The `flush_*` methods turn a
/// finished construct into a `ContentBlock`.
struct BlockParserState {
    /// Line number stamped onto code blocks (copied into `code_start_line`
    /// when a code block opens and used as its `end_line`). Seeded from the
    /// `start_line` passed to `new`; no code visible here advances it.
    current_line: usize,
    /// Text of the paragraph, list item or table cell being read. Also reused
    /// to hold an image's title between the image's start and end events.
    paragraph_buffer: String,
    /// Inline elements collected alongside `paragraph_buffer`.
    inline_buffer: Vec<InlineElement>,
    /// Finished top-level items of the list being read.
    list_items: Vec<ListItem>,
    /// Whether the outermost list is ordered.
    list_ordered: bool,
    /// Number of list tags currently open.
    list_depth: usize,
    /// Number of item tags currently open; values above 1 mean a nested item.
    item_depth: usize,
    /// Task marker seen for the current item, if any (`true` = checked).
    task_list_marker: Option<bool>,
    /// Markers of enclosing items, saved while a nested item is open.
    saved_task_markers: Vec<Option<bool>>,
    /// Blocks (paragraphs, code) collected inside the current top-level item.
    item_blocks: Vec<ContentBlock>,
    /// Text of the code block being read.
    code_buffer: String,
    /// Language of the current fenced code block, if one was given.
    code_language: Option<String>,
    /// Value of `current_line` when the current code block opened.
    code_start_line: usize,
    /// Text collected inside a blockquote; parsed again when it is flushed.
    blockquote_buffer: String,
    /// Header cells of the table being read.
    table_headers: Vec<String>,
    /// Column alignments of the table being read.
    table_alignments: Vec<TableAlignment>,
    /// Finished body rows of the table being read.
    table_rows: Vec<Vec<String>>,
    /// Cells of the table row being read.
    current_row: Vec<String>,
    /// Level of the heading being read.
    heading_level: Option<usize>,
    /// Plain text of the heading being read.
    heading_buffer: String,
    /// Inline elements of the heading being read.
    heading_inline: Vec<InlineElement>,
    /// A paragraph is open.
    in_paragraph: bool,
    /// A top-level list is open.
    in_list: bool,
    /// A code block is open.
    in_code: bool,
    /// A blockquote is open.
    in_blockquote: bool,
    /// A table is open.
    in_table: bool,
    /// A heading is open.
    in_heading: bool,
    /// Inside `**strong**` text.
    in_strong: bool,
    /// Inside `*emphasis*` text.
    in_emphasis: bool,
    /// Inside `~~strikethrough~~` text.
    in_strikethrough: bool,
    /// Set only while an inline code span is being added to the buffers.
    in_code_inline: bool,
    /// A link is open.
    in_link: bool,
    /// Destination of the open link, or source of the open image.
    link_url: String,
    /// Visible text of the open link, or alt text of the open image.
    link_text: String,
    /// An image was encountered inside the open link.
    image_in_link: bool,
    /// An image is open.
    in_image: bool,
    /// The enclosing link's destination, kept while a nested image reuses
    /// `link_url`.
    saved_link_url: String,
    /// Count of line breaks inserted before links in nested list items of the
    /// current top-level item; reported as `line_offset` on links and images
    /// found inside lists.
    nested_line_offset: usize,
}
181
182impl BlockParserState {
183 fn new(start_line: usize) -> Self {
184 Self {
185 current_line: start_line,
186 paragraph_buffer: String::new(),
187 inline_buffer: Vec::new(),
188 list_items: Vec::new(),
189 list_ordered: false,
190 list_depth: 0,
191 item_depth: 0,
192 task_list_marker: None,
193 saved_task_markers: Vec::new(),
194 item_blocks: Vec::new(),
195 code_buffer: String::new(),
196 code_language: None,
197 code_start_line: 0,
198 blockquote_buffer: String::new(),
199 table_headers: Vec::new(),
200 table_alignments: Vec::new(),
201 table_rows: Vec::new(),
202 current_row: Vec::new(),
203 heading_level: None,
204 heading_buffer: String::new(),
205 heading_inline: Vec::new(),
206 in_paragraph: false,
207 in_list: false,
208 in_code: false,
209 in_blockquote: false,
210 in_table: false,
211 in_heading: false,
212 in_strong: false,
213 in_emphasis: false,
214 in_strikethrough: false,
215 in_code_inline: false,
216 in_link: false,
217 link_url: String::new(),
218 link_text: String::new(),
219 image_in_link: false,
220 in_image: false,
221 saved_link_url: String::new(),
222 nested_line_offset: 0,
223 }
224 }
225
226 fn finalize(&mut self, blocks: &mut Vec<ContentBlock>) {
227 self.flush_paragraph(blocks);
228 self.flush_list(blocks);
229 self.flush_code(blocks);
230 self.flush_blockquote(blocks);
231 self.flush_table(blocks);
232 }
233
234 fn flush_paragraph(&mut self, blocks: &mut Vec<ContentBlock>) {
235 if self.in_paragraph && !self.paragraph_buffer.is_empty() {
236 blocks.push(ContentBlock::Paragraph {
237 content: self.paragraph_buffer.clone(),
238 inline: self.inline_buffer.clone(),
239 });
240 self.paragraph_buffer.clear();
241 self.inline_buffer.clear();
242 self.in_paragraph = false;
243 }
244 }
245
246 fn flush_list(&mut self, blocks: &mut Vec<ContentBlock>) {
247 if self.in_list && !self.list_items.is_empty() {
248 blocks.push(ContentBlock::List {
249 ordered: self.list_ordered,
250 items: self.list_items.clone(),
251 });
252 self.list_items.clear();
253 self.in_list = false;
254 }
255 }
256
257 fn flush_code(&mut self, blocks: &mut Vec<ContentBlock>) {
258 if self.in_code && !self.code_buffer.is_empty() {
259 blocks.push(ContentBlock::Code {
260 language: self.code_language.clone(),
261 content: self.code_buffer.trim_end().to_string(),
262 start_line: self.code_start_line,
263 end_line: self.current_line,
264 });
265 self.code_buffer.clear();
266 self.code_language = None;
267 self.in_code = false;
268 }
269 }
270
271 fn flush_blockquote(&mut self, blocks: &mut Vec<ContentBlock>) {
272 if self.in_blockquote && !self.blockquote_buffer.is_empty() {
273 let nested_blocks = parse_blocks(&self.blockquote_buffer);
274 blocks.push(ContentBlock::Blockquote {
275 content: self.blockquote_buffer.clone(),
276 blocks: nested_blocks,
277 });
278 self.blockquote_buffer.clear();
279 self.in_blockquote = false;
280 }
281 }
282
283 fn flush_table(&mut self, blocks: &mut Vec<ContentBlock>) {
284 if self.in_table && !self.table_headers.is_empty() {
285 blocks.push(ContentBlock::Table {
286 headers: self.table_headers.clone(),
287 alignments: self.table_alignments.clone(),
288 rows: self.table_rows.clone(),
289 });
290 self.table_headers.clear();
291 self.table_alignments.clear();
292 self.table_rows.clear();
293 self.current_row.clear();
294 self.paragraph_buffer.clear();
295 self.inline_buffer.clear();
296 self.in_table = false;
297 }
298 }
299
300 fn add_inline_text(&mut self, text: &str) {
301 if text.is_empty() {
302 return;
303 }
304
305 let element = if self.in_code_inline {
306 InlineElement::Code {
307 value: text.to_string(),
308 }
309 } else if self.in_strong {
310 InlineElement::Strong {
311 value: text.to_string(),
312 }
313 } else if self.in_emphasis {
314 InlineElement::Emphasis {
315 value: text.to_string(),
316 }
317 } else if self.in_strikethrough {
318 InlineElement::Strikethrough {
319 value: text.to_string(),
320 }
321 } else {
322 InlineElement::Text {
323 value: text.to_string(),
324 }
325 };
326
327 self.inline_buffer.push(element);
328 self.paragraph_buffer.push_str(text);
329 }
330}
331
332#[allow(clippy::too_many_lines)]
337fn process_event(event: Event, state: &mut BlockParserState, blocks: &mut Vec<ContentBlock>) {
338 match event {
339 Event::Start(Tag::Paragraph) => {
340 state.in_paragraph = true;
341 }
342 Event::End(TagEnd::Paragraph) => {
343 if state.item_depth >= 1 && state.in_paragraph && !state.paragraph_buffer.is_empty() {
344 state.item_blocks.push(ContentBlock::Paragraph {
345 content: state.paragraph_buffer.clone(),
346 inline: state.inline_buffer.clone(),
347 });
348 state.paragraph_buffer.clear();
349 state.inline_buffer.clear();
350 state.in_paragraph = false;
351 } else {
352 state.flush_paragraph(blocks);
353 }
354 }
355 Event::Start(Tag::CodeBlock(kind)) => {
356 state.in_code = true;
357 state.code_start_line = state.current_line;
358 state.code_language = match kind {
359 CodeBlockKind::Fenced(lang) => {
360 if lang.is_empty() {
361 None
362 } else {
363 Some(lang.to_string())
364 }
365 }
366 CodeBlockKind::Indented => None,
367 };
368 }
369 Event::End(TagEnd::CodeBlock) => {
370 if state.item_depth >= 1 && state.in_code && !state.code_buffer.is_empty() {
371 state.item_blocks.push(ContentBlock::Code {
372 language: state.code_language.clone(),
373 content: state.code_buffer.trim_end().to_string(),
374 start_line: state.code_start_line,
375 end_line: state.current_line,
376 });
377 state.code_buffer.clear();
378 state.code_language = None;
379 state.in_code = false;
380 } else {
381 state.flush_code(blocks);
382 }
383 }
384 Event::Start(Tag::List(start_number)) => {
385 state.list_depth += 1;
386 if state.list_depth == 1 {
387 state.in_list = true;
388 state.list_ordered = start_number.is_some();
389 }
390 }
391 Event::End(TagEnd::List(_)) => {
392 state.list_depth = state.list_depth.saturating_sub(1);
393 if state.list_depth == 0 {
394 state.flush_list(blocks);
395 }
396 }
397 Event::Start(Tag::Item) => {
398 state.item_depth += 1;
399 if state.item_depth > 1 {
400 state.saved_task_markers.push(state.task_list_marker);
401 state.task_list_marker = None;
402 }
403 if state.item_depth == 1 {
404 state.paragraph_buffer.clear();
405 state.inline_buffer.clear();
406 state.item_blocks.clear();
407 state.nested_line_offset = 0;
408 }
409 }
410 Event::End(TagEnd::Item) => {
411 if state.item_depth > 1
412 && let Some(saved) = state.saved_task_markers.pop()
413 {
414 state.task_list_marker = saved;
415 }
416 if state.item_depth == 1 {
417 let (content, mut inline, remaining_blocks) = if !state.paragraph_buffer.is_empty()
418 {
419 let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
420 (
421 state.paragraph_buffer.clone(),
422 state.inline_buffer.clone(),
423 all_blocks,
424 )
425 } else if let Some(ContentBlock::Paragraph { content, inline }) =
426 state.item_blocks.first().cloned()
427 {
428 let remaining: Vec<ContentBlock> = state.item_blocks.drain(1..).collect();
429 (content, inline, remaining)
430 } else {
431 let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
432 (String::new(), Vec::new(), all_blocks)
433 };
434
435 collect_inline_elements(&remaining_blocks, &mut inline);
437
438 state.list_items.push(ListItem {
439 checked: state.task_list_marker,
440 content,
441 inline,
442 blocks: remaining_blocks,
443 });
444 state.paragraph_buffer.clear();
445 state.inline_buffer.clear();
446 state.item_blocks.clear();
447 state.task_list_marker = None;
448 }
449 state.item_depth = state.item_depth.saturating_sub(1);
450 }
451 Event::TaskListMarker(checked) => {
452 state.task_list_marker = Some(checked);
453 }
454 Event::Start(Tag::BlockQuote(_)) => {
455 state.in_blockquote = true;
456 }
457 Event::End(TagEnd::BlockQuote(_)) => {
458 state.flush_blockquote(blocks);
459 }
460 Event::Start(Tag::Table(alignments)) => {
461 state.in_table = true;
462 state.table_alignments = alignments
463 .iter()
464 .map(|a| match a {
465 CmarkAlignment::Left => TableAlignment::Left,
466 CmarkAlignment::Center => TableAlignment::Center,
467 CmarkAlignment::Right => TableAlignment::Right,
468 CmarkAlignment::None => TableAlignment::None,
469 })
470 .collect();
471 }
472 Event::End(TagEnd::Table) => {
473 state.flush_table(blocks);
474 }
475 Event::Start(Tag::TableHead) => {}
476 Event::End(TagEnd::TableHead) => {
477 state.table_headers = state.current_row.clone();
478 state.current_row.clear();
479 }
480 Event::Start(Tag::TableRow) => {}
481 Event::End(TagEnd::TableRow) => {
482 state.table_rows.push(state.current_row.clone());
483 state.current_row.clear();
484 }
485 Event::Start(Tag::TableCell) => {
486 state.paragraph_buffer.clear();
487 state.inline_buffer.clear();
488 }
489 Event::End(TagEnd::TableCell) => {
490 state.current_row.push(state.paragraph_buffer.clone());
491 state.paragraph_buffer.clear();
492 state.inline_buffer.clear();
493 }
494 Event::Start(Tag::Strong) => {
495 state.in_strong = true;
496 }
497 Event::End(TagEnd::Strong) => {
498 state.in_strong = false;
499 }
500 Event::Start(Tag::Emphasis) => {
501 state.in_emphasis = true;
502 }
503 Event::End(TagEnd::Emphasis) => {
504 state.in_emphasis = false;
505 }
506 Event::Start(Tag::Strikethrough) => {
507 state.in_strikethrough = true;
508 }
509 Event::End(TagEnd::Strikethrough) => {
510 state.in_strikethrough = false;
511 }
512 Event::Code(text) => {
513 state.in_code_inline = true;
514 state.add_inline_text(&text);
515 state.in_code_inline = false;
516 }
517 Event::Start(Tag::Link { dest_url, .. }) => {
518 if state.in_list && state.item_depth > 1 {
521 if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n') {
522 state.paragraph_buffer.push('\n');
523 state.nested_line_offset += 1;
524 }
525 let indent = " ".repeat(state.item_depth - 1);
526 state.paragraph_buffer.push_str(&indent);
527
528 if let Some(checked) = state.task_list_marker {
529 let marker = if checked { "[x] " } else { "[ ] " };
530 state.paragraph_buffer.push_str(marker);
531 state.task_list_marker = None;
532 }
533 }
534 state.in_link = true;
535 state.link_url = dest_url.to_string();
536 state.link_text.clear();
537 }
538 Event::End(TagEnd::Link) => {
539 state.in_link = false;
540
541 let line_offset = if state.in_list && state.item_depth >= 1 {
543 Some(state.nested_line_offset)
544 } else {
545 None
546 };
547
548 if state.image_in_link {
549 state.inline_buffer.push(InlineElement::Link {
550 text: state.link_text.clone(),
551 url: state.saved_link_url.clone(),
552 title: None,
553 line_offset,
554 });
555 state
556 .paragraph_buffer
557 .push_str(&format!("[{}]({})", state.link_text, state.saved_link_url));
558 } else {
559 state.inline_buffer.push(InlineElement::Link {
560 text: state.link_text.clone(),
561 url: state.link_url.clone(),
562 title: None,
563 line_offset,
564 });
565 state
566 .paragraph_buffer
567 .push_str(&format!("[{}]({})", state.link_text, state.link_url));
568 }
569
570 state.link_text.clear();
571 state.link_url.clear();
572 state.saved_link_url.clear();
573 state.image_in_link = false;
574 }
575 Event::Start(Tag::Image {
576 dest_url, title, ..
577 }) => {
578 if state.in_link {
579 state.image_in_link = true;
580 state.saved_link_url = state.link_url.clone();
581 }
582 state.in_image = true;
583 state.link_url = dest_url.to_string();
584 state.link_text.clear();
585 state.paragraph_buffer = title.to_string();
586 }
587 Event::End(TagEnd::Image) => {
588 state.in_image = false;
589
590 if !state.image_in_link {
591 let title = if state.paragraph_buffer.is_empty() {
593 None
594 } else {
595 Some(state.paragraph_buffer.clone())
596 };
597
598 let line_offset = if state.in_list && state.item_depth >= 1 {
600 Some(state.nested_line_offset)
601 } else {
602 None
603 };
604
605 if state.in_paragraph {
606 state.paragraph_buffer.clear();
608 state.inline_buffer.push(InlineElement::Image {
609 alt: state.link_text.clone(),
610 src: state.link_url.clone(),
611 title,
612 line_offset,
613 });
614 state
616 .paragraph_buffer
617 .push_str(&format!("", state.link_text, state.link_url));
618 } else {
619 state.flush_paragraph(blocks);
620 blocks.push(ContentBlock::Image {
621 alt: state.link_text.clone(),
622 src: state.link_url.clone(),
623 title,
624 });
625 state.paragraph_buffer.clear();
626 }
627
628 state.link_text.clear();
629 state.link_url.clear();
630 }
631 }
632 Event::Text(text) => {
633 if state.in_code {
634 state.code_buffer.push_str(&text);
635 } else if state.in_blockquote {
636 state.blockquote_buffer.push_str(&text);
637 } else if state.in_heading {
638 state.heading_buffer.push_str(&text);
639 let element = if state.in_code_inline {
640 InlineElement::Code {
641 value: text.to_string(),
642 }
643 } else if state.in_strong {
644 InlineElement::Strong {
645 value: text.to_string(),
646 }
647 } else if state.in_emphasis {
648 InlineElement::Emphasis {
649 value: text.to_string(),
650 }
651 } else {
652 InlineElement::Text {
653 value: text.to_string(),
654 }
655 };
656 state.heading_inline.push(element);
657 } else if state.in_link || state.in_image {
658 state.link_text.push_str(&text);
659 } else {
660 if state.in_list && state.item_depth > 1 {
661 if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n')
662 {
663 state.paragraph_buffer.push('\n');
664 }
665 let indent = " ".repeat(state.item_depth - 1);
666 state.paragraph_buffer.push_str(&indent);
667
668 if let Some(checked) = state.task_list_marker {
669 let marker = if checked { "[x] " } else { "[ ] " };
670 state.paragraph_buffer.push_str(marker);
671 state.task_list_marker = None;
672 }
673 }
674 state.add_inline_text(&text);
675 }
676 }
677 Event::SoftBreak => {
678 if state.in_paragraph {
679 state.paragraph_buffer.push(' ');
680 state.inline_buffer.push(InlineElement::Text {
681 value: " ".to_string(),
682 });
683 }
684 }
685 Event::HardBreak => {
686 if state.in_paragraph {
687 state.paragraph_buffer.push('\n');
688 state.inline_buffer.push(InlineElement::Text {
689 value: "\n".to_string(),
690 });
691 }
692 }
693 Event::Rule => {
694 state.flush_paragraph(blocks);
695 blocks.push(ContentBlock::HorizontalRule);
696 }
697 Event::Start(Tag::Heading { level, .. }) => {
698 state.flush_paragraph(blocks);
699 state.in_heading = true;
700 state.heading_level = Some(level as usize);
701 state.heading_buffer.clear();
702 state.heading_inline.clear();
703 }
704 Event::End(TagEnd::Heading(_)) => {
705 if state.in_heading
706 && !state.heading_buffer.is_empty()
707 && let Some(level) = state.heading_level
708 {
709 let anchor = Some(slugify(&state.heading_buffer));
710 blocks.push(ContentBlock::Heading {
711 level,
712 content: state.heading_buffer.clone(),
713 inline: state.heading_inline.clone(),
714 anchor,
715 });
716 }
717 state.in_heading = false;
718 state.heading_level = None;
719 state.heading_buffer.clear();
720 state.heading_inline.clear();
721 }
722 _ => {}
723 }
724}
725
/// Converts heading text into an anchor slug.
///
/// The text is lowercased; alphanumeric characters are kept, every run of
/// whitespace and/or `-` between kept characters becomes a single `-`, and
/// all other characters are dropped. The result never starts or ends with
/// `-`.
pub fn slugify(text: &str) -> String {
    let mut slug = String::new();
    // Set when a separator has been seen since the last kept character.
    let mut separator_pending = false;

    for ch in text.to_lowercase().chars() {
        if ch.is_alphanumeric() {
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch.is_whitespace() || ch == '-' {
            separator_pending = true;
        }
        // Anything else (punctuation, symbols) is dropped without acting as a
        // separator.
    }

    slug
}
750
751fn collect_inline_elements(blocks: &[ContentBlock], output: &mut Vec<InlineElement>) {
761 for block in blocks {
762 match block {
763 ContentBlock::Paragraph { inline, .. } => {
764 output.extend(inline.iter().cloned());
765 }
766 ContentBlock::List { items, .. } => {
767 for item in items {
768 output.extend(item.inline.iter().cloned());
769 collect_inline_elements(&item.blocks, output);
770 }
771 }
772 ContentBlock::Blockquote { blocks, .. } => {
773 collect_inline_elements(blocks, output);
774 }
775 ContentBlock::Details { blocks, .. } => {
776 collect_inline_elements(blocks, output);
777 }
778 _ => {}
781 }
782 }
783}
784
/// Parses `markdown` into a sequence of content blocks.
///
/// Convenience wrapper that calls [`parse_blocks_from_line`] with a starting
/// line of `0`.
pub fn parse_blocks(markdown: &str) -> Vec<ContentBlock> {
    parse_blocks_from_line(markdown, 0)
}
810
811pub fn parse_blocks_from_line(markdown: &str, start_line: usize) -> Vec<ContentBlock> {
815 let preprocessed = preprocess_wikilinks(markdown);
817
818 let preprocessed = preprocess_links_with_spaces(&preprocessed);
820
821 let (processed_markdown, details_blocks) = extract_details_blocks(&preprocessed);
823
824 let mut options = Options::empty();
826 options.insert(Options::ENABLE_TABLES);
827 options.insert(Options::ENABLE_STRIKETHROUGH);
828 options.insert(Options::ENABLE_TASKLISTS);
829
830 let parser = Parser::new_ext(&processed_markdown, options);
831 let mut blocks = Vec::new();
832 let mut state = BlockParserState::new(start_line);
833
834 for event in parser {
835 process_event(event, &mut state, &mut blocks);
836 }
837
838 state.finalize(&mut blocks);
839
840 let mut final_blocks = Vec::new();
842 for block in blocks {
843 let replaced = if let ContentBlock::Paragraph { content, .. } = &block {
844 let trimmed = content.trim();
845 trimmed
846 .strip_prefix("[DETAILS_BLOCK_")
847 .and_then(|s| s.strip_suffix(']'))
848 .and_then(|s| s.parse::<usize>().ok())
849 .and_then(|idx| details_blocks.get(idx).cloned())
850 } else {
851 None
852 };
853
854 final_blocks.push(replaced.unwrap_or(block));
855 }
856
857 final_blocks
858}
859
860pub fn to_plain_text(markdown: &str) -> String {
898 let blocks = parse_blocks(markdown);
899 blocks
900 .iter()
901 .map(ContentBlock::to_plain_text)
902 .collect::<Vec<_>>()
903 .join("\n")
904}
905
906#[cfg(test)]
911mod tests {
912 use super::*;
913
914 #[test]
915 fn test_parse_paragraph() {
916 let markdown = "This is a simple paragraph.";
917 let blocks = parse_blocks(markdown);
918
919 assert_eq!(blocks.len(), 1);
920 assert!(matches!(blocks[0], ContentBlock::Paragraph { .. }));
921 if let ContentBlock::Paragraph { content, .. } = &blocks[0] {
922 assert_eq!(content, "This is a simple paragraph.");
923 }
924 }
925
926 #[test]
927 fn test_parse_heading() {
928 let markdown = "# Hello World";
929 let blocks = parse_blocks(markdown);
930
931 assert_eq!(blocks.len(), 1);
932 if let ContentBlock::Heading {
933 level,
934 content,
935 anchor,
936 ..
937 } = &blocks[0]
938 {
939 assert_eq!(*level, 1);
940 assert_eq!(content, "Hello World");
941 assert_eq!(anchor.as_deref(), Some("hello-world"));
942 } else {
943 panic!("Expected Heading block");
944 }
945 }
946
947 #[test]
948 fn test_parse_code_block() {
949 let markdown = "```rust\nfn main() {}\n```";
950 let blocks = parse_blocks(markdown);
951
952 assert_eq!(blocks.len(), 1);
953 if let ContentBlock::Code {
954 language, content, ..
955 } = &blocks[0]
956 {
957 assert_eq!(language.as_deref(), Some("rust"));
958 assert_eq!(content, "fn main() {}");
959 } else {
960 panic!("Expected Code block");
961 }
962 }
963
964 #[test]
965 fn test_parse_unordered_list() {
966 let markdown = "- Item 1\n- Item 2\n- Item 3";
967 let blocks = parse_blocks(markdown);
968
969 assert_eq!(blocks.len(), 1);
970 if let ContentBlock::List { ordered, items } = &blocks[0] {
971 assert!(!ordered);
972 assert_eq!(items.len(), 3);
973 assert_eq!(items[0].content, "Item 1");
974 assert_eq!(items[1].content, "Item 2");
975 assert_eq!(items[2].content, "Item 3");
976 } else {
977 panic!("Expected List block");
978 }
979 }
980
981 #[test]
982 fn test_parse_ordered_list() {
983 let markdown = "1. First\n2. Second\n3. Third";
984 let blocks = parse_blocks(markdown);
985
986 assert_eq!(blocks.len(), 1);
987 if let ContentBlock::List { ordered, items } = &blocks[0] {
988 assert!(ordered);
989 assert_eq!(items.len(), 3);
990 } else {
991 panic!("Expected List block");
992 }
993 }
994
995 #[test]
996 fn test_parse_task_list() {
997 let markdown = "- [ ] Todo\n- [x] Done";
998 let blocks = parse_blocks(markdown);
999
1000 assert_eq!(blocks.len(), 1);
1001 if let ContentBlock::List { items, .. } = &blocks[0] {
1002 assert_eq!(items.len(), 2);
1003 assert_eq!(items[0].checked, Some(false));
1004 assert_eq!(items[0].content, "Todo");
1005 assert_eq!(items[1].checked, Some(true));
1006 assert_eq!(items[1].content, "Done");
1007 } else {
1008 panic!("Expected List block");
1009 }
1010 }
1011
1012 #[test]
1013 fn test_parse_table() {
1014 let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
1015 let blocks = parse_blocks(markdown);
1016
1017 assert_eq!(blocks.len(), 1);
1018 if let ContentBlock::Table { headers, rows, .. } = &blocks[0] {
1019 assert_eq!(headers.len(), 2);
1020 assert_eq!(headers[0], "A");
1021 assert_eq!(headers[1], "B");
1022 assert_eq!(rows.len(), 1);
1023 assert_eq!(rows[0][0], "1");
1024 assert_eq!(rows[0][1], "2");
1025 } else {
1026 panic!("Expected Table block");
1027 }
1028 }
1029
1030 #[test]
1031 fn test_parse_blockquote() {
1032 let markdown = "> This is a quote";
1033 let blocks = parse_blocks(markdown);
1034
1035 assert_eq!(blocks.len(), 1);
1036 if let ContentBlock::Blockquote { content, .. } = &blocks[0] {
1037 assert!(content.contains("This is a quote"));
1038 } else {
1039 panic!("Expected Blockquote block");
1040 }
1041 }
1042
1043 #[test]
1044 fn test_parse_horizontal_rule() {
1045 let markdown = "Before\n\n---\n\nAfter";
1046 let blocks = parse_blocks(markdown);
1047
1048 assert_eq!(blocks.len(), 3);
1049 assert!(matches!(blocks[1], ContentBlock::HorizontalRule));
1050 }
1051
1052 #[test]
1053 fn test_parse_inline_formatting() {
1054 let markdown = "This has **bold** and *italic* and `code`.";
1055 let blocks = parse_blocks(markdown);
1056
1057 assert_eq!(blocks.len(), 1);
1058 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1059 assert!(
1060 inline
1061 .iter()
1062 .any(|e| matches!(e, InlineElement::Strong { .. }))
1063 );
1064 assert!(
1065 inline
1066 .iter()
1067 .any(|e| matches!(e, InlineElement::Emphasis { .. }))
1068 );
1069 assert!(
1070 inline
1071 .iter()
1072 .any(|e| matches!(e, InlineElement::Code { .. }))
1073 );
1074 } else {
1075 panic!("Expected Paragraph block");
1076 }
1077 }
1078
1079 #[test]
1080 fn test_parse_link() {
1081 let markdown = "See [example](https://example.com) for more.";
1082 let blocks = parse_blocks(markdown);
1083
1084 assert_eq!(blocks.len(), 1);
1085 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1086 let link = inline
1087 .iter()
1088 .find(|e| matches!(e, InlineElement::Link { .. }));
1089 assert!(link.is_some());
1090 if let Some(InlineElement::Link { text, url, .. }) = link {
1091 assert_eq!(text, "example");
1092 assert_eq!(url, "https://example.com");
1093 }
1094 } else {
1095 panic!("Expected Paragraph block");
1096 }
1097 }
1098
1099 #[test]
1100 fn test_wikilink_preprocessing() {
1101 let markdown = "See [[Note]] and [[Other|display]] for info.";
1102 let blocks = parse_blocks(markdown);
1103
1104 assert_eq!(blocks.len(), 1);
1105 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1106 let links: Vec<_> = inline
1107 .iter()
1108 .filter(|e| matches!(e, InlineElement::Link { .. }))
1109 .collect();
1110 assert_eq!(links.len(), 2);
1111
1112 if let InlineElement::Link { text, url, .. } = &links[0] {
1113 assert_eq!(text, "Note");
1114 assert_eq!(url, "wikilink:Note");
1115 }
1116 if let InlineElement::Link { text, url, .. } = &links[1] {
1117 assert_eq!(text, "display");
1118 assert_eq!(url, "wikilink:Other");
1119 }
1120 } else {
1121 panic!("Expected Paragraph block");
1122 }
1123 }
1124
1125 #[test]
1126 fn test_list_with_nested_code() {
1127 let markdown = r#"1. First item
1128 ```rust
1129 code here
1130 ```
1131
11322. Second item"#;
1133
1134 let blocks = parse_blocks(markdown);
1135
1136 assert_eq!(blocks.len(), 1);
1137 if let ContentBlock::List { items, .. } = &blocks[0] {
1138 assert_eq!(items.len(), 2);
1139 assert!(!items[0].blocks.is_empty());
1140 assert!(matches!(items[0].blocks[0], ContentBlock::Code { .. }));
1141 } else {
1142 panic!("Expected List block");
1143 }
1144 }
1145
1146 #[test]
1147 fn test_parse_image() {
1148 let markdown = "";
1150 let blocks = parse_blocks(markdown);
1151
1152 assert_eq!(blocks.len(), 1);
1154 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1155 let img = inline
1156 .iter()
1157 .find(|e| matches!(e, InlineElement::Image { .. }));
1158 assert!(img.is_some(), "Should have inline image");
1159 } else {
1160 panic!("Expected Paragraph block with inline image");
1161 }
1162 }
1163
1164 #[test]
1165 fn test_parse_block_image() {
1166 let markdown = "Some text\n\n";
1168 let blocks = parse_blocks(markdown);
1169
1170 assert!(blocks.len() >= 2);
1172 }
1173
1174 #[test]
1175 fn test_parse_details_block() {
1176 let markdown = r#"<details>
1177<summary>Click to expand</summary>
1178
1179Inner content here.
1180
1181</details>"#;
1182
1183 let blocks = parse_blocks(markdown);
1184
1185 assert_eq!(blocks.len(), 1);
1186 if let ContentBlock::Details {
1187 summary,
1188 blocks: inner,
1189 ..
1190 } = &blocks[0]
1191 {
1192 assert_eq!(summary, "Click to expand");
1193 assert!(!inner.is_empty());
1194 } else {
1195 panic!("Expected Details block");
1196 }
1197 }
1198
1199 #[test]
1200 fn test_slugify() {
1201 assert_eq!(slugify("Hello World"), "hello-world");
1202 assert_eq!(slugify("API Reference"), "api-reference");
1203 assert_eq!(slugify("1. Getting Started"), "1-getting-started");
1204 assert_eq!(slugify("What's New?"), "whats-new");
1205 }
1206
1207 #[test]
1208 fn test_strikethrough() {
1209 let markdown = "This is ~~deleted~~ text.";
1210 let blocks = parse_blocks(markdown);
1211
1212 assert_eq!(blocks.len(), 1);
1213 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1214 assert!(
1215 inline
1216 .iter()
1217 .any(|e| matches!(e, InlineElement::Strikethrough { .. }))
1218 );
1219 }
1220 }
1221
1222 #[test]
1223 fn test_indented_code_blocks_in_list_items() {
1224 let markdown = r#"## Installation
1227
12281. Install from crates.io:
1229 ```bash
1230 cargo install treemd
1231 ```
1232
12332. Or build from source:
1234 ```bash
1235 git clone https://github.com/example/repo
1236 cd repo
1237 cargo install --path .
1238 ```"#;
1239
1240 let blocks = parse_blocks(markdown);
1241
1242 assert_eq!(blocks.len(), 2, "Expected 2 blocks (heading + list)");
1244 assert!(
1245 matches!(blocks[0], ContentBlock::Heading { level: 2, .. }),
1246 "First block should be H2"
1247 );
1248
1249 if let ContentBlock::List { ordered, items } = &blocks[1] {
1250 assert!(ordered, "Should be an ordered list");
1251 assert_eq!(items.len(), 2, "Should have 2 list items");
1252
1253 assert!(
1255 !items[0].blocks.is_empty(),
1256 "First item should have nested blocks"
1257 );
1258 assert!(
1259 matches!(items[0].blocks[0], ContentBlock::Code { .. }),
1260 "First item's nested block should be Code"
1261 );
1262 if let ContentBlock::Code {
1263 language, content, ..
1264 } = &items[0].blocks[0]
1265 {
1266 assert_eq!(language.as_deref(), Some("bash"));
1267 assert!(content.contains("cargo install treemd"));
1268 }
1269
1270 assert!(
1272 !items[1].blocks.is_empty(),
1273 "Second item should have nested blocks"
1274 );
1275 assert!(
1276 matches!(items[1].blocks[0], ContentBlock::Code { .. }),
1277 "Second item's nested block should be Code"
1278 );
1279 if let ContentBlock::Code {
1280 language, content, ..
1281 } = &items[1].blocks[0]
1282 {
1283 assert_eq!(language.as_deref(), Some("bash"));
1284 assert!(content.contains("git clone"));
1285 }
1286 } else {
1287 panic!("Expected List block");
1288 }
1289 }
1290
1291 #[test]
1296 fn test_to_plain_text_simple_paragraph() {
1297 let markdown = "This is a simple paragraph.";
1298 let plain = to_plain_text(markdown);
1299 assert_eq!(plain, "This is a simple paragraph.");
1300 }
1301
1302 #[test]
1303 fn test_to_plain_text_with_link() {
1304 let markdown = "[Overview](#overview) and more text";
1305 let plain = to_plain_text(markdown);
1306 assert_eq!(plain, "Overview and more text");
1307 }
1308
1309 #[test]
1310 fn test_to_plain_text_with_bold_and_italic() {
1311 let markdown = "This has **bold** and *italic* text.";
1312 let plain = to_plain_text(markdown);
1313 assert_eq!(plain, "This has bold and italic text.");
1314 }
1315
1316 #[test]
1317 fn test_to_plain_text_with_inline_code() {
1318 let markdown = "Use the `println!` macro.";
1319 let plain = to_plain_text(markdown);
1320 assert_eq!(plain, "Use the println! macro.");
1321 }
1322
1323 #[test]
1324 fn test_to_plain_text_with_strikethrough() {
1325 let markdown = "This is ~~deleted~~ text.";
1326 let plain = to_plain_text(markdown);
1327 assert_eq!(plain, "This is deleted text.");
1328 }
1329
1330 #[test]
1331 fn test_to_plain_text_wikilinks() {
1332 let markdown = "See [[Note]] and [[Other|display]] for info.";
1333 let plain = to_plain_text(markdown);
1334 assert_eq!(plain, "See Note and display for info.");
1335 }
1336
1337 #[test]
1338 fn test_to_plain_text_heading() {
1339 let markdown = "# Hello World";
1340 let plain = to_plain_text(markdown);
1341 assert_eq!(plain, "Hello World");
1342 }
1343
1344 #[test]
1345 fn test_to_plain_text_code_block() {
1346 let markdown = "```rust\nfn main() {}\n```";
1347 let plain = to_plain_text(markdown);
1348 assert_eq!(plain, "fn main() {}");
1349 }
1350
1351 #[test]
1352 fn test_to_plain_text_list() {
1353 let markdown = "- Item 1\n- Item 2\n- Item 3";
1354 let plain = to_plain_text(markdown);
1355 assert_eq!(plain, "Item 1\nItem 2\nItem 3");
1356 }
1357
1358 #[test]
1359 fn test_to_plain_text_table() {
1360 let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
1361 let plain = to_plain_text(markdown);
1362 assert!(plain.contains("A\tB"));
1364 assert!(plain.contains("1\t2"));
1365 }
1366
1367 #[test]
1368 fn test_to_plain_text_blockquote() {
1369 let markdown = "> This is a quote";
1370 let plain = to_plain_text(markdown);
1371 assert!(plain.contains("This is a quote"));
1372 }
1373
1374 #[test]
1375 fn test_to_plain_text_image() {
1376 let markdown = "";
1377 let plain = to_plain_text(markdown);
1378 assert_eq!(plain, "Alt text");
1379 }
1380
1381 #[test]
1382 fn test_to_plain_text_horizontal_rule() {
1383 let markdown = "Before\n\n---\n\nAfter";
1384 let plain = to_plain_text(markdown);
1385 assert!(plain.contains("Before"));
1387 assert!(plain.contains("After"));
1388 }
1389
1390 #[test]
1391 fn test_to_plain_text_complex_document() {
1392 let markdown = r#"# Document Title
1393
1394This is a paragraph with **bold** and *italic* text.
1395
1396- [Link One](#one)
1397- [Link Two](#two)
1398- [Link Three](#three)
1399
1400See [[WikiNote]] for more info."#;
1401
1402 let plain = to_plain_text(markdown);
1403
1404 assert!(plain.contains("Document Title"));
1406 assert!(plain.contains("bold"));
1408 assert!(plain.contains("italic"));
1409 assert!(plain.contains("Link One"));
1411 assert!(plain.contains("Link Two"));
1412 assert!(plain.contains("WikiNote"));
1414 assert!(!plain.contains("#one"));
1416 assert!(!plain.contains("#two"));
1417 }
1418
1419 #[test]
1420 fn test_to_plain_text_treemd_use_case() {
1421 let markdown = "[Overview](#overview)";
1425 let plain = to_plain_text(markdown);
1426 assert_eq!(plain, "Overview");
1427
1428 let o_count = plain.chars().filter(|c| *c == 'o' || *c == 'O').count();
1432 assert_eq!(
1433 o_count, 1,
1434 "Should only count 'o' in visible text, not hidden anchor"
1435 );
1436
1437 assert!(!plain.contains("#overview"));
1439 assert!(!plain.contains("overview")); }
1441
1442 #[test]
1443 fn test_to_plain_text_nested_formatting() {
1444 let markdown = "**[bold link](url)** and *[italic link](url2)*";
1446 let plain = to_plain_text(markdown);
1447 assert!(plain.contains("bold link"));
1449 assert!(plain.contains("italic link"));
1450 assert!(!plain.contains("url"));
1452 }
1453
1454 #[test]
1455 fn test_nested_list_item_inline_elements() {
1456 let markdown = r#"- [Features](#features)
1459 - [Interactive TUI](#interactive-tui)
1460 - [CLI Mode](#cli-mode)"#;
1461
1462 let blocks = parse_blocks(markdown);
1463 assert_eq!(blocks.len(), 1);
1464
1465 if let ContentBlock::List { items, .. } = &blocks[0] {
1466 assert_eq!(items.len(), 1, "Should have 1 top-level item");
1467
1468 let item = &items[0];
1469 let links: Vec<_> = item
1471 .inline
1472 .iter()
1473 .filter_map(|e| {
1474 if let InlineElement::Link { text, url, .. } = e {
1475 Some((text.as_str(), url.as_str()))
1476 } else {
1477 None
1478 }
1479 })
1480 .collect();
1481
1482 assert_eq!(links.len(), 3, "Should have 3 links total");
1483 assert!(
1484 links.iter().any(|(text, _)| *text == "Features"),
1485 "Should have Features link"
1486 );
1487 assert!(
1488 links.iter().any(|(text, _)| *text == "Interactive TUI"),
1489 "Should have Interactive TUI link"
1490 );
1491 assert!(
1492 links.iter().any(|(text, _)| *text == "CLI Mode"),
1493 "Should have CLI Mode link"
1494 );
1495 } else {
1496 panic!("Expected List block");
1497 }
1498 }
1499
1500 #[test]
1501 fn test_deeply_nested_list_inline_elements() {
1502 let markdown = r#"- Level 1 [link1](url1)
1504 - Level 2 [link2](url2)
1505 - Level 3 [link3](url3)"#;
1506
1507 let blocks = parse_blocks(markdown);
1508
1509 if let ContentBlock::List { items, .. } = &blocks[0] {
1510 let item = &items[0];
1511 let links: Vec<_> = item
1512 .inline
1513 .iter()
1514 .filter(|e| matches!(e, InlineElement::Link { .. }))
1515 .collect();
1516
1517 assert_eq!(links.len(), 3, "Should collect all 3 nested links");
1518 } else {
1519 panic!("Expected List block");
1520 }
1521 }
1522
1523 #[test]
1524 fn test_inline_element_line_offset() {
1525 let markdown = r#"- [Features](#features)
1527 - [Interactive TUI](#interactive-tui)
1528 - [CLI Mode](#cli-mode)"#;
1529
1530 let blocks = parse_blocks(markdown);
1531
1532 if let ContentBlock::List { items, .. } = &blocks[0] {
1533 let item = &items[0];
1534 let links: Vec<_> = item
1535 .inline
1536 .iter()
1537 .filter_map(|e| {
1538 if let InlineElement::Link {
1539 text, line_offset, ..
1540 } = e
1541 {
1542 Some((text.as_str(), *line_offset))
1543 } else {
1544 None
1545 }
1546 })
1547 .collect();
1548
1549 assert_eq!(links.len(), 3);
1550
1551 let features = links.iter().find(|(t, _)| *t == "Features").unwrap();
1553 assert_eq!(features.1, Some(0), "Features should be on line 0");
1554
1555 let tui = links.iter().find(|(t, _)| *t == "Interactive TUI").unwrap();
1557 assert_eq!(tui.1, Some(1), "Interactive TUI should be on line 1");
1558
1559 let cli = links.iter().find(|(t, _)| *t == "CLI Mode").unwrap();
1561 assert_eq!(cli.1, Some(2), "CLI Mode should be on line 2");
1562 } else {
1563 panic!("Expected List block");
1564 }
1565 }
1566
1567 #[test]
1568 fn test_line_offset_not_set_outside_lists() {
1569 let markdown = "See [example](url) for more.";
1571 let blocks = parse_blocks(markdown);
1572
1573 if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1574 let link = inline
1575 .iter()
1576 .find(|e| matches!(e, InlineElement::Link { .. }));
1577 if let Some(InlineElement::Link { line_offset, .. }) = link {
1578 assert_eq!(
1579 *line_offset, None,
1580 "line_offset should be None outside lists"
1581 );
1582 }
1583 } else {
1584 panic!("Expected Paragraph block");
1585 }
1586 }
1587}