turbovault_parser/
blocks.rs

1//! Block-level content parsing for markdown documents.
2//!
3//! This module provides full block-level parsing using pulldown-cmark,
4//! producing a structured representation of markdown content including:
5//! - Paragraphs, headings, code blocks
6//! - Lists (ordered, unordered, task lists)
7//! - Tables, blockquotes, images
8//! - HTML details blocks
9//!
10//! The parser handles inline formatting within blocks, producing
11//! `InlineElement` vectors for text content.
12
13use pulldown_cmark::{
14    Alignment as CmarkAlignment, CodeBlockKind, Event, Options, Parser, Tag, TagEnd,
15};
16use regex::Regex;
17use std::sync::LazyLock;
18use turbovault_core::{ContentBlock, InlineElement, ListItem, TableAlignment};
19
20// ============================================================================
21// Wikilink preprocessing (converts [[x]] to [x](wikilink:x) for pulldown-cmark)
22// ============================================================================
23
/// Regex for wikilinks: `[[target]]` or `[[target|alias]]`.
///
/// Capture group 1 is the target (any run of characters other than `]` and
/// `|`); capture group 2, when present, is the alias that follows the `|`
/// (any run of characters other than `]`). The pattern is a constant, so the
/// `unwrap` can only fail if the literal itself is edited into something
/// invalid.
static WIKILINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap());
27
28/// Preprocess wikilinks to standard markdown links with wikilink: prefix.
29/// This allows pulldown-cmark to parse them as regular links.
30fn preprocess_wikilinks(markdown: &str) -> String {
31    WIKILINK_RE
32        .replace_all(markdown, |caps: &regex::Captures| {
33            let target = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
34            let alias = caps.get(2).map(|m| m.as_str().trim());
35            let display_text = alias.unwrap_or(target);
36            format!("[{}](wikilink:{})", display_text, target)
37        })
38        .to_string()
39}
40
/// Regex for links with spaces in the URL (not valid CommonMark but common in wikis).
///
/// Matches `[text](url)` where `url` contains at least one whitespace
/// character and none of `)`, `<` or `>`. Capture group 1 is the link text,
/// capture group 2 is the URL part. Destinations already written in
/// angle-bracket form never match because `<` and `>` are excluded.
static LINK_WITH_SPACES_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)<>]+\s[^)<>]*)\)").unwrap());
44
45/// Preprocess links with spaces to angle bracket syntax.
46fn preprocess_links_with_spaces(markdown: &str) -> String {
47    LINK_WITH_SPACES_RE
48        .replace_all(markdown, |caps: &regex::Captures| {
49            let text = &caps[1];
50            let url = &caps[2];
51            if url.contains(' ') {
52                format!("[{}](<{}>)", text, url)
53            } else {
54                caps[0].to_string()
55            }
56        })
57        .to_string()
58}
59
60// ============================================================================
61// Details block extraction (HTML <details><summary>)
62// ============================================================================
63
64/// Extract HTML <details> blocks and replace with placeholders.
65fn extract_details_blocks(markdown: &str) -> (String, Vec<ContentBlock>) {
66    let mut details_blocks = Vec::new();
67    let mut result = String::new();
68    let mut current_pos = 0;
69
70    while current_pos < markdown.len() {
71        if markdown[current_pos..].starts_with("<details")
72            && let Some(tag_end) = markdown[current_pos..].find('>')
73            && let details_start = current_pos + tag_end + 1
74            && let Some(details_end_pos) = markdown[details_start..].find("</details>")
75        {
76            let details_end = details_start + details_end_pos;
77            let details_content = &markdown[details_start..details_end];
78
79            // Extract summary
80            let summary = extract_summary(details_content);
81
82            // Extract content after </summary>
83            let content_start = if let Some(summary_end_pos) = details_content.find("</summary>") {
84                let summary_tag_end = summary_end_pos + "</summary>".len();
85                &details_content[summary_tag_end..]
86            } else {
87                details_content
88            };
89
90            let content_trimmed = content_start.trim();
91
92            // Parse nested content
93            let nested_blocks = if !content_trimmed.is_empty() {
94                parse_blocks(content_trimmed)
95            } else {
96                Vec::new()
97            };
98
99            details_blocks.push(ContentBlock::Details {
100                summary,
101                content: content_trimmed.to_string(),
102                blocks: nested_blocks,
103            });
104
105            result.push_str(&format!("\n[DETAILS_BLOCK_{}]\n", details_blocks.len() - 1));
106            current_pos = details_end + "</details>".len();
107            continue;
108        }
109
110        if let Some(ch) = markdown[current_pos..].chars().next() {
111            result.push(ch);
112            current_pos += ch.len_utf8();
113        } else {
114            break;
115        }
116    }
117
118    (result, details_blocks)
119}
120
/// Return the trimmed text of the first `<summary ...>...</summary>` element.
///
/// Yields an empty string when the opening tag, its closing `>`, or the
/// `</summary>` end tag cannot be found.
fn extract_summary(details_content: &str) -> String {
    /// Borrow the raw (untrimmed) text between the summary tags, if present.
    fn summary_slice(source: &str) -> Option<&str> {
        let from_open = &source[source.find("<summary")?..];
        let inner = &from_open[from_open.find('>')? + 1..];
        let close_at = inner.find("</summary>")?;
        Some(&inner[..close_at])
    }

    match summary_slice(details_content) {
        Some(raw) => raw.trim().to_string(),
        None => String::new(),
    }
}
135
136// ============================================================================
137// Parser state machine
138// ============================================================================
139
/// Mutable state threaded through `process_event` while one markdown string
/// is converted into `ContentBlock`s.
///
/// The buffers hold the content of the construct currently being read and the
/// `in_*` flags record which constructs are open.
struct BlockParserState {
    /// Line number recorded as `start_line` / `end_line` of code blocks.
    /// Initialised from the caller's `start_line`; nothing in this section of
    /// the file advances it.
    current_line: usize,
    /// Plain text of the paragraph, list item or table cell being read. Also
    /// holds an image's title between the image's start and end events.
    paragraph_buffer: String,
    /// Inline elements accumulated alongside `paragraph_buffer`.
    inline_buffer: Vec<InlineElement>,
    /// Finished top-level items of the list being read.
    list_items: Vec<ListItem>,
    /// Whether the outermost open list is ordered.
    list_ordered: bool,
    /// Number of lists currently open (nesting depth).
    list_depth: usize,
    /// Number of list items currently open (nesting depth).
    item_depth: usize,
    /// Checkbox state from the most recent task-list marker, if any.
    task_list_marker: Option<bool>,
    /// Markers of enclosing items, saved while a nested item is open.
    saved_task_markers: Vec<Option<bool>>,
    /// Block-level children collected for the current top-level list item.
    item_blocks: Vec<ContentBlock>,
    /// Text of the code block being read.
    code_buffer: String,
    /// Info-string language of the code block being read, if it has one.
    code_language: Option<String>,
    /// Value of `current_line` when the code block started.
    code_start_line: usize,
    /// Text of the blockquote being read; re-parsed with `parse_blocks` when
    /// the blockquote is flushed.
    blockquote_buffer: String,
    /// Header cells of the table being read.
    table_headers: Vec<String>,
    /// Column alignments of the table being read.
    table_alignments: Vec<TableAlignment>,
    /// Completed body rows of the table being read.
    table_rows: Vec<Vec<String>>,
    /// Cells of the table row currently being read.
    current_row: Vec<String>,
    /// Level of the heading being read.
    heading_level: Option<usize>,
    /// Plain text of the heading being read (also the input to `slugify`).
    heading_buffer: String,
    /// Inline elements of the heading being read.
    heading_inline: Vec<InlineElement>,
    /// A paragraph is open.
    in_paragraph: bool,
    /// A top-level list is open.
    in_list: bool,
    /// A code block is open.
    in_code: bool,
    /// A blockquote is open.
    in_blockquote: bool,
    /// A table is open.
    in_table: bool,
    /// A heading is open.
    in_heading: bool,
    /// Inside `**strong**` formatting.
    in_strong: bool,
    /// Inside `*emphasis*` formatting.
    in_emphasis: bool,
    /// Inside `~~strikethrough~~` formatting.
    in_strikethrough: bool,
    /// Set only for the duration of an inline-code event.
    in_code_inline: bool,
    /// A link is open.
    in_link: bool,
    /// Destination of the link or image currently open.
    link_url: String,
    /// Text collected inside the open link, or the alt text of the open image.
    link_text: String,
    /// The open image started inside a link.
    image_in_link: bool,
    /// An image is open.
    in_image: bool,
    /// Link destination saved while a nested image overwrites `link_url`.
    saved_link_url: String,
}
179
180impl BlockParserState {
181    fn new(start_line: usize) -> Self {
182        Self {
183            current_line: start_line,
184            paragraph_buffer: String::new(),
185            inline_buffer: Vec::new(),
186            list_items: Vec::new(),
187            list_ordered: false,
188            list_depth: 0,
189            item_depth: 0,
190            task_list_marker: None,
191            saved_task_markers: Vec::new(),
192            item_blocks: Vec::new(),
193            code_buffer: String::new(),
194            code_language: None,
195            code_start_line: 0,
196            blockquote_buffer: String::new(),
197            table_headers: Vec::new(),
198            table_alignments: Vec::new(),
199            table_rows: Vec::new(),
200            current_row: Vec::new(),
201            heading_level: None,
202            heading_buffer: String::new(),
203            heading_inline: Vec::new(),
204            in_paragraph: false,
205            in_list: false,
206            in_code: false,
207            in_blockquote: false,
208            in_table: false,
209            in_heading: false,
210            in_strong: false,
211            in_emphasis: false,
212            in_strikethrough: false,
213            in_code_inline: false,
214            in_link: false,
215            link_url: String::new(),
216            link_text: String::new(),
217            image_in_link: false,
218            in_image: false,
219            saved_link_url: String::new(),
220        }
221    }
222
223    fn finalize(&mut self, blocks: &mut Vec<ContentBlock>) {
224        self.flush_paragraph(blocks);
225        self.flush_list(blocks);
226        self.flush_code(blocks);
227        self.flush_blockquote(blocks);
228        self.flush_table(blocks);
229    }
230
231    fn flush_paragraph(&mut self, blocks: &mut Vec<ContentBlock>) {
232        if self.in_paragraph && !self.paragraph_buffer.is_empty() {
233            blocks.push(ContentBlock::Paragraph {
234                content: self.paragraph_buffer.clone(),
235                inline: self.inline_buffer.clone(),
236            });
237            self.paragraph_buffer.clear();
238            self.inline_buffer.clear();
239            self.in_paragraph = false;
240        }
241    }
242
243    fn flush_list(&mut self, blocks: &mut Vec<ContentBlock>) {
244        if self.in_list && !self.list_items.is_empty() {
245            blocks.push(ContentBlock::List {
246                ordered: self.list_ordered,
247                items: self.list_items.clone(),
248            });
249            self.list_items.clear();
250            self.in_list = false;
251        }
252    }
253
254    fn flush_code(&mut self, blocks: &mut Vec<ContentBlock>) {
255        if self.in_code && !self.code_buffer.is_empty() {
256            blocks.push(ContentBlock::Code {
257                language: self.code_language.clone(),
258                content: self.code_buffer.trim_end().to_string(),
259                start_line: self.code_start_line,
260                end_line: self.current_line,
261            });
262            self.code_buffer.clear();
263            self.code_language = None;
264            self.in_code = false;
265        }
266    }
267
268    fn flush_blockquote(&mut self, blocks: &mut Vec<ContentBlock>) {
269        if self.in_blockquote && !self.blockquote_buffer.is_empty() {
270            let nested_blocks = parse_blocks(&self.blockquote_buffer);
271            blocks.push(ContentBlock::Blockquote {
272                content: self.blockquote_buffer.clone(),
273                blocks: nested_blocks,
274            });
275            self.blockquote_buffer.clear();
276            self.in_blockquote = false;
277        }
278    }
279
280    fn flush_table(&mut self, blocks: &mut Vec<ContentBlock>) {
281        if self.in_table && !self.table_headers.is_empty() {
282            blocks.push(ContentBlock::Table {
283                headers: self.table_headers.clone(),
284                alignments: self.table_alignments.clone(),
285                rows: self.table_rows.clone(),
286            });
287            self.table_headers.clear();
288            self.table_alignments.clear();
289            self.table_rows.clear();
290            self.current_row.clear();
291            self.paragraph_buffer.clear();
292            self.inline_buffer.clear();
293            self.in_table = false;
294        }
295    }
296
297    fn add_inline_text(&mut self, text: &str) {
298        if text.is_empty() {
299            return;
300        }
301
302        let element = if self.in_code_inline {
303            InlineElement::Code {
304                value: text.to_string(),
305            }
306        } else if self.in_strong {
307            InlineElement::Strong {
308                value: text.to_string(),
309            }
310        } else if self.in_emphasis {
311            InlineElement::Emphasis {
312                value: text.to_string(),
313            }
314        } else if self.in_strikethrough {
315            InlineElement::Strikethrough {
316                value: text.to_string(),
317            }
318        } else {
319            InlineElement::Text {
320                value: text.to_string(),
321            }
322        };
323
324        self.inline_buffer.push(element);
325        self.paragraph_buffer.push_str(text);
326    }
327}
328
329// ============================================================================
330// Event processing
331// ============================================================================
332
333#[allow(clippy::too_many_lines)]
334fn process_event(event: Event, state: &mut BlockParserState, blocks: &mut Vec<ContentBlock>) {
335    match event {
336        Event::Start(Tag::Paragraph) => {
337            state.in_paragraph = true;
338        }
339        Event::End(TagEnd::Paragraph) => {
340            if state.item_depth >= 1 && state.in_paragraph && !state.paragraph_buffer.is_empty() {
341                state.item_blocks.push(ContentBlock::Paragraph {
342                    content: state.paragraph_buffer.clone(),
343                    inline: state.inline_buffer.clone(),
344                });
345                state.paragraph_buffer.clear();
346                state.inline_buffer.clear();
347                state.in_paragraph = false;
348            } else {
349                state.flush_paragraph(blocks);
350            }
351        }
352        Event::Start(Tag::CodeBlock(kind)) => {
353            state.in_code = true;
354            state.code_start_line = state.current_line;
355            state.code_language = match kind {
356                CodeBlockKind::Fenced(lang) => {
357                    if lang.is_empty() {
358                        None
359                    } else {
360                        Some(lang.to_string())
361                    }
362                }
363                CodeBlockKind::Indented => None,
364            };
365        }
366        Event::End(TagEnd::CodeBlock) => {
367            if state.item_depth >= 1 && state.in_code && !state.code_buffer.is_empty() {
368                state.item_blocks.push(ContentBlock::Code {
369                    language: state.code_language.clone(),
370                    content: state.code_buffer.trim_end().to_string(),
371                    start_line: state.code_start_line,
372                    end_line: state.current_line,
373                });
374                state.code_buffer.clear();
375                state.code_language = None;
376                state.in_code = false;
377            } else {
378                state.flush_code(blocks);
379            }
380        }
381        Event::Start(Tag::List(start_number)) => {
382            state.list_depth += 1;
383            if state.list_depth == 1 {
384                state.in_list = true;
385                state.list_ordered = start_number.is_some();
386            }
387        }
388        Event::End(TagEnd::List(_)) => {
389            state.list_depth = state.list_depth.saturating_sub(1);
390            if state.list_depth == 0 {
391                state.flush_list(blocks);
392            }
393        }
394        Event::Start(Tag::Item) => {
395            state.item_depth += 1;
396            if state.item_depth > 1 {
397                state.saved_task_markers.push(state.task_list_marker);
398                state.task_list_marker = None;
399            }
400            if state.item_depth == 1 {
401                state.paragraph_buffer.clear();
402                state.inline_buffer.clear();
403                state.item_blocks.clear();
404            }
405        }
406        Event::End(TagEnd::Item) => {
407            if state.item_depth > 1
408                && let Some(saved) = state.saved_task_markers.pop()
409            {
410                state.task_list_marker = saved;
411            }
412            if state.item_depth == 1 {
413                let (content, inline, remaining_blocks) = if !state.paragraph_buffer.is_empty() {
414                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
415                    (
416                        state.paragraph_buffer.clone(),
417                        state.inline_buffer.clone(),
418                        all_blocks,
419                    )
420                } else if let Some(ContentBlock::Paragraph { content, inline }) =
421                    state.item_blocks.first().cloned()
422                {
423                    let remaining: Vec<ContentBlock> = state.item_blocks.drain(1..).collect();
424                    (content, inline, remaining)
425                } else {
426                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
427                    (String::new(), Vec::new(), all_blocks)
428                };
429
430                state.list_items.push(ListItem {
431                    checked: state.task_list_marker,
432                    content,
433                    inline,
434                    blocks: remaining_blocks,
435                });
436                state.paragraph_buffer.clear();
437                state.inline_buffer.clear();
438                state.item_blocks.clear();
439                state.task_list_marker = None;
440            }
441            state.item_depth = state.item_depth.saturating_sub(1);
442        }
443        Event::TaskListMarker(checked) => {
444            state.task_list_marker = Some(checked);
445        }
446        Event::Start(Tag::BlockQuote(_)) => {
447            state.in_blockquote = true;
448        }
449        Event::End(TagEnd::BlockQuote(_)) => {
450            state.flush_blockquote(blocks);
451        }
452        Event::Start(Tag::Table(alignments)) => {
453            state.in_table = true;
454            state.table_alignments = alignments
455                .iter()
456                .map(|a| match a {
457                    CmarkAlignment::Left => TableAlignment::Left,
458                    CmarkAlignment::Center => TableAlignment::Center,
459                    CmarkAlignment::Right => TableAlignment::Right,
460                    CmarkAlignment::None => TableAlignment::None,
461                })
462                .collect();
463        }
464        Event::End(TagEnd::Table) => {
465            state.flush_table(blocks);
466        }
467        Event::Start(Tag::TableHead) => {}
468        Event::End(TagEnd::TableHead) => {
469            state.table_headers = state.current_row.clone();
470            state.current_row.clear();
471        }
472        Event::Start(Tag::TableRow) => {}
473        Event::End(TagEnd::TableRow) => {
474            state.table_rows.push(state.current_row.clone());
475            state.current_row.clear();
476        }
477        Event::Start(Tag::TableCell) => {
478            state.paragraph_buffer.clear();
479            state.inline_buffer.clear();
480        }
481        Event::End(TagEnd::TableCell) => {
482            state.current_row.push(state.paragraph_buffer.clone());
483            state.paragraph_buffer.clear();
484            state.inline_buffer.clear();
485        }
486        Event::Start(Tag::Strong) => {
487            state.in_strong = true;
488        }
489        Event::End(TagEnd::Strong) => {
490            state.in_strong = false;
491        }
492        Event::Start(Tag::Emphasis) => {
493            state.in_emphasis = true;
494        }
495        Event::End(TagEnd::Emphasis) => {
496            state.in_emphasis = false;
497        }
498        Event::Start(Tag::Strikethrough) => {
499            state.in_strikethrough = true;
500        }
501        Event::End(TagEnd::Strikethrough) => {
502            state.in_strikethrough = false;
503        }
504        Event::Code(text) => {
505            state.in_code_inline = true;
506            state.add_inline_text(&text);
507            state.in_code_inline = false;
508        }
509        Event::Start(Tag::Link { dest_url, .. }) => {
510            state.in_link = true;
511            state.link_url = dest_url.to_string();
512            state.link_text.clear();
513        }
514        Event::End(TagEnd::Link) => {
515            state.in_link = false;
516
517            if state.image_in_link {
518                state.inline_buffer.push(InlineElement::Link {
519                    text: state.link_text.clone(),
520                    url: state.saved_link_url.clone(),
521                    title: None,
522                });
523                state
524                    .paragraph_buffer
525                    .push_str(&format!("[{}]({})", state.link_text, state.saved_link_url));
526            } else {
527                state.inline_buffer.push(InlineElement::Link {
528                    text: state.link_text.clone(),
529                    url: state.link_url.clone(),
530                    title: None,
531                });
532                state
533                    .paragraph_buffer
534                    .push_str(&format!("[{}]({})", state.link_text, state.link_url));
535            }
536
537            state.link_text.clear();
538            state.link_url.clear();
539            state.saved_link_url.clear();
540            state.image_in_link = false;
541        }
542        Event::Start(Tag::Image {
543            dest_url, title, ..
544        }) => {
545            if state.in_link {
546                state.image_in_link = true;
547                state.saved_link_url = state.link_url.clone();
548            }
549            state.in_image = true;
550            state.link_url = dest_url.to_string();
551            state.link_text.clear();
552            state.paragraph_buffer = title.to_string();
553        }
554        Event::End(TagEnd::Image) => {
555            state.in_image = false;
556
557            if !state.image_in_link {
558                // Capture title before we modify paragraph_buffer
559                let title = if state.paragraph_buffer.is_empty() {
560                    None
561                } else {
562                    Some(state.paragraph_buffer.clone())
563                };
564
565                if state.in_paragraph {
566                    // Reset paragraph_buffer for image representation
567                    state.paragraph_buffer.clear();
568                    state.inline_buffer.push(InlineElement::Image {
569                        alt: state.link_text.clone(),
570                        src: state.link_url.clone(),
571                        title,
572                    });
573                    // Add image placeholder to paragraph content
574                    state
575                        .paragraph_buffer
576                        .push_str(&format!("![{}]({})", state.link_text, state.link_url));
577                } else {
578                    state.flush_paragraph(blocks);
579                    blocks.push(ContentBlock::Image {
580                        alt: state.link_text.clone(),
581                        src: state.link_url.clone(),
582                        title,
583                    });
584                    state.paragraph_buffer.clear();
585                }
586
587                state.link_text.clear();
588                state.link_url.clear();
589            }
590        }
591        Event::Text(text) => {
592            if state.in_code {
593                state.code_buffer.push_str(&text);
594            } else if state.in_blockquote {
595                state.blockquote_buffer.push_str(&text);
596            } else if state.in_heading {
597                state.heading_buffer.push_str(&text);
598                let element = if state.in_code_inline {
599                    InlineElement::Code {
600                        value: text.to_string(),
601                    }
602                } else if state.in_strong {
603                    InlineElement::Strong {
604                        value: text.to_string(),
605                    }
606                } else if state.in_emphasis {
607                    InlineElement::Emphasis {
608                        value: text.to_string(),
609                    }
610                } else {
611                    InlineElement::Text {
612                        value: text.to_string(),
613                    }
614                };
615                state.heading_inline.push(element);
616            } else if state.in_link || state.in_image {
617                state.link_text.push_str(&text);
618            } else {
619                if state.in_list && state.item_depth > 1 {
620                    if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n')
621                    {
622                        state.paragraph_buffer.push('\n');
623                    }
624                    let indent = "  ".repeat(state.item_depth - 1);
625                    state.paragraph_buffer.push_str(&indent);
626
627                    if let Some(checked) = state.task_list_marker {
628                        let marker = if checked { "[x] " } else { "[ ] " };
629                        state.paragraph_buffer.push_str(marker);
630                        state.task_list_marker = None;
631                    }
632                }
633                state.add_inline_text(&text);
634            }
635        }
636        Event::SoftBreak => {
637            if state.in_paragraph {
638                state.paragraph_buffer.push(' ');
639                state.inline_buffer.push(InlineElement::Text {
640                    value: " ".to_string(),
641                });
642            }
643        }
644        Event::HardBreak => {
645            if state.in_paragraph {
646                state.paragraph_buffer.push('\n');
647                state.inline_buffer.push(InlineElement::Text {
648                    value: "\n".to_string(),
649                });
650            }
651        }
652        Event::Rule => {
653            state.flush_paragraph(blocks);
654            blocks.push(ContentBlock::HorizontalRule);
655        }
656        Event::Start(Tag::Heading { level, .. }) => {
657            state.flush_paragraph(blocks);
658            state.in_heading = true;
659            state.heading_level = Some(level as usize);
660            state.heading_buffer.clear();
661            state.heading_inline.clear();
662        }
663        Event::End(TagEnd::Heading(_)) => {
664            if state.in_heading
665                && !state.heading_buffer.is_empty()
666                && let Some(level) = state.heading_level
667            {
668                let anchor = Some(slugify(&state.heading_buffer));
669                blocks.push(ContentBlock::Heading {
670                    level,
671                    content: state.heading_buffer.clone(),
672                    inline: state.heading_inline.clone(),
673                    anchor,
674                });
675            }
676            state.in_heading = false;
677            state.heading_level = None;
678            state.heading_buffer.clear();
679            state.heading_inline.clear();
680        }
681        _ => {}
682    }
683}
684
685// ============================================================================
686// Slug generation
687// ============================================================================
688
/// Turn heading text into a URL-friendly anchor slug.
///
/// The text is lowercased and alphanumeric characters are kept. Runs of
/// whitespace and hyphens collapse into a single `-` between kept
/// characters, with none at the start or end. Every other character is
/// dropped.
pub fn slugify(text: &str) -> String {
    let mut slug = String::new();
    // True once a separator was seen that has not been written out yet.
    let mut separator_pending = false;

    for ch in text.to_lowercase().chars() {
        if ch.is_alphanumeric() {
            // Separators are only emitted between two kept characters.
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch.is_whitespace() || ch == '-' {
            separator_pending = true;
        }
    }

    slug
}
709
710// ============================================================================
711// Public API
712// ============================================================================
713
714/// Parse markdown content into structured blocks.
715///
716/// This is the main entry point for block-level parsing. It handles:
717/// - Wikilink preprocessing (converts [[x]] to markdown links)
718/// - HTML details block extraction
719/// - Full pulldown-cmark parsing with GFM extensions
720///
721/// # Example
722///
723/// ```
724/// use turbovault_parser::parse_blocks;
725/// use turbovault_core::ContentBlock;
726///
727/// let markdown = "# Hello World\n\nThis is a **paragraph** with *inline* formatting.";
728///
729/// let blocks = parse_blocks(markdown);
730/// assert!(matches!(blocks[0], ContentBlock::Heading { level: 1, .. }));
731/// ```
732pub fn parse_blocks(markdown: &str) -> Vec<ContentBlock> {
733    parse_blocks_from_line(markdown, 0)
734}
735
736/// Parse markdown content into structured blocks, starting from a specific line.
737///
738/// Use this when you need accurate line numbers for nested content.
739pub fn parse_blocks_from_line(markdown: &str, start_line: usize) -> Vec<ContentBlock> {
740    // Pre-process wikilinks
741    let preprocessed = preprocess_wikilinks(markdown);
742
743    // Pre-process links with spaces
744    let preprocessed = preprocess_links_with_spaces(&preprocessed);
745
746    // Extract details blocks
747    let (processed_markdown, details_blocks) = extract_details_blocks(&preprocessed);
748
749    // Enable GFM extensions
750    let mut options = Options::empty();
751    options.insert(Options::ENABLE_TABLES);
752    options.insert(Options::ENABLE_STRIKETHROUGH);
753    options.insert(Options::ENABLE_TASKLISTS);
754
755    let parser = Parser::new_ext(&processed_markdown, options);
756    let mut blocks = Vec::new();
757    let mut state = BlockParserState::new(start_line);
758
759    for event in parser {
760        process_event(event, &mut state, &mut blocks);
761    }
762
763    state.finalize(&mut blocks);
764
765    // Replace placeholders with actual Details blocks
766    let mut final_blocks = Vec::new();
767    for block in blocks {
768        let replaced = if let ContentBlock::Paragraph { content, .. } = &block {
769            let trimmed = content.trim();
770            trimmed
771                .strip_prefix("[DETAILS_BLOCK_")
772                .and_then(|s| s.strip_suffix(']'))
773                .and_then(|s| s.parse::<usize>().ok())
774                .and_then(|idx| details_blocks.get(idx).cloned())
775        } else {
776            None
777        };
778
779        final_blocks.push(replaced.unwrap_or(block));
780    }
781
782    final_blocks
783}
784
785// ============================================================================
786// Tests
787// ============================================================================
788
789#[cfg(test)]
790mod tests {
791    use super::*;
792
793    #[test]
794    fn test_parse_paragraph() {
795        let markdown = "This is a simple paragraph.";
796        let blocks = parse_blocks(markdown);
797
798        assert_eq!(blocks.len(), 1);
799        assert!(matches!(blocks[0], ContentBlock::Paragraph { .. }));
800        if let ContentBlock::Paragraph { content, .. } = &blocks[0] {
801            assert_eq!(content, "This is a simple paragraph.");
802        }
803    }
804
805    #[test]
806    fn test_parse_heading() {
807        let markdown = "# Hello World";
808        let blocks = parse_blocks(markdown);
809
810        assert_eq!(blocks.len(), 1);
811        if let ContentBlock::Heading {
812            level,
813            content,
814            anchor,
815            ..
816        } = &blocks[0]
817        {
818            assert_eq!(*level, 1);
819            assert_eq!(content, "Hello World");
820            assert_eq!(anchor.as_deref(), Some("hello-world"));
821        } else {
822            panic!("Expected Heading block");
823        }
824    }
825
826    #[test]
827    fn test_parse_code_block() {
828        let markdown = "```rust\nfn main() {}\n```";
829        let blocks = parse_blocks(markdown);
830
831        assert_eq!(blocks.len(), 1);
832        if let ContentBlock::Code {
833            language, content, ..
834        } = &blocks[0]
835        {
836            assert_eq!(language.as_deref(), Some("rust"));
837            assert_eq!(content, "fn main() {}");
838        } else {
839            panic!("Expected Code block");
840        }
841    }
842
843    #[test]
844    fn test_parse_unordered_list() {
845        let markdown = "- Item 1\n- Item 2\n- Item 3";
846        let blocks = parse_blocks(markdown);
847
848        assert_eq!(blocks.len(), 1);
849        if let ContentBlock::List { ordered, items } = &blocks[0] {
850            assert!(!ordered);
851            assert_eq!(items.len(), 3);
852            assert_eq!(items[0].content, "Item 1");
853            assert_eq!(items[1].content, "Item 2");
854            assert_eq!(items[2].content, "Item 3");
855        } else {
856            panic!("Expected List block");
857        }
858    }
859
860    #[test]
861    fn test_parse_ordered_list() {
862        let markdown = "1. First\n2. Second\n3. Third";
863        let blocks = parse_blocks(markdown);
864
865        assert_eq!(blocks.len(), 1);
866        if let ContentBlock::List { ordered, items } = &blocks[0] {
867            assert!(ordered);
868            assert_eq!(items.len(), 3);
869        } else {
870            panic!("Expected List block");
871        }
872    }
873
874    #[test]
875    fn test_parse_task_list() {
876        let markdown = "- [ ] Todo\n- [x] Done";
877        let blocks = parse_blocks(markdown);
878
879        assert_eq!(blocks.len(), 1);
880        if let ContentBlock::List { items, .. } = &blocks[0] {
881            assert_eq!(items.len(), 2);
882            assert_eq!(items[0].checked, Some(false));
883            assert_eq!(items[0].content, "Todo");
884            assert_eq!(items[1].checked, Some(true));
885            assert_eq!(items[1].content, "Done");
886        } else {
887            panic!("Expected List block");
888        }
889    }
890
891    #[test]
892    fn test_parse_table() {
893        let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
894        let blocks = parse_blocks(markdown);
895
896        assert_eq!(blocks.len(), 1);
897        if let ContentBlock::Table { headers, rows, .. } = &blocks[0] {
898            assert_eq!(headers.len(), 2);
899            assert_eq!(headers[0], "A");
900            assert_eq!(headers[1], "B");
901            assert_eq!(rows.len(), 1);
902            assert_eq!(rows[0][0], "1");
903            assert_eq!(rows[0][1], "2");
904        } else {
905            panic!("Expected Table block");
906        }
907    }
908
909    #[test]
910    fn test_parse_blockquote() {
911        let markdown = "> This is a quote";
912        let blocks = parse_blocks(markdown);
913
914        assert_eq!(blocks.len(), 1);
915        if let ContentBlock::Blockquote { content, .. } = &blocks[0] {
916            assert!(content.contains("This is a quote"));
917        } else {
918            panic!("Expected Blockquote block");
919        }
920    }
921
922    #[test]
923    fn test_parse_horizontal_rule() {
924        let markdown = "Before\n\n---\n\nAfter";
925        let blocks = parse_blocks(markdown);
926
927        assert_eq!(blocks.len(), 3);
928        assert!(matches!(blocks[1], ContentBlock::HorizontalRule));
929    }
930
931    #[test]
932    fn test_parse_inline_formatting() {
933        let markdown = "This has **bold** and *italic* and `code`.";
934        let blocks = parse_blocks(markdown);
935
936        assert_eq!(blocks.len(), 1);
937        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
938            assert!(
939                inline
940                    .iter()
941                    .any(|e| matches!(e, InlineElement::Strong { .. }))
942            );
943            assert!(
944                inline
945                    .iter()
946                    .any(|e| matches!(e, InlineElement::Emphasis { .. }))
947            );
948            assert!(
949                inline
950                    .iter()
951                    .any(|e| matches!(e, InlineElement::Code { .. }))
952            );
953        } else {
954            panic!("Expected Paragraph block");
955        }
956    }
957
958    #[test]
959    fn test_parse_link() {
960        let markdown = "See [example](https://example.com) for more.";
961        let blocks = parse_blocks(markdown);
962
963        assert_eq!(blocks.len(), 1);
964        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
965            let link = inline
966                .iter()
967                .find(|e| matches!(e, InlineElement::Link { .. }));
968            assert!(link.is_some());
969            if let Some(InlineElement::Link { text, url, .. }) = link {
970                assert_eq!(text, "example");
971                assert_eq!(url, "https://example.com");
972            }
973        } else {
974            panic!("Expected Paragraph block");
975        }
976    }
977
978    #[test]
979    fn test_wikilink_preprocessing() {
980        let markdown = "See [[Note]] and [[Other|display]] for info.";
981        let blocks = parse_blocks(markdown);
982
983        assert_eq!(blocks.len(), 1);
984        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
985            let links: Vec<_> = inline
986                .iter()
987                .filter(|e| matches!(e, InlineElement::Link { .. }))
988                .collect();
989            assert_eq!(links.len(), 2);
990
991            if let InlineElement::Link { text, url, .. } = &links[0] {
992                assert_eq!(text, "Note");
993                assert_eq!(url, "wikilink:Note");
994            }
995            if let InlineElement::Link { text, url, .. } = &links[1] {
996                assert_eq!(text, "display");
997                assert_eq!(url, "wikilink:Other");
998            }
999        } else {
1000            panic!("Expected Paragraph block");
1001        }
1002    }
1003
1004    #[test]
1005    fn test_list_with_nested_code() {
1006        let markdown = r#"1. First item
1007   ```rust
1008   code here
1009   ```
1010
10112. Second item"#;
1012
1013        let blocks = parse_blocks(markdown);
1014
1015        assert_eq!(blocks.len(), 1);
1016        if let ContentBlock::List { items, .. } = &blocks[0] {
1017            assert_eq!(items.len(), 2);
1018            assert!(!items[0].blocks.is_empty());
1019            assert!(matches!(items[0].blocks[0], ContentBlock::Code { .. }));
1020        } else {
1021            panic!("Expected List block");
1022        }
1023    }
1024
1025    #[test]
1026    fn test_parse_image() {
1027        // Standalone image is wrapped in paragraph by pulldown-cmark
1028        let markdown = "![Alt text](image.png)";
1029        let blocks = parse_blocks(markdown);
1030
1031        // pulldown-cmark wraps standalone images in paragraphs
1032        assert_eq!(blocks.len(), 1);
1033        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1034            let img = inline
1035                .iter()
1036                .find(|e| matches!(e, InlineElement::Image { .. }));
1037            assert!(img.is_some(), "Should have inline image");
1038        } else {
1039            panic!("Expected Paragraph block with inline image");
1040        }
1041    }
1042
1043    #[test]
1044    fn test_parse_block_image() {
1045        // Image following other content becomes a block image
1046        let markdown = "Some text\n\n![Alt](image.png)";
1047        let blocks = parse_blocks(markdown);
1048
1049        // First paragraph, then image (inline or block)
1050        assert!(blocks.len() >= 2);
1051    }
1052
1053    #[test]
1054    fn test_parse_details_block() {
1055        let markdown = r#"<details>
1056<summary>Click to expand</summary>
1057
1058Inner content here.
1059
1060</details>"#;
1061
1062        let blocks = parse_blocks(markdown);
1063
1064        assert_eq!(blocks.len(), 1);
1065        if let ContentBlock::Details {
1066            summary,
1067            blocks: inner,
1068            ..
1069        } = &blocks[0]
1070        {
1071            assert_eq!(summary, "Click to expand");
1072            assert!(!inner.is_empty());
1073        } else {
1074            panic!("Expected Details block");
1075        }
1076    }
1077
1078    #[test]
1079    fn test_slugify() {
1080        assert_eq!(slugify("Hello World"), "hello-world");
1081        assert_eq!(slugify("API Reference"), "api-reference");
1082        assert_eq!(slugify("1. Getting Started"), "1-getting-started");
1083        assert_eq!(slugify("What's New?"), "whats-new");
1084    }
1085
1086    #[test]
1087    fn test_strikethrough() {
1088        let markdown = "This is ~~deleted~~ text.";
1089        let blocks = parse_blocks(markdown);
1090
1091        assert_eq!(blocks.len(), 1);
1092        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
1093            assert!(
1094                inline
1095                    .iter()
1096                    .any(|e| matches!(e, InlineElement::Strikethrough { .. }))
1097            );
1098        }
1099    }
1100}