Skip to main content

turbovault_parser/
blocks.rs

1//! Block-level content parsing for markdown documents.
2//!
3//! This module provides full block-level parsing using pulldown-cmark,
4//! producing a structured representation of markdown content including:
5//! - Paragraphs, headings, code blocks
6//! - Lists (ordered, unordered, task lists)
7//! - Tables, blockquotes, images
8//! - HTML details blocks
9//!
10//! The parser handles inline formatting within blocks, producing
11//! `InlineElement` vectors for text content.
12
13use pulldown_cmark::{
14    Alignment as CmarkAlignment, CodeBlockKind, Event, Options, Parser, Tag, TagEnd,
15};
16use regex::Regex;
17use std::sync::LazyLock;
18use turbovault_core::{ContentBlock, InlineElement, ListItem, TableAlignment};
19
20// ============================================================================
21// Wikilink preprocessing (converts [[x]] to [x](wikilink:x) for pulldown-cmark)
22// ============================================================================
23
/// Regex for wikilinks: `[[target]]` or `[[target|alias]]`.
///
/// Capture group 1 is the target (one or more characters other than `]` and `|`).
/// Optional capture group 2 is the alias after the `|` (one or more characters
/// other than `]`). Compiled once on first use.
static WIKILINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap());
27
28/// Preprocess wikilinks to standard markdown links with wikilink: prefix.
29/// This allows pulldown-cmark to parse them as regular links.
30fn preprocess_wikilinks(markdown: &str) -> String {
31    WIKILINK_RE
32        .replace_all(markdown, |caps: &regex::Captures| {
33            let target = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("");
34            let alias = caps.get(2).map(|m| m.as_str().trim());
35            let display_text = alias.unwrap_or(target);
36            format!("[{}](wikilink:{})", display_text, target)
37        })
38        .to_string()
39}
40
/// Regex for links with spaces in URL (not valid CommonMark but common in wikis).
///
/// Capture group 1 is the link text; capture group 2 is the destination, which
/// must contain at least one whitespace character and no `)`, `<` or `>`.
/// Destinations already wrapped in angle brackets therefore never match.
static LINK_WITH_SPACES_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)<>]+\s[^)<>]*)\)").unwrap());
44
45/// Preprocess links with spaces to angle bracket syntax.
46fn preprocess_links_with_spaces(markdown: &str) -> String {
47    LINK_WITH_SPACES_RE
48        .replace_all(markdown, |caps: &regex::Captures| {
49            let text = &caps[1];
50            let url = &caps[2];
51            if url.contains(' ') {
52                format!("[{}](<{}>)", text, url)
53            } else {
54                caps[0].to_string()
55            }
56        })
57        .to_string()
58}
59
60// ============================================================================
61// Details block extraction (HTML <details><summary>)
62// ============================================================================
63
64/// Extract HTML <details> blocks and replace with placeholders.
65fn extract_details_blocks(markdown: &str) -> (String, Vec<ContentBlock>) {
66    let mut details_blocks = Vec::new();
67    let mut result = String::new();
68    let mut current_pos = 0;
69
70    while current_pos < markdown.len() {
71        if markdown[current_pos..].starts_with("<details")
72            && let Some(tag_end) = markdown[current_pos..].find('>')
73            && let details_start = current_pos + tag_end + 1
74            && let Some(details_end_pos) = markdown[details_start..].find("</details>")
75        {
76            let details_end = details_start + details_end_pos;
77            let details_content = &markdown[details_start..details_end];
78
79            // Extract summary
80            let summary = extract_summary(details_content);
81
82            // Extract content after </summary>
83            let content_start = if let Some(summary_end_pos) = details_content.find("</summary>") {
84                let summary_tag_end = summary_end_pos + "</summary>".len();
85                &details_content[summary_tag_end..]
86            } else {
87                details_content
88            };
89
90            let content_trimmed = content_start.trim();
91
92            // Parse nested content
93            let nested_blocks = if !content_trimmed.is_empty() {
94                parse_blocks(content_trimmed)
95            } else {
96                Vec::new()
97            };
98
99            details_blocks.push(ContentBlock::Details {
100                summary,
101                content: content_trimmed.to_string(),
102                blocks: nested_blocks,
103            });
104
105            result.push_str(&format!("\n[DETAILS_BLOCK_{}]\n", details_blocks.len() - 1));
106            current_pos = details_end + "</details>".len();
107            continue;
108        }
109
110        if let Some(ch) = markdown[current_pos..].chars().next() {
111            result.push(ch);
112            current_pos += ch.len_utf8();
113        } else {
114            break;
115        }
116    }
117
118    (result, details_blocks)
119}
120
/// Return the trimmed text between `<summary …>` and `</summary>`.
///
/// Yields an empty string when the opening tag, its closing `>`, or the
/// `</summary>` terminator cannot be found in `details_content`.
fn extract_summary(details_content: &str) -> String {
    /// Locates the summary text, or `None` if any delimiter is missing.
    fn locate(source: &str) -> Option<&str> {
        let open_at = source.find("<summary")?;
        let text_start = open_at + source[open_at..].find('>')? + 1;
        let text_len = source[text_start..].find("</summary>")?;
        Some(source[text_start..text_start + text_len].trim())
    }

    match locate(details_content) {
        Some(text) => text.to_string(),
        None => String::new(),
    }
}
135
136// ============================================================================
137// Parser state machine
138// ============================================================================
139
/// Mutable state threaded through `process_event` while walking the
/// pulldown-cmark event stream for one document.
struct BlockParserState {
    /// Line number recorded as start/end line on code blocks. Seeded from the
    /// `start_line` passed to `new`.
    // NOTE(review): nothing in the visible code advances this value — confirm
    // whether it is meant to track the source position.
    current_line: usize,
    /// Text accumulated for the paragraph, list item or table cell in progress.
    paragraph_buffer: String,
    /// Inline elements accumulated alongside `paragraph_buffer`.
    inline_buffer: Vec<InlineElement>,
    /// Finished top-level items of the list in progress.
    list_items: Vec<ListItem>,
    /// Whether the outermost list in progress is ordered.
    list_ordered: bool,
    /// Number of currently open list tags.
    list_depth: usize,
    /// Number of currently open list-item tags.
    item_depth: usize,
    /// Checkbox state from the most recent task-list marker, if any.
    task_list_marker: Option<bool>,
    /// Markers of enclosing items, saved while a nested item is processed.
    saved_task_markers: Vec<Option<bool>>,
    /// Blocks collected inside the top-level list item in progress.
    item_blocks: Vec<ContentBlock>,
    /// Text of the code block in progress.
    code_buffer: String,
    /// Language tag of the fenced code block in progress, if one was given.
    code_language: Option<String>,
    /// Value of `current_line` when the code block in progress started.
    code_start_line: usize,
    /// Text collected inside the blockquote in progress; re-parsed on flush.
    blockquote_buffer: String,
    /// Header cells of the table in progress.
    table_headers: Vec<String>,
    /// Column alignments of the table in progress.
    table_alignments: Vec<TableAlignment>,
    /// Completed body rows of the table in progress.
    table_rows: Vec<Vec<String>>,
    /// Cells of the table row in progress.
    current_row: Vec<String>,
    /// Level of the heading in progress.
    heading_level: Option<usize>,
    /// Text of the heading in progress.
    heading_buffer: String,
    /// Inline elements of the heading in progress.
    heading_inline: Vec<InlineElement>,
    // Flags recording which block-level construct is currently open.
    in_paragraph: bool,
    in_list: bool,
    in_code: bool,
    in_blockquote: bool,
    in_table: bool,
    in_heading: bool,
    // Flags recording which inline formatting is currently open.
    in_strong: bool,
    in_emphasis: bool,
    in_strikethrough: bool,
    /// Set only for the duration of an inline-code `add_inline_text` call.
    in_code_inline: bool,
    in_link: bool,
    /// Destination of the link or image in progress.
    link_url: String,
    /// Text (or image alt text) of the link or image in progress.
    link_text: String,
    /// Set when an image starts while a link is open.
    image_in_link: bool,
    in_image: bool,
    /// Destination of the enclosing link, saved when a nested image overwrites
    /// `link_url`.
    saved_link_url: String,
    /// Tracks relative line offset within current list item (for nested items)
    nested_line_offset: usize,
}
181
182impl BlockParserState {
183    fn new(start_line: usize) -> Self {
184        Self {
185            current_line: start_line,
186            paragraph_buffer: String::new(),
187            inline_buffer: Vec::new(),
188            list_items: Vec::new(),
189            list_ordered: false,
190            list_depth: 0,
191            item_depth: 0,
192            task_list_marker: None,
193            saved_task_markers: Vec::new(),
194            item_blocks: Vec::new(),
195            code_buffer: String::new(),
196            code_language: None,
197            code_start_line: 0,
198            blockquote_buffer: String::new(),
199            table_headers: Vec::new(),
200            table_alignments: Vec::new(),
201            table_rows: Vec::new(),
202            current_row: Vec::new(),
203            heading_level: None,
204            heading_buffer: String::new(),
205            heading_inline: Vec::new(),
206            in_paragraph: false,
207            in_list: false,
208            in_code: false,
209            in_blockquote: false,
210            in_table: false,
211            in_heading: false,
212            in_strong: false,
213            in_emphasis: false,
214            in_strikethrough: false,
215            in_code_inline: false,
216            in_link: false,
217            link_url: String::new(),
218            link_text: String::new(),
219            image_in_link: false,
220            in_image: false,
221            saved_link_url: String::new(),
222            nested_line_offset: 0,
223        }
224    }
225
226    fn finalize(&mut self, blocks: &mut Vec<ContentBlock>) {
227        self.flush_paragraph(blocks);
228        self.flush_list(blocks);
229        self.flush_code(blocks);
230        self.flush_blockquote(blocks);
231        self.flush_table(blocks);
232    }
233
234    fn flush_paragraph(&mut self, blocks: &mut Vec<ContentBlock>) {
235        if self.in_paragraph && !self.paragraph_buffer.is_empty() {
236            blocks.push(ContentBlock::Paragraph {
237                content: self.paragraph_buffer.clone(),
238                inline: self.inline_buffer.clone(),
239            });
240            self.paragraph_buffer.clear();
241            self.inline_buffer.clear();
242            self.in_paragraph = false;
243        }
244    }
245
246    fn flush_list(&mut self, blocks: &mut Vec<ContentBlock>) {
247        if self.in_list && !self.list_items.is_empty() {
248            blocks.push(ContentBlock::List {
249                ordered: self.list_ordered,
250                items: self.list_items.clone(),
251            });
252            self.list_items.clear();
253            self.in_list = false;
254        }
255    }
256
257    fn flush_code(&mut self, blocks: &mut Vec<ContentBlock>) {
258        if self.in_code && !self.code_buffer.is_empty() {
259            blocks.push(ContentBlock::Code {
260                language: self.code_language.clone(),
261                content: self.code_buffer.trim_end().to_string(),
262                start_line: self.code_start_line,
263                end_line: self.current_line,
264            });
265            self.code_buffer.clear();
266            self.code_language = None;
267            self.in_code = false;
268        }
269    }
270
271    fn flush_blockquote(&mut self, blocks: &mut Vec<ContentBlock>) {
272        if self.in_blockquote && !self.blockquote_buffer.is_empty() {
273            let nested_blocks = parse_blocks(&self.blockquote_buffer);
274            blocks.push(ContentBlock::Blockquote {
275                content: self.blockquote_buffer.clone(),
276                blocks: nested_blocks,
277            });
278            self.blockquote_buffer.clear();
279            self.in_blockquote = false;
280        }
281    }
282
283    fn flush_table(&mut self, blocks: &mut Vec<ContentBlock>) {
284        if self.in_table && !self.table_headers.is_empty() {
285            blocks.push(ContentBlock::Table {
286                headers: self.table_headers.clone(),
287                alignments: self.table_alignments.clone(),
288                rows: self.table_rows.clone(),
289            });
290            self.table_headers.clear();
291            self.table_alignments.clear();
292            self.table_rows.clear();
293            self.current_row.clear();
294            self.paragraph_buffer.clear();
295            self.inline_buffer.clear();
296            self.in_table = false;
297        }
298    }
299
300    fn add_inline_text(&mut self, text: &str) {
301        if text.is_empty() {
302            return;
303        }
304
305        let element = if self.in_code_inline {
306            InlineElement::Code {
307                value: text.to_string(),
308            }
309        } else if self.in_strong {
310            InlineElement::Strong {
311                value: text.to_string(),
312            }
313        } else if self.in_emphasis {
314            InlineElement::Emphasis {
315                value: text.to_string(),
316            }
317        } else if self.in_strikethrough {
318            InlineElement::Strikethrough {
319                value: text.to_string(),
320            }
321        } else {
322            InlineElement::Text {
323                value: text.to_string(),
324            }
325        };
326
327        self.inline_buffer.push(element);
328        self.paragraph_buffer.push_str(text);
329    }
330}
331
332// ============================================================================
333// Event processing
334// ============================================================================
335
336#[allow(clippy::too_many_lines)]
337fn process_event(event: Event, state: &mut BlockParserState, blocks: &mut Vec<ContentBlock>) {
338    match event {
339        Event::Start(Tag::Paragraph) => {
340            state.in_paragraph = true;
341        }
342        Event::End(TagEnd::Paragraph) => {
343            if state.item_depth >= 1 && state.in_paragraph && !state.paragraph_buffer.is_empty() {
344                state.item_blocks.push(ContentBlock::Paragraph {
345                    content: state.paragraph_buffer.clone(),
346                    inline: state.inline_buffer.clone(),
347                });
348                state.paragraph_buffer.clear();
349                state.inline_buffer.clear();
350                state.in_paragraph = false;
351            } else {
352                state.flush_paragraph(blocks);
353            }
354        }
355        Event::Start(Tag::CodeBlock(kind)) => {
356            state.in_code = true;
357            state.code_start_line = state.current_line;
358            state.code_language = match kind {
359                CodeBlockKind::Fenced(lang) => {
360                    if lang.is_empty() {
361                        None
362                    } else {
363                        Some(lang.to_string())
364                    }
365                }
366                CodeBlockKind::Indented => None,
367            };
368        }
369        Event::End(TagEnd::CodeBlock) => {
370            if state.item_depth >= 1 && state.in_code && !state.code_buffer.is_empty() {
371                state.item_blocks.push(ContentBlock::Code {
372                    language: state.code_language.clone(),
373                    content: state.code_buffer.trim_end().to_string(),
374                    start_line: state.code_start_line,
375                    end_line: state.current_line,
376                });
377                state.code_buffer.clear();
378                state.code_language = None;
379                state.in_code = false;
380            } else {
381                state.flush_code(blocks);
382            }
383        }
384        Event::Start(Tag::List(start_number)) => {
385            state.list_depth += 1;
386            if state.list_depth == 1 {
387                state.in_list = true;
388                state.list_ordered = start_number.is_some();
389            }
390        }
391        Event::End(TagEnd::List(_)) => {
392            state.list_depth = state.list_depth.saturating_sub(1);
393            if state.list_depth == 0 {
394                state.flush_list(blocks);
395            }
396        }
397        Event::Start(Tag::Item) => {
398            state.item_depth += 1;
399            if state.item_depth > 1 {
400                state.saved_task_markers.push(state.task_list_marker);
401                state.task_list_marker = None;
402            }
403            if state.item_depth == 1 {
404                state.paragraph_buffer.clear();
405                state.inline_buffer.clear();
406                state.item_blocks.clear();
407                state.nested_line_offset = 0;
408            }
409        }
410        Event::End(TagEnd::Item) => {
411            if state.item_depth > 1
412                && let Some(saved) = state.saved_task_markers.pop()
413            {
414                state.task_list_marker = saved;
415            }
416            if state.item_depth == 1 {
417                let (content, mut inline, remaining_blocks) = if !state.paragraph_buffer.is_empty()
418                {
419                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
420                    (
421                        state.paragraph_buffer.clone(),
422                        state.inline_buffer.clone(),
423                        all_blocks,
424                    )
425                } else if let Some(ContentBlock::Paragraph { content, inline }) =
426                    state.item_blocks.first().cloned()
427                {
428                    let remaining: Vec<ContentBlock> = state.item_blocks.drain(1..).collect();
429                    (content, inline, remaining)
430                } else {
431                    let all_blocks: Vec<ContentBlock> = state.item_blocks.drain(..).collect();
432                    (String::new(), Vec::new(), all_blocks)
433                };
434
435                // Collect inline elements from all nested blocks (paragraphs, lists, etc.)
436                collect_inline_elements(&remaining_blocks, &mut inline);
437
438                state.list_items.push(ListItem {
439                    checked: state.task_list_marker,
440                    content,
441                    inline,
442                    blocks: remaining_blocks,
443                });
444                state.paragraph_buffer.clear();
445                state.inline_buffer.clear();
446                state.item_blocks.clear();
447                state.task_list_marker = None;
448            }
449            state.item_depth = state.item_depth.saturating_sub(1);
450        }
451        Event::TaskListMarker(checked) => {
452            state.task_list_marker = Some(checked);
453        }
454        Event::Start(Tag::BlockQuote(_)) => {
455            state.in_blockquote = true;
456        }
457        Event::End(TagEnd::BlockQuote(_)) => {
458            state.flush_blockquote(blocks);
459        }
460        Event::Start(Tag::Table(alignments)) => {
461            state.in_table = true;
462            state.table_alignments = alignments
463                .iter()
464                .map(|a| match a {
465                    CmarkAlignment::Left => TableAlignment::Left,
466                    CmarkAlignment::Center => TableAlignment::Center,
467                    CmarkAlignment::Right => TableAlignment::Right,
468                    CmarkAlignment::None => TableAlignment::None,
469                })
470                .collect();
471        }
472        Event::End(TagEnd::Table) => {
473            state.flush_table(blocks);
474        }
475        Event::Start(Tag::TableHead) => {}
476        Event::End(TagEnd::TableHead) => {
477            state.table_headers = state.current_row.clone();
478            state.current_row.clear();
479        }
480        Event::Start(Tag::TableRow) => {}
481        Event::End(TagEnd::TableRow) => {
482            state.table_rows.push(state.current_row.clone());
483            state.current_row.clear();
484        }
485        Event::Start(Tag::TableCell) => {
486            state.paragraph_buffer.clear();
487            state.inline_buffer.clear();
488        }
489        Event::End(TagEnd::TableCell) => {
490            state.current_row.push(state.paragraph_buffer.clone());
491            state.paragraph_buffer.clear();
492            state.inline_buffer.clear();
493        }
494        Event::Start(Tag::Strong) => {
495            state.in_strong = true;
496        }
497        Event::End(TagEnd::Strong) => {
498            state.in_strong = false;
499        }
500        Event::Start(Tag::Emphasis) => {
501            state.in_emphasis = true;
502        }
503        Event::End(TagEnd::Emphasis) => {
504            state.in_emphasis = false;
505        }
506        Event::Start(Tag::Strikethrough) => {
507            state.in_strikethrough = true;
508        }
509        Event::End(TagEnd::Strikethrough) => {
510            state.in_strikethrough = false;
511        }
512        Event::Code(text) => {
513            state.in_code_inline = true;
514            state.add_inline_text(&text);
515            state.in_code_inline = false;
516        }
517        Event::Start(Tag::Link { dest_url, .. }) => {
518            // For nested list items, add newline and indent before the link
519            // (same logic as in Event::Text for nested items)
520            if state.in_list && state.item_depth > 1 {
521                if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n') {
522                    state.paragraph_buffer.push('\n');
523                    state.nested_line_offset += 1;
524                }
525                let indent = "  ".repeat(state.item_depth - 1);
526                state.paragraph_buffer.push_str(&indent);
527
528                if let Some(checked) = state.task_list_marker {
529                    let marker = if checked { "[x] " } else { "[ ] " };
530                    state.paragraph_buffer.push_str(marker);
531                    state.task_list_marker = None;
532                }
533            }
534            state.in_link = true;
535            state.link_url = dest_url.to_string();
536            state.link_text.clear();
537        }
538        Event::End(TagEnd::Link) => {
539            state.in_link = false;
540
541            // Capture line_offset for nested list items
542            let line_offset = if state.in_list && state.item_depth >= 1 {
543                Some(state.nested_line_offset)
544            } else {
545                None
546            };
547
548            if state.image_in_link {
549                state.inline_buffer.push(InlineElement::Link {
550                    text: state.link_text.clone(),
551                    url: state.saved_link_url.clone(),
552                    title: None,
553                    line_offset,
554                });
555                state
556                    .paragraph_buffer
557                    .push_str(&format!("[{}]({})", state.link_text, state.saved_link_url));
558            } else {
559                state.inline_buffer.push(InlineElement::Link {
560                    text: state.link_text.clone(),
561                    url: state.link_url.clone(),
562                    title: None,
563                    line_offset,
564                });
565                state
566                    .paragraph_buffer
567                    .push_str(&format!("[{}]({})", state.link_text, state.link_url));
568            }
569
570            state.link_text.clear();
571            state.link_url.clear();
572            state.saved_link_url.clear();
573            state.image_in_link = false;
574        }
575        Event::Start(Tag::Image {
576            dest_url, title, ..
577        }) => {
578            if state.in_link {
579                state.image_in_link = true;
580                state.saved_link_url = state.link_url.clone();
581            }
582            state.in_image = true;
583            state.link_url = dest_url.to_string();
584            state.link_text.clear();
585            state.paragraph_buffer = title.to_string();
586        }
587        Event::End(TagEnd::Image) => {
588            state.in_image = false;
589
590            if !state.image_in_link {
591                // Capture title before we modify paragraph_buffer
592                let title = if state.paragraph_buffer.is_empty() {
593                    None
594                } else {
595                    Some(state.paragraph_buffer.clone())
596                };
597
598                // Capture line_offset for inline images in list items
599                let line_offset = if state.in_list && state.item_depth >= 1 {
600                    Some(state.nested_line_offset)
601                } else {
602                    None
603                };
604
605                if state.in_paragraph {
606                    // Reset paragraph_buffer for image representation
607                    state.paragraph_buffer.clear();
608                    state.inline_buffer.push(InlineElement::Image {
609                        alt: state.link_text.clone(),
610                        src: state.link_url.clone(),
611                        title,
612                        line_offset,
613                    });
614                    // Add image placeholder to paragraph content
615                    state
616                        .paragraph_buffer
617                        .push_str(&format!("![{}]({})", state.link_text, state.link_url));
618                } else {
619                    state.flush_paragraph(blocks);
620                    blocks.push(ContentBlock::Image {
621                        alt: state.link_text.clone(),
622                        src: state.link_url.clone(),
623                        title,
624                    });
625                    state.paragraph_buffer.clear();
626                }
627
628                state.link_text.clear();
629                state.link_url.clear();
630            }
631        }
632        Event::Text(text) => {
633            if state.in_code {
634                state.code_buffer.push_str(&text);
635            } else if state.in_blockquote {
636                state.blockquote_buffer.push_str(&text);
637            } else if state.in_heading {
638                state.heading_buffer.push_str(&text);
639                let element = if state.in_code_inline {
640                    InlineElement::Code {
641                        value: text.to_string(),
642                    }
643                } else if state.in_strong {
644                    InlineElement::Strong {
645                        value: text.to_string(),
646                    }
647                } else if state.in_emphasis {
648                    InlineElement::Emphasis {
649                        value: text.to_string(),
650                    }
651                } else {
652                    InlineElement::Text {
653                        value: text.to_string(),
654                    }
655                };
656                state.heading_inline.push(element);
657            } else if state.in_link || state.in_image {
658                state.link_text.push_str(&text);
659            } else {
660                if state.in_list && state.item_depth > 1 {
661                    if !state.paragraph_buffer.is_empty() && !state.paragraph_buffer.ends_with('\n')
662                    {
663                        state.paragraph_buffer.push('\n');
664                    }
665                    let indent = "  ".repeat(state.item_depth - 1);
666                    state.paragraph_buffer.push_str(&indent);
667
668                    if let Some(checked) = state.task_list_marker {
669                        let marker = if checked { "[x] " } else { "[ ] " };
670                        state.paragraph_buffer.push_str(marker);
671                        state.task_list_marker = None;
672                    }
673                }
674                state.add_inline_text(&text);
675            }
676        }
677        Event::SoftBreak => {
678            if state.in_paragraph {
679                state.paragraph_buffer.push(' ');
680                state.inline_buffer.push(InlineElement::Text {
681                    value: " ".to_string(),
682                });
683            }
684        }
685        Event::HardBreak => {
686            if state.in_paragraph {
687                state.paragraph_buffer.push('\n');
688                state.inline_buffer.push(InlineElement::Text {
689                    value: "\n".to_string(),
690                });
691            }
692        }
693        Event::Rule => {
694            state.flush_paragraph(blocks);
695            blocks.push(ContentBlock::HorizontalRule);
696        }
697        Event::Start(Tag::Heading { level, .. }) => {
698            state.flush_paragraph(blocks);
699            state.in_heading = true;
700            state.heading_level = Some(level as usize);
701            state.heading_buffer.clear();
702            state.heading_inline.clear();
703        }
704        Event::End(TagEnd::Heading(_)) => {
705            if state.in_heading
706                && !state.heading_buffer.is_empty()
707                && let Some(level) = state.heading_level
708            {
709                let anchor = Some(slugify(&state.heading_buffer));
710                blocks.push(ContentBlock::Heading {
711                    level,
712                    content: state.heading_buffer.clone(),
713                    inline: state.heading_inline.clone(),
714                    anchor,
715                });
716            }
717            state.in_heading = false;
718            state.heading_level = None;
719            state.heading_buffer.clear();
720            state.heading_inline.clear();
721        }
722        _ => {}
723    }
724}
725
726// ============================================================================
727// Slug generation
728// ============================================================================
729
/// Generate a URL-friendly slug from heading text.
///
/// The text is lowercased; alphanumeric characters are kept, every run of
/// whitespace and/or `-` between kept characters becomes a single `-`, and all
/// other characters are dropped. The result never starts or ends with `-`.
pub fn slugify(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    // True once a separator has been seen since the last kept character.
    let mut separator_pending = false;

    for ch in lowered.chars() {
        if ch.is_alphanumeric() {
            // Emit the separator lazily so leading/trailing ones vanish.
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch == '-' || ch.is_whitespace() {
            separator_pending = true;
        }
    }

    slug
}
750
751// ============================================================================
752// Helper functions
753// ============================================================================
754
755/// Recursively collect inline elements from content blocks.
756///
757/// This traverses nested structures (paragraphs, lists, blockquotes) to gather
758/// all inline elements, enabling consumers to find links and other inline
759/// content from nested list items.
760fn collect_inline_elements(blocks: &[ContentBlock], output: &mut Vec<InlineElement>) {
761    for block in blocks {
762        match block {
763            ContentBlock::Paragraph { inline, .. } => {
764                output.extend(inline.iter().cloned());
765            }
766            ContentBlock::List { items, .. } => {
767                for item in items {
768                    output.extend(item.inline.iter().cloned());
769                    collect_inline_elements(&item.blocks, output);
770                }
771            }
772            ContentBlock::Blockquote { blocks, .. } => {
773                collect_inline_elements(blocks, output);
774            }
775            ContentBlock::Details { blocks, .. } => {
776                collect_inline_elements(blocks, output);
777            }
778            // Headings, Code, HorizontalRule, Table, Image don't have nested inline elements
779            // that we need to collect (or they store them differently)
780            _ => {}
781        }
782    }
783}
784
785// ============================================================================
786// Public API
787// ============================================================================
788
/// Parse markdown content into structured blocks.
///
/// This is the main entry point for block-level parsing. It delegates to
/// [`parse_blocks_from_line`] with a starting line of `0`, which handles:
/// - Wikilink preprocessing (converts `[[x]]` to markdown links)
/// - Preprocessing of links whose destination contains spaces
/// - HTML details block extraction
/// - Full pulldown-cmark parsing with GFM extensions
///
/// # Examples
///
/// ```
/// use turbovault_parser::parse_blocks;
/// use turbovault_core::ContentBlock;
///
/// let markdown = "# Hello World\n\nThis is a **paragraph** with *inline* formatting.";
///
/// let blocks = parse_blocks(markdown);
/// assert!(matches!(blocks[0], ContentBlock::Heading { level: 1, .. }));
/// ```
pub fn parse_blocks(markdown: &str) -> Vec<ContentBlock> {
    parse_blocks_from_line(markdown, 0)
}
810
811/// Parse markdown content into structured blocks, starting from a specific line.
812///
813/// Use this when you need accurate line numbers for nested content.
814pub fn parse_blocks_from_line(markdown: &str, start_line: usize) -> Vec<ContentBlock> {
815    // Pre-process wikilinks
816    let preprocessed = preprocess_wikilinks(markdown);
817
818    // Pre-process links with spaces
819    let preprocessed = preprocess_links_with_spaces(&preprocessed);
820
821    // Extract details blocks
822    let (processed_markdown, details_blocks) = extract_details_blocks(&preprocessed);
823
824    // Enable GFM extensions
825    let mut options = Options::empty();
826    options.insert(Options::ENABLE_TABLES);
827    options.insert(Options::ENABLE_STRIKETHROUGH);
828    options.insert(Options::ENABLE_TASKLISTS);
829
830    let parser = Parser::new_ext(&processed_markdown, options);
831    let mut blocks = Vec::new();
832    let mut state = BlockParserState::new(start_line);
833
834    for event in parser {
835        process_event(event, &mut state, &mut blocks);
836    }
837
838    state.finalize(&mut blocks);
839
840    // Replace placeholders with actual Details blocks
841    let mut final_blocks = Vec::new();
842    for block in blocks {
843        let replaced = if let ContentBlock::Paragraph { content, .. } = &block {
844            let trimmed = content.trim();
845            trimmed
846                .strip_prefix("[DETAILS_BLOCK_")
847                .and_then(|s| s.strip_suffix(']'))
848                .and_then(|s| s.parse::<usize>().ok())
849                .and_then(|idx| details_blocks.get(idx).cloned())
850        } else {
851            None
852        };
853
854        final_blocks.push(replaced.unwrap_or(block));
855    }
856
857    final_blocks
858}
859
860/// Extract plain text from markdown content.
861///
862/// Strips all markdown syntax, returning only text that would be
863/// visible when rendered. This is useful for:
864/// - **Search indexing**: Index only searchable text
865/// - **Accessibility**: Screen reader text extraction
866/// - **Word counts**: Accurate content word counts
867/// - **Diffs**: Compare semantic content, not syntax
868///
869/// # Elements stripped
870///
871/// | Markdown | Plain Text |
872/// |----------|------------|
873/// | `[text](url)` | `text` |
874/// | `![alt](url)` | `alt` |
875/// | `[[Page]]` | `Page` |
876/// | `[[Page\|Display]]` | `Display` |
877/// | `**bold**` | `bold` |
878/// | `*italic*` | `italic` |
879/// | `` `code` `` | `code` |
880/// | `~~strike~~` | `strike` |
881/// | `# Heading` | `Heading` |
882/// | `> quote` | (quote content) |
883/// | Code fences | (content preserved) |
884///
885/// # Example
886///
887/// ```
888/// use turbovault_parser::to_plain_text;
889///
890/// let plain = to_plain_text("[Overview](#overview) and **bold**");
891/// assert_eq!(plain, "Overview and bold");
892///
893/// // Wikilinks are handled properly
894/// let plain = to_plain_text("See [[Note]] and [[Other|display]]");
895/// assert_eq!(plain, "See Note and display");
896/// ```
897pub fn to_plain_text(markdown: &str) -> String {
898    let blocks = parse_blocks(markdown);
899    blocks
900        .iter()
901        .map(ContentBlock::to_plain_text)
902        .collect::<Vec<_>>()
903        .join("\n")
904}
905
906// ============================================================================
907// Tests
908// ============================================================================
909
#[cfg(test)]
mod tests {
    //! Unit tests for block parsing and plain-text extraction.
    //!
    //! Tests that destructure a block always fail loudly (via `panic!`) when
    //! the block has an unexpected shape, so no assertion can be skipped
    //! silently.

    use super::*;

    #[test]
    fn test_parse_paragraph() {
        let markdown = "This is a simple paragraph.";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { content, .. } = &blocks[0] {
            assert_eq!(content, "This is a simple paragraph.");
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_parse_heading() {
        let markdown = "# Hello World";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Heading {
            level,
            content,
            anchor,
            ..
        } = &blocks[0]
        {
            assert_eq!(*level, 1);
            assert_eq!(content, "Hello World");
            assert_eq!(anchor.as_deref(), Some("hello-world"));
        } else {
            panic!("Expected Heading block");
        }
    }

    #[test]
    fn test_parse_code_block() {
        let markdown = "```rust\nfn main() {}\n```";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Code {
            language, content, ..
        } = &blocks[0]
        {
            assert_eq!(language.as_deref(), Some("rust"));
            assert_eq!(content, "fn main() {}");
        } else {
            panic!("Expected Code block");
        }
    }

    #[test]
    fn test_parse_unordered_list() {
        let markdown = "- Item 1\n- Item 2\n- Item 3";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { ordered, items } = &blocks[0] {
            assert!(!ordered);
            assert_eq!(items.len(), 3);
            assert_eq!(items[0].content, "Item 1");
            assert_eq!(items[1].content, "Item 2");
            assert_eq!(items[2].content, "Item 3");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_ordered_list() {
        let markdown = "1. First\n2. Second\n3. Third";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { ordered, items } = &blocks[0] {
            assert!(ordered);
            assert_eq!(items.len(), 3);
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_task_list() {
        let markdown = "- [ ] Todo\n- [x] Done";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 2);
            assert_eq!(items[0].checked, Some(false));
            assert_eq!(items[0].content, "Todo");
            assert_eq!(items[1].checked, Some(true));
            assert_eq!(items[1].content, "Done");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_table() {
        let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Table { headers, rows, .. } = &blocks[0] {
            assert_eq!(headers.len(), 2);
            assert_eq!(headers[0], "A");
            assert_eq!(headers[1], "B");
            assert_eq!(rows.len(), 1);
            assert_eq!(rows[0][0], "1");
            assert_eq!(rows[0][1], "2");
        } else {
            panic!("Expected Table block");
        }
    }

    #[test]
    fn test_parse_blockquote() {
        let markdown = "> This is a quote";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Blockquote { content, .. } = &blocks[0] {
            assert!(content.contains("This is a quote"));
        } else {
            panic!("Expected Blockquote block");
        }
    }

    #[test]
    fn test_parse_horizontal_rule() {
        let markdown = "Before\n\n---\n\nAfter";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 3);
        assert!(matches!(blocks[1], ContentBlock::HorizontalRule));
    }

    #[test]
    fn test_parse_inline_formatting() {
        let markdown = "This has **bold** and *italic* and `code`.";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Strong { .. }))
            );
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Emphasis { .. }))
            );
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Code { .. }))
            );
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_parse_link() {
        let markdown = "See [example](https://example.com) for more.";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let link = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Link { .. }));
            if let Some(InlineElement::Link { text, url, .. }) = link {
                assert_eq!(text, "example");
                assert_eq!(url, "https://example.com");
            } else {
                panic!("Expected a Link element in the paragraph");
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_wikilink_preprocessing() {
        let markdown = "See [[Note]] and [[Other|display]] for info.";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let links: Vec<_> = inline
                .iter()
                .filter(|e| matches!(e, InlineElement::Link { .. }))
                .collect();
            assert_eq!(links.len(), 2);

            // The filter above guarantees both entries are `Link`s.
            if let InlineElement::Link { text, url, .. } = &links[0] {
                assert_eq!(text, "Note");
                assert_eq!(url, "wikilink:Note");
            }
            if let InlineElement::Link { text, url, .. } = &links[1] {
                assert_eq!(text, "display");
                assert_eq!(url, "wikilink:Other");
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_list_with_nested_code() {
        let markdown = r#"1. First item
   ```rust
   code here
   ```

2. Second item"#;

        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 2);
            assert!(!items[0].blocks.is_empty());
            assert!(matches!(items[0].blocks[0], ContentBlock::Code { .. }));
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_parse_image() {
        // Standalone image is wrapped in paragraph by pulldown-cmark
        let markdown = "![Alt text](image.png)";
        let blocks = parse_blocks(markdown);

        // pulldown-cmark wraps standalone images in paragraphs
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let img = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Image { .. }));
            assert!(img.is_some(), "Should have inline image");
        } else {
            panic!("Expected Paragraph block with inline image");
        }
    }

    #[test]
    fn test_parse_block_image() {
        // An image in its own paragraph after other content. This test does not
        // pin whether it surfaces as a block image or as a paragraph with an
        // inline image; it only checks that it yields a block of its own.
        let markdown = "Some text\n\n![Alt](image.png)";
        let blocks = parse_blocks(markdown);

        // First paragraph, then image (inline or block)
        assert!(blocks.len() >= 2);
    }

    #[test]
    fn test_parse_details_block() {
        let markdown = r#"<details>
<summary>Click to expand</summary>

Inner content here.

</details>"#;

        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Details {
            summary,
            blocks: inner,
            ..
        } = &blocks[0]
        {
            assert_eq!(summary, "Click to expand");
            assert!(!inner.is_empty());
        } else {
            panic!("Expected Details block");
        }
    }

    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("API Reference"), "api-reference");
        assert_eq!(slugify("1. Getting Started"), "1-getting-started");
        assert_eq!(slugify("What's New?"), "whats-new");
    }

    #[test]
    fn test_strikethrough() {
        let markdown = "This is ~~deleted~~ text.";
        let blocks = parse_blocks(markdown);

        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            assert!(
                inline
                    .iter()
                    .any(|e| matches!(e, InlineElement::Strikethrough { .. }))
            );
        } else {
            // Without this branch the test would pass vacuously on a non-paragraph.
            panic!("Expected Paragraph block");
        }
    }

    #[test]
    fn test_indented_code_blocks_in_list_items() {
        // Bug report: indented fenced code blocks in list items should be recognized.
        // Per CommonMark, a fenced code block indented to the list item's content
        // column (3 spaces after a "1. " marker) belongs to that list item.
        let markdown = r#"## Installation

1. Install from crates.io:
   ```bash
   cargo install treemd
   ```

2. Or build from source:
   ```bash
   git clone https://github.com/example/repo
   cd repo
   cargo install --path .
   ```"#;

        let blocks = parse_blocks(markdown);

        // Should have: Heading, List
        assert_eq!(blocks.len(), 2, "Expected 2 blocks (heading + list)");
        assert!(
            matches!(blocks[0], ContentBlock::Heading { level: 2, .. }),
            "First block should be H2"
        );

        if let ContentBlock::List { ordered, items } = &blocks[1] {
            assert!(ordered, "Should be an ordered list");
            assert_eq!(items.len(), 2, "Should have 2 list items");

            // First item should have code block in its nested blocks
            assert!(
                !items[0].blocks.is_empty(),
                "First item should have nested blocks"
            );
            assert!(
                matches!(items[0].blocks[0], ContentBlock::Code { .. }),
                "First item's nested block should be Code"
            );
            if let ContentBlock::Code {
                language, content, ..
            } = &items[0].blocks[0]
            {
                assert_eq!(language.as_deref(), Some("bash"));
                assert!(content.contains("cargo install treemd"));
            }

            // Second item should also have code block in its nested blocks
            assert!(
                !items[1].blocks.is_empty(),
                "Second item should have nested blocks"
            );
            assert!(
                matches!(items[1].blocks[0], ContentBlock::Code { .. }),
                "Second item's nested block should be Code"
            );
            if let ContentBlock::Code {
                language, content, ..
            } = &items[1].blocks[0]
            {
                assert_eq!(language.as_deref(), Some("bash"));
                assert!(content.contains("git clone"));
            }
        } else {
            panic!("Expected List block");
        }
    }

    // ========================================================================
    // to_plain_text tests
    // ========================================================================

    #[test]
    fn test_to_plain_text_simple_paragraph() {
        let markdown = "This is a simple paragraph.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This is a simple paragraph.");
    }

    #[test]
    fn test_to_plain_text_with_link() {
        let markdown = "[Overview](#overview) and more text";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Overview and more text");
    }

    #[test]
    fn test_to_plain_text_with_bold_and_italic() {
        let markdown = "This has **bold** and *italic* text.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This has bold and italic text.");
    }

    #[test]
    fn test_to_plain_text_with_inline_code() {
        let markdown = "Use the `println!` macro.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Use the println! macro.");
    }

    #[test]
    fn test_to_plain_text_with_strikethrough() {
        let markdown = "This is ~~deleted~~ text.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "This is deleted text.");
    }

    #[test]
    fn test_to_plain_text_wikilinks() {
        let markdown = "See [[Note]] and [[Other|display]] for info.";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "See Note and display for info.");
    }

    #[test]
    fn test_to_plain_text_heading() {
        let markdown = "# Hello World";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Hello World");
    }

    #[test]
    fn test_to_plain_text_code_block() {
        let markdown = "```rust\nfn main() {}\n```";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "fn main() {}");
    }

    #[test]
    fn test_to_plain_text_list() {
        let markdown = "- Item 1\n- Item 2\n- Item 3";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Item 1\nItem 2\nItem 3");
    }

    #[test]
    fn test_to_plain_text_table() {
        let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
        let plain = to_plain_text(markdown);
        // Table headers and rows separated by tabs
        assert!(plain.contains("A\tB"));
        assert!(plain.contains("1\t2"));
    }

    #[test]
    fn test_to_plain_text_blockquote() {
        let markdown = "> This is a quote";
        let plain = to_plain_text(markdown);
        assert!(plain.contains("This is a quote"));
    }

    #[test]
    fn test_to_plain_text_image() {
        let markdown = "![Alt text](image.png)";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Alt text");
    }

    #[test]
    fn test_to_plain_text_horizontal_rule() {
        let markdown = "Before\n\n---\n\nAfter";
        let plain = to_plain_text(markdown);
        // Horizontal rules produce empty strings, paragraphs separated by newlines
        assert!(plain.contains("Before"));
        assert!(plain.contains("After"));
    }

    #[test]
    fn test_to_plain_text_complex_document() {
        let markdown = r#"# Document Title

This is a paragraph with **bold** and *italic* text.

- [Link One](#one)
- [Link Two](#two)
- [Link Three](#three)

See [[WikiNote]] for more info."#;

        let plain = to_plain_text(markdown);

        // Should contain heading text
        assert!(plain.contains("Document Title"));
        // Should contain paragraph with formatting stripped
        assert!(plain.contains("bold"));
        assert!(plain.contains("italic"));
        // Should contain link text, not URLs
        assert!(plain.contains("Link One"));
        assert!(plain.contains("Link Two"));
        // Should contain wikilink display text
        assert!(plain.contains("WikiNote"));
        // Should NOT contain URLs
        assert!(!plain.contains("#one"));
        assert!(!plain.contains("#two"));
    }

    #[test]
    fn test_to_plain_text_treemd_use_case() {
        // This test validates the original treemd use case:
        // searching in "[Overview](#overview)" should only match visible text "Overview"
        // not the hidden anchor "#overview"
        let markdown = "[Overview](#overview)";
        let plain = to_plain_text(markdown);
        assert_eq!(plain, "Overview");

        // The visible text "Overview" has 1 'O', while raw markdown has 2 'o's total
        // (capital O in "Overview" + lowercase o in "#overview")
        // Plain text extraction should only show the visible part
        let o_count = plain.chars().filter(|c| *c == 'o' || *c == 'O').count();
        assert_eq!(
            o_count, 1,
            "Should only count 'o' in visible text, not hidden anchor"
        );

        // More explicitly: the anchor URL should not be in plain text
        assert!(!plain.contains("#overview"));
        assert!(!plain.contains("overview")); // lowercase version from anchor
    }

    #[test]
    fn test_to_plain_text_nested_formatting() {
        // Test nested structures
        let markdown = "**[bold link](url)** and *[italic link](url2)*";
        let plain = to_plain_text(markdown);
        // The link text should be extracted
        assert!(plain.contains("bold link"));
        assert!(plain.contains("italic link"));
        // URLs should not appear
        assert!(!plain.contains("url"));
    }

    #[test]
    fn test_nested_list_item_inline_elements() {
        // Test that inline elements from nested list items are collected
        // into the parent item's inline field
        let markdown = r#"- [Features](#features)
  - [Interactive TUI](#interactive-tui)
  - [CLI Mode](#cli-mode)"#;

        let blocks = parse_blocks(markdown);
        assert_eq!(blocks.len(), 1);

        if let ContentBlock::List { items, .. } = &blocks[0] {
            assert_eq!(items.len(), 1, "Should have 1 top-level item");

            let item = &items[0];
            // The inline field should contain ALL links, including from nested items
            let links: Vec<_> = item
                .inline
                .iter()
                .filter_map(|e| {
                    if let InlineElement::Link { text, url, .. } = e {
                        Some((text.as_str(), url.as_str()))
                    } else {
                        None
                    }
                })
                .collect();

            assert_eq!(links.len(), 3, "Should have 3 links total");
            assert!(
                links.iter().any(|(text, _)| *text == "Features"),
                "Should have Features link"
            );
            assert!(
                links.iter().any(|(text, _)| *text == "Interactive TUI"),
                "Should have Interactive TUI link"
            );
            assert!(
                links.iter().any(|(text, _)| *text == "CLI Mode"),
                "Should have CLI Mode link"
            );
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_deeply_nested_list_inline_elements() {
        // Test deeply nested list items
        let markdown = r#"- Level 1 [link1](url1)
  - Level 2 [link2](url2)
    - Level 3 [link3](url3)"#;

        let blocks = parse_blocks(markdown);

        if let ContentBlock::List { items, .. } = &blocks[0] {
            let item = &items[0];
            let links: Vec<_> = item
                .inline
                .iter()
                .filter(|e| matches!(e, InlineElement::Link { .. }))
                .collect();

            assert_eq!(links.len(), 3, "Should collect all 3 nested links");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_inline_element_line_offset() {
        // Test that line_offset is correctly tracked for nested list items
        let markdown = r#"- [Features](#features)
  - [Interactive TUI](#interactive-tui)
  - [CLI Mode](#cli-mode)"#;

        let blocks = parse_blocks(markdown);

        if let ContentBlock::List { items, .. } = &blocks[0] {
            let item = &items[0];
            let links: Vec<_> = item
                .inline
                .iter()
                .filter_map(|e| {
                    if let InlineElement::Link {
                        text, line_offset, ..
                    } = e
                    {
                        Some((text.as_str(), *line_offset))
                    } else {
                        None
                    }
                })
                .collect();

            assert_eq!(links.len(), 3);

            // Features is on line 0 (first line of the item)
            let features = links.iter().find(|(t, _)| *t == "Features").unwrap();
            assert_eq!(features.1, Some(0), "Features should be on line 0");

            // Interactive TUI is on line 1 (after first newline)
            let tui = links.iter().find(|(t, _)| *t == "Interactive TUI").unwrap();
            assert_eq!(tui.1, Some(1), "Interactive TUI should be on line 1");

            // CLI Mode is on line 2 (after second newline)
            let cli = links.iter().find(|(t, _)| *t == "CLI Mode").unwrap();
            assert_eq!(cli.1, Some(2), "CLI Mode should be on line 2");
        } else {
            panic!("Expected List block");
        }
    }

    #[test]
    fn test_line_offset_not_set_outside_lists() {
        // line_offset should be None for links outside of list items
        let markdown = "See [example](url) for more.";
        let blocks = parse_blocks(markdown);

        if let ContentBlock::Paragraph { inline, .. } = &blocks[0] {
            let link = inline
                .iter()
                .find(|e| matches!(e, InlineElement::Link { .. }));
            if let Some(InlineElement::Link { line_offset, .. }) = link {
                assert_eq!(
                    *line_offset, None,
                    "line_offset should be None outside lists"
                );
            } else {
                // A missing link must fail the test instead of skipping the assertion.
                panic!("Expected a Link element in the paragraph");
            }
        } else {
            panic!("Expected Paragraph block");
        }
    }
}