rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
3use lazy_static::lazy_static;
4use regex::Regex;
5
lazy_static! {
    // Link pattern covering both inline links `[text](url)` and reference links `[text][ref]`.
    // Flags: (?s) lets `.` match newlines, (?x) is verbose mode (whitespace and `#` comments
    // inside the pattern are ignored).
    // Groups: 1 = link text, 2 = inline URL, 3 = reference ID.
    static ref LINK_PATTERN: Regex = Regex::new(
        r"(?sx)
        \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]          # Link text in group 1 (handles nested brackets)
        (?:
            \(([^)]*)\)       # Inline URL in group 2 (can be empty)
            |
            \[([^\]]*)\]      # Reference ID in group 3
        )"
    ).unwrap();

    // Image pattern: same shape as LINK_PATTERN but requires a leading `!`.
    // Flags: (?s) lets `.` match newlines, (?x) is verbose mode.
    // Groups: 1 = alt text, 2 = inline URL, 3 = reference ID.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r"(?sx)
        !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]         # Alt text in group 1 (handles nested brackets)
        (?:
            \(([^)]*)\)       # Inline URL in group 2 (can be empty)
            |
            \[([^\]]*)\]      # Reference ID in group 3
        )"
    ).unwrap();

    // Reference definition pattern: `[id]: url "title"` with up to three leading spaces.
    // (?m) makes `^`/`$` match at line boundaries.
    // Groups: 1 = reference ID, 2 = URL, 3 = double-quoted title, 4 = single-quoted title.
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // Code span delimiter pattern: matches a run of one or more backticks.
    // It has no capture groups; pairing delimiters and extracting the span content
    // is left to the code that uses this pattern.
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Pattern for bare http/https/ftp URLs, with optional port, path, query and fragment.
    // Group 1 = scheme.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Pattern for bare email addresses (local part, `@`, domain with a 2+ letter TLD)
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // Pattern for angle bracket links `<url>` / `<email>` (to exclude from bare URL detection).
    // Group 1 = the URL or email address between the angle brackets.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Pattern for a blockquote prefix (optional whitespace, one or more `>`, optional
    // whitespace) at the start of a line; intended for parse_list_blocks.
    // Group 1 = the whole prefix.
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
60
61/// Pre-computed information about a line
62#[derive(Debug, Clone)]
63pub struct LineInfo {
64    /// The actual line content (without newline)
65    pub content: String,
66    /// Byte offset where this line starts in the document
67    pub byte_offset: usize,
68    /// Number of leading spaces/tabs
69    pub indent: usize,
70    /// Whether the line is blank (empty or only whitespace)
71    pub is_blank: bool,
72    /// Whether this line is inside a code block
73    pub in_code_block: bool,
74    /// Whether this line is inside front matter
75    pub in_front_matter: bool,
76    /// List item information if this line starts a list item
77    pub list_item: Option<ListItemInfo>,
78    /// Heading information if this line is a heading
79    pub heading: Option<HeadingInfo>,
80    /// Blockquote information if this line is a blockquote
81    pub blockquote: Option<BlockquoteInfo>,
82}
83
/// Details of a single list item marker
#[derive(Clone, Debug)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
98
/// Syntactic style used to write a heading.
///
/// Derives `Eq` in addition to `PartialEq`: the enum has no fields, so equality is total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
109
/// A link found in the document
#[derive(Clone, Debug)]
pub struct ParsedLink {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the link starts
    pub start_col: usize,
    /// 0-indexed column in the line where the link ends
    pub end_col: usize,
    /// Byte offset of the link start within the document
    pub byte_offset: usize,
    /// Byte offset of the link end within the document
    pub byte_end: usize,
    /// Text of the link
    pub text: String,
    /// URL of the link, or its reference
    pub url: String,
    /// `true` for a reference link `[text][ref]`, `false` for an inline link `[text](url)`
    pub is_reference: bool,
    /// Reference ID, for reference links
    pub reference_id: Option<String>,
}
132
/// An image found in the document
#[derive(Clone, Debug)]
pub struct ParsedImage {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the image starts
    pub start_col: usize,
    /// 0-indexed column in the line where the image ends
    pub end_col: usize,
    /// Byte offset of the image start within the document
    pub byte_offset: usize,
    /// Byte offset of the image end within the document
    pub byte_end: usize,
    /// Alternative text of the image
    pub alt_text: String,
    /// URL of the image, or its reference
    pub url: String,
    /// `true` for a reference image `![alt][ref]`, `false` for an inline image `![alt](url)`
    pub is_reference: bool,
    /// Reference ID, for reference images
    pub reference_id: Option<String>,
}
155
/// A reference definition of the form `[ref]: url "title"`
#[derive(Clone, Debug)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
}
168
/// An inline code span found in the document
#[derive(Clone, Debug)]
pub struct CodeSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the span starts
    pub start_col: usize,
    /// 0-indexed column in the line where the span ends
    pub end_col: usize,
    /// Byte offset of the span start within the document
    pub byte_offset: usize,
    /// Byte offset of the span end within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, ...)
    pub backtick_count: usize,
    /// Span content with the backticks removed
    pub content: String,
}
187
188/// Information about a heading
189#[derive(Debug, Clone)]
190pub struct HeadingInfo {
191    /// Heading level (1-6 for ATX, 1-2 for Setext)
192    pub level: u8,
193    /// Style of heading
194    pub style: HeadingStyle,
195    /// The heading marker (# characters or underline)
196    pub marker: String,
197    /// Column where the marker starts (0-based)
198    pub marker_column: usize,
199    /// Column where heading text starts
200    pub content_column: usize,
201    /// The heading text (without markers and without custom ID syntax)
202    pub text: String,
203    /// Custom header ID if present (e.g., from {#custom-id} syntax)
204    pub custom_id: Option<String>,
205    /// Original heading text including custom ID syntax
206    pub raw_text: String,
207    /// Whether it has a closing sequence (for ATX)
208    pub has_closing_sequence: bool,
209    /// The closing sequence if present
210    pub closing_sequence: String,
211}
212
/// Details of a blockquote line
#[derive(Clone, Debug)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for `>`, 2 for `>>`, and so on)
    pub nesting_level: usize,
    /// Indentation that precedes the blockquote marker
    pub indent: String,
    /// 0-based column of the first `>`
    pub marker_column: usize,
    /// Blockquote prefix (e.g. "> ", ">> ")
    pub prefix: String,
    /// Text that follows the blockquote marker(s)
    pub content: String,
    /// `true` when no space follows the marker
    pub has_no_space_after_marker: bool,
    /// `true` when more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
    /// `true` when this is an empty blockquote line that needs the MD028 fix
    pub needs_md028_fix: bool,
}
233
/// A contiguous block of list items
#[derive(Clone, Debug)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one
    pub is_ordered: bool,
    /// Marker used consistently across an unordered list, if there is one
    pub marker: Option<String>,
    /// Blockquote prefix of the list; empty when the list is not in a blockquote
    pub blockquote_prefix: String,
    /// Lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for a top-level list)
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g. 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
254
255use std::sync::{Arc, Mutex};
256
/// Counts of markdown-significant characters, used for quick content checks
#[derive(Clone, Debug, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
285
/// An HTML tag found in the document
#[derive(Clone, Debug)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the tag starts
    pub start_col: usize,
    /// 0-indexed column in the line where the tag ends
    pub end_col: usize,
    /// Byte offset of the tag start within the document
    pub byte_offset: usize,
    /// Byte offset of the tag end within the document
    pub byte_end: usize,
    /// Name of the tag (e.g. "div", "img", "br")
    pub tag_name: String,
    /// `true` for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// `true` for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
    /// The tag's raw content
    pub raw_content: String,
}
308
/// An emphasis span found in the document
#[derive(Clone, Debug)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the span starts
    pub start_col: usize,
    /// 0-indexed column in the line where the span ends
    pub end_col: usize,
    /// Byte offset of the span start within the document
    pub byte_offset: usize,
    /// Byte offset of the span end within the document
    pub byte_end: usize,
    /// Emphasis marker character (`*` or `_`)
    pub marker: char,
    /// How many markers are used (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Text enclosed by the emphasis markers
    pub content: String,
}
329
/// A table row found in the document
#[derive(Clone, Debug)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// `true` for a separator row (only `|`, `-`, `:` and spaces)
    pub is_separator: bool,
    /// How many pipe-separated cells the row has
    pub column_count: usize,
    /// Per-column alignment taken from the separator row:
    /// "left", "center", "right" or "none"
    pub column_alignments: Vec<String>,
}
342
/// A bare URL (one that is not part of a link) found in the document
#[derive(Clone, Debug)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed column in the line where the URL starts
    pub start_col: usize,
    /// 0-indexed column in the line where the URL ends
    pub end_col: usize,
    /// Byte offset of the URL start within the document
    pub byte_offset: usize,
    /// Byte offset of the URL end within the document
    pub byte_end: usize,
    /// The URL text itself
    pub url: String,
    /// Kind of URL: "http", "https", "ftp" or "email"
    pub url_type: String,
}
361
/// Shared, pre-parsed view of one markdown document.
///
/// Some data (lines, links, images, reference definitions, list blocks, character
/// frequencies) is stored directly; the `*_cache` fields hold data that is computed
/// on first access and then shared via `Arc`.
pub struct LintContext<'a> {
    /// The document text this context borrows
    pub content: &'a str,
    /// Byte offset at which each line starts
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// Lazy-loaded inline code spans
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Lazy-loaded table rows
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
}
379
380impl<'a> LintContext<'a> {
381    pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
382        let mut line_offsets = vec![0];
383        for (i, c) in content.char_indices() {
384            if c == '\n' {
385                line_offsets.push(i + 1);
386            }
387        }
388
389        // Detect code blocks once and cache them
390        let code_blocks = CodeBlockUtils::detect_code_blocks(content);
391
392        // Pre-compute line information
393        let lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
394
395        // Parse links, images, references, and list blocks
396        // Skip code spans - they'll be computed lazily
397        let links = Self::parse_links(content, &lines, &code_blocks, flavor);
398        let images = Self::parse_images(content, &lines, &code_blocks);
399        let reference_defs = Self::parse_reference_defs(content, &lines);
400        let list_blocks = Self::parse_list_blocks(&lines);
401
402        // Compute character frequency for fast content analysis
403        let char_frequency = Self::compute_char_frequency(content);
404
405        Self {
406            content,
407            line_offsets,
408            code_blocks,
409            lines,
410            links,
411            images,
412            reference_defs,
413            code_spans_cache: Mutex::new(None),
414            list_blocks,
415            char_frequency,
416            html_tags_cache: Mutex::new(None),
417            emphasis_spans_cache: Mutex::new(None),
418            table_rows_cache: Mutex::new(None),
419            bare_urls_cache: Mutex::new(None),
420            flavor,
421        }
422    }
423
424    /// Get code spans - computed lazily on first access
425    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
426        let mut cache = self.code_spans_cache.lock().unwrap();
427
428        // Check if we need to compute code spans
429        if cache.is_none() {
430            let code_spans = Self::parse_code_spans(self.content, &self.lines);
431            *cache = Some(Arc::new(code_spans));
432        }
433
434        // Return a reference to the cached code spans
435        cache.as_ref().unwrap().clone()
436    }
437
438    /// Get HTML tags - computed lazily on first access
439    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
440        let mut cache = self.html_tags_cache.lock().unwrap();
441
442        if cache.is_none() {
443            let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
444            *cache = Some(Arc::new(html_tags));
445        }
446
447        cache.as_ref().unwrap().clone()
448    }
449
450    /// Get emphasis spans - computed lazily on first access
451    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
452        let mut cache = self.emphasis_spans_cache.lock().unwrap();
453
454        if cache.is_none() {
455            let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
456            *cache = Some(Arc::new(emphasis_spans));
457        }
458
459        cache.as_ref().unwrap().clone()
460    }
461
462    /// Get table rows - computed lazily on first access
463    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
464        let mut cache = self.table_rows_cache.lock().unwrap();
465
466        if cache.is_none() {
467            let table_rows = Self::parse_table_rows(&self.lines);
468            *cache = Some(Arc::new(table_rows));
469        }
470
471        cache.as_ref().unwrap().clone()
472    }
473
474    /// Get bare URLs - computed lazily on first access
475    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
476        let mut cache = self.bare_urls_cache.lock().unwrap();
477
478        if cache.is_none() {
479            let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
480            *cache = Some(Arc::new(bare_urls));
481        }
482
483        cache.as_ref().unwrap().clone()
484    }
485
486    /// Map a byte offset to (line, column)
487    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
488        match self.line_offsets.binary_search(&offset) {
489            Ok(line) => (line + 1, 1),
490            Err(line) => {
491                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
492                (line, offset - line_start + 1)
493            }
494        }
495    }
496
497    /// Check if a position is within a code block or code span
498    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
499        // Check code blocks first
500        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
501            return true;
502        }
503
504        // Check inline code spans (lazy load if needed)
505        self.code_spans()
506            .iter()
507            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
508    }
509
510    /// Get line information by line number (1-indexed)
511    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
512        if line_num > 0 {
513            self.lines.get(line_num - 1)
514        } else {
515            None
516        }
517    }
518
519    /// Get byte offset for a line number (1-indexed)
520    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
521        self.line_info(line_num).map(|info| info.byte_offset)
522    }
523
524    /// Get URL for a reference link/image by its ID
525    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
526        let normalized_id = ref_id.to_lowercase();
527        self.reference_defs
528            .iter()
529            .find(|def| def.id == normalized_id)
530            .map(|def| def.url.as_str())
531    }
532
533    /// Get links on a specific line
534    pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
535        self.links.iter().filter(|link| link.line == line_num).collect()
536    }
537
538    /// Get images on a specific line
539    pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
540        self.images.iter().filter(|img| img.line == line_num).collect()
541    }
542
543    /// Check if a line is part of a list block
544    pub fn is_in_list_block(&self, line_num: usize) -> bool {
545        self.list_blocks
546            .iter()
547            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
548    }
549
550    /// Get the list block containing a specific line
551    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
552        self.list_blocks
553            .iter()
554            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
555    }
556
557    /// Check if content has any instances of a specific character (fast)
558    pub fn has_char(&self, ch: char) -> bool {
559        match ch {
560            '#' => self.char_frequency.hash_count > 0,
561            '*' => self.char_frequency.asterisk_count > 0,
562            '_' => self.char_frequency.underscore_count > 0,
563            '-' => self.char_frequency.hyphen_count > 0,
564            '+' => self.char_frequency.plus_count > 0,
565            '>' => self.char_frequency.gt_count > 0,
566            '|' => self.char_frequency.pipe_count > 0,
567            '[' => self.char_frequency.bracket_count > 0,
568            '`' => self.char_frequency.backtick_count > 0,
569            '<' => self.char_frequency.lt_count > 0,
570            '!' => self.char_frequency.exclamation_count > 0,
571            '\n' => self.char_frequency.newline_count > 0,
572            _ => self.content.contains(ch), // Fallback for other characters
573        }
574    }
575
576    /// Get count of a specific character (fast)
577    pub fn char_count(&self, ch: char) -> usize {
578        match ch {
579            '#' => self.char_frequency.hash_count,
580            '*' => self.char_frequency.asterisk_count,
581            '_' => self.char_frequency.underscore_count,
582            '-' => self.char_frequency.hyphen_count,
583            '+' => self.char_frequency.plus_count,
584            '>' => self.char_frequency.gt_count,
585            '|' => self.char_frequency.pipe_count,
586            '[' => self.char_frequency.bracket_count,
587            '`' => self.char_frequency.backtick_count,
588            '<' => self.char_frequency.lt_count,
589            '!' => self.char_frequency.exclamation_count,
590            '\n' => self.char_frequency.newline_count,
591            _ => self.content.matches(ch).count(), // Fallback for other characters
592        }
593    }
594
595    /// Check if content likely contains headings (fast)
596    pub fn likely_has_headings(&self) -> bool {
597        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
598    }
599
600    /// Check if content likely contains lists (fast)
601    pub fn likely_has_lists(&self) -> bool {
602        self.char_frequency.asterisk_count > 0
603            || self.char_frequency.hyphen_count > 0
604            || self.char_frequency.plus_count > 0
605    }
606
607    /// Check if content likely contains emphasis (fast)
608    pub fn likely_has_emphasis(&self) -> bool {
609        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
610    }
611
612    /// Check if content likely contains tables (fast)
613    pub fn likely_has_tables(&self) -> bool {
614        self.char_frequency.pipe_count > 2
615    }
616
617    /// Check if content likely contains blockquotes (fast)
618    pub fn likely_has_blockquotes(&self) -> bool {
619        self.char_frequency.gt_count > 0
620    }
621
622    /// Check if content likely contains code (fast)
623    pub fn likely_has_code(&self) -> bool {
624        self.char_frequency.backtick_count > 0
625    }
626
627    /// Check if content likely contains links or images (fast)
628    pub fn likely_has_links_or_images(&self) -> bool {
629        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
630    }
631
632    /// Check if content likely contains HTML (fast)
633    pub fn likely_has_html(&self) -> bool {
634        self.char_frequency.lt_count > 0
635    }
636
637    /// Get HTML tags on a specific line
638    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
639        self.html_tags()
640            .iter()
641            .filter(|tag| tag.line == line_num)
642            .cloned()
643            .collect()
644    }
645
646    /// Get emphasis spans on a specific line
647    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
648        self.emphasis_spans()
649            .iter()
650            .filter(|span| span.line == line_num)
651            .cloned()
652            .collect()
653    }
654
655    /// Get table rows on a specific line
656    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
657        self.table_rows()
658            .iter()
659            .filter(|row| row.line == line_num)
660            .cloned()
661            .collect()
662    }
663
664    /// Get bare URLs on a specific line
665    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
666        self.bare_urls()
667            .iter()
668            .filter(|url| url.line == line_num)
669            .cloned()
670            .collect()
671    }
672
673    /// Parse all links in the content
674    fn parse_links(
675        content: &str,
676        lines: &[LineInfo],
677        code_blocks: &[(usize, usize)],
678        flavor: MarkdownFlavor,
679    ) -> Vec<ParsedLink> {
680        use crate::utils::skip_context::is_mkdocs_snippet_line;
681
682        // Pre-size based on a heuristic: most markdown files have relatively few links
683        let mut links = Vec::with_capacity(content.len() / 500); // ~1 link per 500 chars
684
685        // Parse links across the entire content, not line by line
686        for cap in LINK_PATTERN.captures_iter(content) {
687            let full_match = cap.get(0).unwrap();
688            let match_start = full_match.start();
689            let match_end = full_match.end();
690
691            // Skip if the opening bracket is escaped (preceded by \)
692            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
693                continue;
694            }
695
696            // Skip if this is actually an image (preceded by !)
697            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
698                continue;
699            }
700
701            // Skip if in code block or span
702            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
703                continue;
704            }
705
706            // Skip if this link is on a MkDocs snippet line
707            // Find which line this link is on
708            let line_idx = lines
709                .iter()
710                .position(|line| {
711                    match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
712                })
713                .unwrap_or(0);
714
715            if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
716                continue;
717            }
718
719            // Find which line this link starts on
720            let mut line_num = 1;
721            let mut col_start = match_start;
722            for (idx, line_info) in lines.iter().enumerate() {
723                if match_start >= line_info.byte_offset {
724                    line_num = idx + 1;
725                    col_start = match_start - line_info.byte_offset;
726                } else {
727                    break;
728                }
729            }
730
731            // Find which line this link ends on (and calculate column on that line)
732            let mut end_line_num = 1;
733            let mut col_end = match_end;
734            for (idx, line_info) in lines.iter().enumerate() {
735                if match_end > line_info.byte_offset {
736                    end_line_num = idx + 1;
737                    col_end = match_end - line_info.byte_offset;
738                } else {
739                    break;
740                }
741            }
742
743            // For single-line links, use the same approach as before
744            if line_num == end_line_num {
745                // col_end is already correct
746            } else {
747                // For multi-line links, col_end represents the column on the ending line
748                // which is what we want
749            }
750
751            let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
752
753            if let Some(inline_url) = cap.get(2) {
754                // Inline link
755                links.push(ParsedLink {
756                    line: line_num,
757                    start_col: col_start,
758                    end_col: col_end,
759                    byte_offset: match_start,
760                    byte_end: match_end,
761                    text,
762                    url: inline_url.as_str().to_string(),
763                    is_reference: false,
764                    reference_id: None,
765                });
766            } else if let Some(ref_id) = cap.get(3) {
767                // Reference link
768                let ref_id_str = ref_id.as_str();
769                let normalized_ref = if ref_id_str.is_empty() {
770                    text.to_lowercase() // Implicit reference
771                } else {
772                    ref_id_str.to_lowercase()
773                };
774
775                links.push(ParsedLink {
776                    line: line_num,
777                    start_col: col_start,
778                    end_col: col_end,
779                    byte_offset: match_start,
780                    byte_end: match_end,
781                    text,
782                    url: String::new(), // Will be resolved with reference_defs
783                    is_reference: true,
784                    reference_id: Some(normalized_ref),
785                });
786            }
787        }
788
789        links
790    }
791
792    /// Parse all images in the content
793    fn parse_images(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<ParsedImage> {
794        // Pre-size based on a heuristic: images are less common than links
795        let mut images = Vec::with_capacity(content.len() / 1000); // ~1 image per 1000 chars
796
797        // Parse images across the entire content, not line by line
798        for cap in IMAGE_PATTERN.captures_iter(content) {
799            let full_match = cap.get(0).unwrap();
800            let match_start = full_match.start();
801            let match_end = full_match.end();
802
803            // Skip if the ! is escaped (preceded by \)
804            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
805                continue;
806            }
807
808            // Skip if in code block or span
809            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
810                continue;
811            }
812
813            // Find which line this image starts on
814            let mut line_num = 1;
815            let mut col_start = match_start;
816            for (idx, line_info) in lines.iter().enumerate() {
817                if match_start >= line_info.byte_offset {
818                    line_num = idx + 1;
819                    col_start = match_start - line_info.byte_offset;
820                } else {
821                    break;
822                }
823            }
824
825            // Find which line this image ends on (and calculate column on that line)
826            let mut end_line_num = 1;
827            let mut col_end = match_end;
828            for (idx, line_info) in lines.iter().enumerate() {
829                if match_end > line_info.byte_offset {
830                    end_line_num = idx + 1;
831                    col_end = match_end - line_info.byte_offset;
832                } else {
833                    break;
834                }
835            }
836
837            // For single-line images, use the same approach as before
838            if line_num == end_line_num {
839                // col_end is already correct
840            } else {
841                // For multi-line images, col_end represents the column on the ending line
842                // which is what we want
843            }
844
845            let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
846
847            if let Some(inline_url) = cap.get(2) {
848                // Inline image
849                images.push(ParsedImage {
850                    line: line_num,
851                    start_col: col_start,
852                    end_col: col_end,
853                    byte_offset: match_start,
854                    byte_end: match_end,
855                    alt_text,
856                    url: inline_url.as_str().to_string(),
857                    is_reference: false,
858                    reference_id: None,
859                });
860            } else if let Some(ref_id) = cap.get(3) {
861                // Reference image
862                let ref_id_str = ref_id.as_str();
863                let normalized_ref = if ref_id_str.is_empty() {
864                    alt_text.to_lowercase() // Implicit reference
865                } else {
866                    ref_id_str.to_lowercase()
867                };
868
869                images.push(ParsedImage {
870                    line: line_num,
871                    start_col: col_start,
872                    end_col: col_end,
873                    byte_offset: match_start,
874                    byte_end: match_end,
875                    alt_text,
876                    url: String::new(), // Will be resolved with reference_defs
877                    is_reference: true,
878                    reference_id: Some(normalized_ref),
879                });
880            }
881        }
882
883        images
884    }
885
886    /// Parse reference definitions
887    fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
888        // Pre-size based on lines count as reference definitions are line-based
889        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
890
891        for (line_idx, line_info) in lines.iter().enumerate() {
892            // Skip lines in code blocks
893            if line_info.in_code_block {
894                continue;
895            }
896
897            let line = &line_info.content;
898            let line_num = line_idx + 1;
899
900            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
901                let id = cap.get(1).unwrap().as_str().to_lowercase();
902                let url = cap.get(2).unwrap().as_str().to_string();
903                let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
904
905                refs.push(ReferenceDef {
906                    line: line_num,
907                    id,
908                    url,
909                    title,
910                });
911            }
912        }
913
914        refs
915    }
916
917    /// Pre-compute line information
918    fn compute_line_info(
919        content: &str,
920        line_offsets: &[usize],
921        code_blocks: &[(usize, usize)],
922        flavor: MarkdownFlavor,
923    ) -> Vec<LineInfo> {
924        lazy_static! {
925            // Regex for list detection - allow any whitespace including no space (to catch malformed lists)
926            static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
927            static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
928
929            // Regex for blockquote prefix
930            static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
931
932            // Regex for heading detection
933            static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
934            static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
935
936            // Regex for blockquote detection
937            static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
938        }
939
940        let content_lines: Vec<&str> = content.lines().collect();
941        let mut lines = Vec::with_capacity(content_lines.len());
942
943        // Detect front matter boundaries FIRST, before any other parsing
944        let mut in_front_matter = false;
945        let mut front_matter_end = 0;
946        if content_lines.first().map(|l| l.trim()) == Some("---") {
947            in_front_matter = true;
948            for (idx, line) in content_lines.iter().enumerate().skip(1) {
949                if line.trim() == "---" {
950                    front_matter_end = idx;
951                    break;
952                }
953            }
954        }
955
956        for (i, line) in content_lines.iter().enumerate() {
957            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
958            let indent = line.len() - line.trim_start().len();
959            // For blank detection, consider blockquote context
960            let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
961                // In blockquote context, check if content after prefix is blank
962                let after_prefix = caps.get(2).map_or("", |m| m.as_str());
963                after_prefix.trim().is_empty()
964            } else {
965                line.trim().is_empty()
966            };
967            // Check if this line is inside a code block (not inline code span)
968            // We only want to check for fenced/indented code blocks, not inline code
969            let in_code_block = code_blocks.iter().any(|&(start, end)| {
970                // Only consider ranges that span multiple lines (code blocks)
971                // Inline code spans are typically on a single line
972
973                // Ensure we're at valid UTF-8 boundaries
974                let safe_start = if start > 0 && !content.is_char_boundary(start) {
975                    // Find the nearest valid boundary before start
976                    let mut boundary = start;
977                    while boundary > 0 && !content.is_char_boundary(boundary) {
978                        boundary -= 1;
979                    }
980                    boundary
981                } else {
982                    start
983                };
984
985                let safe_end = if end < content.len() && !content.is_char_boundary(end) {
986                    // Find the nearest valid boundary after end
987                    let mut boundary = end;
988                    while boundary < content.len() && !content.is_char_boundary(boundary) {
989                        boundary += 1;
990                    }
991                    boundary
992                } else {
993                    end.min(content.len())
994                };
995
996                let block_content = &content[safe_start..safe_end];
997                let is_multiline = block_content.contains('\n');
998                let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
999                let is_indented = !is_fenced
1000                    && block_content
1001                        .lines()
1002                        .all(|l| l.starts_with("    ") || l.starts_with("\t") || l.trim().is_empty());
1003
1004                byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1005            });
1006
1007            // Detect list items (skip if in frontmatter)
1008            let list_item = if !(in_code_block || is_blank || in_front_matter && i <= front_matter_end) {
1009                // Strip blockquote prefix if present for list detection
1010                let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1011                    let prefix = caps.get(1).unwrap().as_str();
1012                    let content = caps.get(2).unwrap().as_str();
1013                    (content, prefix.len())
1014                } else {
1015                    (&**line, 0)
1016                };
1017
1018                if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1019                    let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1020                    let marker = caps.get(2).map_or("", |m| m.as_str());
1021                    let spacing = caps.get(3).map_or("", |m| m.as_str());
1022                    let _content = caps.get(4).map_or("", |m| m.as_str());
1023                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1024                    let content_column = marker_column + marker.len() + spacing.len();
1025
1026                    // According to CommonMark spec, unordered list items MUST have at least one space
1027                    // after the marker (-, *, or +). Without a space, it's not a list item.
1028                    // This also naturally handles cases like:
1029                    // - *emphasis* (not a list)
1030                    // - **bold** (not a list)
1031                    // - --- (horizontal rule, not a list)
1032                    if spacing.is_empty() {
1033                        None
1034                    } else {
1035                        Some(ListItemInfo {
1036                            marker: marker.to_string(),
1037                            is_ordered: false,
1038                            number: None,
1039                            marker_column,
1040                            content_column,
1041                        })
1042                    }
1043                } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1044                    let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1045                    let number_str = caps.get(2).map_or("", |m| m.as_str());
1046                    let delimiter = caps.get(3).map_or("", |m| m.as_str());
1047                    let spacing = caps.get(4).map_or("", |m| m.as_str());
1048                    let _content = caps.get(5).map_or("", |m| m.as_str());
1049                    let marker = format!("{number_str}{delimiter}");
1050                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1051                    let content_column = marker_column + marker.len() + spacing.len();
1052
1053                    // According to CommonMark spec, ordered list items MUST have at least one space
1054                    // after the marker (period or parenthesis). Without a space, it's not a list item.
1055                    if spacing.is_empty() {
1056                        None
1057                    } else {
1058                        Some(ListItemInfo {
1059                            marker,
1060                            is_ordered: true,
1061                            number: number_str.parse().ok(),
1062                            marker_column,
1063                            content_column,
1064                        })
1065                    }
1066                } else {
1067                    None
1068                }
1069            } else {
1070                None
1071            };
1072
1073            lines.push(LineInfo {
1074                content: line.to_string(),
1075                byte_offset,
1076                indent,
1077                is_blank,
1078                in_code_block,
1079                in_front_matter: in_front_matter && i <= front_matter_end,
1080                list_item,
1081                heading: None,    // Will be populated in second pass for Setext headings
1082                blockquote: None, // Will be populated after line creation
1083            });
1084        }
1085
1086        // Second pass: detect headings (including Setext which needs look-ahead) and blockquotes
1087        for i in 0..content_lines.len() {
1088            if lines[i].in_code_block {
1089                continue;
1090            }
1091
1092            // Skip lines in front matter
1093            if in_front_matter && i <= front_matter_end {
1094                continue;
1095            }
1096
1097            let line = content_lines[i];
1098
1099            // Check for blockquotes (even on blank lines within blockquotes)
1100            if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1101                let indent_str = caps.get(1).map_or("", |m| m.as_str());
1102                let markers = caps.get(2).map_or("", |m| m.as_str());
1103                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1104                let content = caps.get(4).map_or("", |m| m.as_str());
1105
1106                let nesting_level = markers.chars().filter(|&c| c == '>').count();
1107                let marker_column = indent_str.len();
1108
1109                // Build the prefix (indentation + markers + space)
1110                let prefix = format!("{indent_str}{markers}{spaces_after}");
1111
1112                // Check for various blockquote issues
1113                let has_no_space = spaces_after.is_empty() && !content.is_empty();
1114                // Consider tabs as multiple spaces, or actual multiple spaces
1115                let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1116
1117                // Check if needs MD028 fix (empty blockquote line without proper spacing)
1118                // MD028 flags empty blockquote lines that don't have a single space after the marker
1119                // Lines like "> " or ">> " are already correct and don't need fixing
1120                let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1121
1122                lines[i].blockquote = Some(BlockquoteInfo {
1123                    nesting_level,
1124                    indent: indent_str.to_string(),
1125                    marker_column,
1126                    prefix,
1127                    content: content.to_string(),
1128                    has_no_space_after_marker: has_no_space,
1129                    has_multiple_spaces_after_marker: has_multiple_spaces,
1130                    needs_md028_fix,
1131                });
1132            }
1133
1134            // Skip heading detection for blank lines
1135            if lines[i].is_blank {
1136                continue;
1137            }
1138
1139            // Check for ATX headings (but skip MkDocs snippet lines)
1140            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
1141            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1142                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1143                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1144            } else {
1145                false
1146            };
1147
1148            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1149                // Skip headings inside HTML comments
1150                if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1151                    continue;
1152                }
1153                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1154                let hashes = caps.get(2).map_or("", |m| m.as_str());
1155                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1156                let rest = caps.get(4).map_or("", |m| m.as_str());
1157
1158                let level = hashes.len() as u8;
1159                let marker_column = leading_spaces.len();
1160
1161                // Check for closing sequence, but handle custom IDs that might come after
1162                let (text, has_closing, closing_seq) = {
1163                    // First check if there's a custom ID at the end
1164                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1165                        // Check if this looks like a valid custom ID (ends with })
1166                        if rest[id_start..].trim_end().ends_with('}') {
1167                            // Split off the custom ID
1168                            (&rest[..id_start], &rest[id_start..])
1169                        } else {
1170                            (rest, "")
1171                        }
1172                    } else {
1173                        (rest, "")
1174                    };
1175
1176                    // Now look for closing hashes in the part before the custom ID
1177                    let trimmed_rest = rest_without_id.trim_end();
1178                    if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1179                        // Look for the start of the hash sequence
1180                        let mut start_of_hashes = last_hash_pos;
1181                        while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1182                            start_of_hashes -= 1;
1183                        }
1184
1185                        // Check if there's at least one space before the closing hashes
1186                        let has_space_before = start_of_hashes == 0
1187                            || trimmed_rest
1188                                .chars()
1189                                .nth(start_of_hashes - 1)
1190                                .is_some_and(|c| c.is_whitespace());
1191
1192                        // Check if this is a valid closing sequence (all hashes to end of trimmed part)
1193                        let potential_closing = &trimmed_rest[start_of_hashes..];
1194                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1195
1196                        if is_all_hashes && has_space_before {
1197                            // This is a closing sequence
1198                            let closing_hashes = potential_closing.to_string();
1199                            // The text is everything before the closing hashes
1200                            // Don't include the custom ID here - it will be extracted later
1201                            let text_part = if !custom_id_part.is_empty() {
1202                                // If we have a custom ID, append it back to get the full rest
1203                                // This allows the extract_header_id function to handle it properly
1204                                format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1205                            } else {
1206                                rest_without_id[..start_of_hashes].trim_end().to_string()
1207                            };
1208                            (text_part, true, closing_hashes)
1209                        } else {
1210                            // Not a valid closing sequence, return the full content
1211                            (rest.to_string(), false, String::new())
1212                        }
1213                    } else {
1214                        // No hashes found, return the full content
1215                        (rest.to_string(), false, String::new())
1216                    }
1217                };
1218
1219                let content_column = marker_column + hashes.len() + spaces_after.len();
1220
1221                // Extract custom header ID if present
1222                let raw_text = text.trim().to_string();
1223                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1224
1225                // If no custom ID was found on the header line, check the next line for standalone attr-list
1226                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1227                    let next_line = content_lines[i + 1];
1228                    if !lines[i + 1].in_code_block
1229                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1230                        && let Some(next_line_id) =
1231                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1232                    {
1233                        custom_id = Some(next_line_id);
1234                    }
1235                }
1236
1237                lines[i].heading = Some(HeadingInfo {
1238                    level,
1239                    style: HeadingStyle::ATX,
1240                    marker: hashes.to_string(),
1241                    marker_column,
1242                    content_column,
1243                    text: clean_text,
1244                    custom_id,
1245                    raw_text,
1246                    has_closing_sequence: has_closing,
1247                    closing_sequence: closing_seq,
1248                });
1249            }
1250            // Check for Setext headings (need to look at next line)
1251            else if i + 1 < content_lines.len() {
1252                let next_line = content_lines[i + 1];
1253                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1254                    // Skip if next line is front matter delimiter
1255                    if in_front_matter && i < front_matter_end {
1256                        continue;
1257                    }
1258
1259                    // Skip Setext headings inside HTML comments
1260                    if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1261                        continue;
1262                    }
1263
1264                    let underline = next_line.trim();
1265
1266                    // Skip if the underline looks like YAML delimiter (exactly 3 or more dashes)
1267                    // YAML uses exactly `---` while Setext headings typically use longer underlines
1268                    if underline == "---" {
1269                        continue;
1270                    }
1271
1272                    // Skip if the current line looks like YAML key-value syntax
1273                    let current_line_trimmed = line.trim();
1274                    if current_line_trimmed.contains(':')
1275                        && !current_line_trimmed.starts_with('#')
1276                        && !current_line_trimmed.contains('[')
1277                        && !current_line_trimmed.contains("](")
1278                    {
1279                        // This looks like "key: value" which suggests YAML, not a heading
1280                        continue;
1281                    }
1282
1283                    let level = if underline.starts_with('=') { 1 } else { 2 };
1284                    let style = if level == 1 {
1285                        HeadingStyle::Setext1
1286                    } else {
1287                        HeadingStyle::Setext2
1288                    };
1289
1290                    // Extract custom header ID if present
1291                    let raw_text = line.trim().to_string();
1292                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1293
1294                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
1295                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1296                        let attr_line = content_lines[i + 2];
1297                        if !lines[i + 2].in_code_block
1298                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1299                            && let Some(attr_line_id) =
1300                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1301                        {
1302                            custom_id = Some(attr_line_id);
1303                        }
1304                    }
1305
1306                    lines[i].heading = Some(HeadingInfo {
1307                        level,
1308                        style,
1309                        marker: underline.to_string(),
1310                        marker_column: next_line.len() - next_line.trim_start().len(),
1311                        content_column: lines[i].indent,
1312                        text: clean_text,
1313                        custom_id,
1314                        raw_text,
1315                        has_closing_sequence: false,
1316                        closing_sequence: String::new(),
1317                    });
1318                }
1319            }
1320        }
1321
1322        lines
1323    }
1324
1325    /// Parse all inline code spans in the content
1326    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
1327        // Pre-size based on content - code spans are fairly common
1328        let mut code_spans = Vec::with_capacity(content.matches('`').count() / 2);
1329
1330        // Quick check - if no backticks, no code spans
1331        if !content.contains('`') {
1332            return code_spans;
1333        }
1334
1335        let bytes = content.as_bytes();
1336
1337        // First pass: identify which backticks are escaped
1338        let mut escaped_positions = Vec::new();
1339        for i in 0..bytes.len() {
1340            if i > 0 && bytes[i - 1] == b'\\' && bytes[i] == b'`' {
1341                escaped_positions.push(i);
1342            }
1343        }
1344
1345        let mut pos = 0;
1346        while pos < bytes.len() {
1347            // Find the next backtick
1348            if let Some(backtick_start) = content[pos..].find('`') {
1349                let start_pos = pos + backtick_start;
1350
1351                // Skip if this backtick is escaped
1352                if escaped_positions.contains(&start_pos) {
1353                    pos = start_pos + 1;
1354                    continue;
1355                }
1356
1357                // Skip if this backtick is inside a code block
1358                let mut in_code_block = false;
1359                for (line_idx, line_info) in lines.iter().enumerate() {
1360                    if start_pos >= line_info.byte_offset
1361                        && (line_idx + 1 >= lines.len() || start_pos < lines[line_idx + 1].byte_offset)
1362                    {
1363                        in_code_block = line_info.in_code_block;
1364                        break;
1365                    }
1366                }
1367
1368                if in_code_block {
1369                    pos = start_pos + 1;
1370                    continue;
1371                }
1372
1373                // Count consecutive non-escaped backticks
1374                let mut backtick_count = 0;
1375                let mut i = start_pos;
1376                while i < bytes.len() && bytes[i] == b'`' && !escaped_positions.contains(&i) {
1377                    backtick_count += 1;
1378                    i += 1;
1379                }
1380
1381                // Look for matching closing backticks
1382                let search_start = start_pos + backtick_count;
1383                let mut found_closing = false;
1384                let mut closing_end = 0;
1385
1386                // Search for the exact number of unescaped backticks
1387                let mut search_pos = search_start;
1388                while search_pos < bytes.len() {
1389                    // Look for the first backtick
1390                    if let Some(rel_pos) = content[search_pos..].find('`') {
1391                        let backtick_pos = search_pos + rel_pos;
1392
1393                        // Skip if escaped
1394                        if escaped_positions.contains(&backtick_pos) {
1395                            search_pos = backtick_pos + 1;
1396                            continue;
1397                        }
1398
1399                        // Count consecutive non-escaped backticks at this position
1400                        let mut count = 0;
1401                        let mut j = backtick_pos;
1402                        while j < bytes.len() && bytes[j] == b'`' && !escaped_positions.contains(&j) {
1403                            count += 1;
1404                            j += 1;
1405                        }
1406
1407                        // Check if we found the right number of backticks
1408                        if count == backtick_count {
1409                            // Make sure it's not part of a longer sequence
1410                            let before_ok = backtick_pos == 0
1411                                || bytes[backtick_pos - 1] != b'`'
1412                                || escaped_positions.contains(&(backtick_pos - 1));
1413                            let after_ok = j >= bytes.len() || bytes[j] != b'`' || escaped_positions.contains(&j);
1414
1415                            if before_ok && after_ok {
1416                                found_closing = true;
1417                                closing_end = j;
1418                                break;
1419                            }
1420                        }
1421
1422                        search_pos = backtick_pos + 1;
1423                    } else {
1424                        break;
1425                    }
1426                }
1427
1428                if found_closing {
1429                    // We found a valid code span
1430                    let content_start = start_pos + backtick_count;
1431                    let content_end = closing_end - backtick_count;
1432                    let span_content = content[content_start..content_end].to_string();
1433
1434                    // Find which line this code span starts on
1435                    let mut line_num = 1;
1436                    let mut col_start = start_pos;
1437                    for (idx, line_info) in lines.iter().enumerate() {
1438                        if start_pos >= line_info.byte_offset {
1439                            line_num = idx + 1;
1440                            col_start = start_pos - line_info.byte_offset;
1441                        } else {
1442                            break;
1443                        }
1444                    }
1445
1446                    // Find end column
1447                    let mut col_end = closing_end;
1448                    for line_info in lines.iter() {
1449                        if closing_end > line_info.byte_offset {
1450                            col_end = closing_end - line_info.byte_offset;
1451                        } else {
1452                            break;
1453                        }
1454                    }
1455
1456                    code_spans.push(CodeSpan {
1457                        line: line_num,
1458                        start_col: col_start,
1459                        end_col: col_end,
1460                        byte_offset: start_pos,
1461                        byte_end: closing_end,
1462                        backtick_count,
1463                        content: span_content,
1464                    });
1465
1466                    // Move position after this code span
1467                    pos = closing_end;
1468                } else {
1469                    // No valid closing found, skip these opening backticks
1470                    pos = start_pos + backtick_count;
1471                }
1472            } else {
1473                // No more backticks found
1474                break;
1475            }
1476        }
1477
1478        code_spans
1479    }
1480
1481    /// Parse all list blocks in the content
1482    fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1483        // Pre-size based on lines that could be list items
1484        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
1485        let mut current_block: Option<ListBlock> = None;
1486        let mut last_list_item_line = 0;
1487        let mut current_indent_level = 0;
1488        let mut last_marker_width = 0;
1489
1490        for (line_idx, line_info) in lines.iter().enumerate() {
1491            let line_num = line_idx + 1;
1492
1493            // Enhanced code block handling using Design #3's context analysis
1494            if line_info.in_code_block {
1495                if let Some(ref mut block) = current_block {
1496                    // Calculate minimum indentation for list continuation
1497                    let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1498
1499                    // Analyze code block context using the three-tier classification
1500                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1501
1502                    match context {
1503                        CodeBlockContext::Indented => {
1504                            // Code block is properly indented - continues the list
1505                            block.end_line = line_num;
1506                            continue;
1507                        }
1508                        CodeBlockContext::Standalone => {
1509                            // Code block separates lists - end current block
1510                            let completed_block = current_block.take().unwrap();
1511                            list_blocks.push(completed_block);
1512                            continue;
1513                        }
1514                        CodeBlockContext::Adjacent => {
1515                            // Edge case - use conservative behavior (continue list)
1516                            block.end_line = line_num;
1517                            continue;
1518                        }
1519                    }
1520                } else {
1521                    // No current list block - skip code block lines
1522                    continue;
1523                }
1524            }
1525
1526            // Extract blockquote prefix if any
1527            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1528                caps.get(0).unwrap().as_str().to_string()
1529            } else {
1530                String::new()
1531            };
1532
1533            // Check if this line is a list item
1534            if let Some(list_item) = &line_info.list_item {
1535                // Calculate nesting level based on indentation
1536                let item_indent = list_item.marker_column;
1537                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
1538
1539                if let Some(ref mut block) = current_block {
1540                    // Check if this continues the current block
1541                    // For nested lists, we need to check if this is a nested item (higher nesting level)
1542                    // or a continuation at the same or lower level
1543                    let is_nested = nesting > block.nesting_level;
1544                    let same_type =
1545                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1546                    let same_context = block.blockquote_prefix == blockquote_prefix;
1547                    let reasonable_distance = line_num <= last_list_item_line + 2; // Allow one blank line
1548
1549                    // For unordered lists, also check marker consistency
1550                    let marker_compatible =
1551                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1552
1553                    // Check if there's non-list content between the last item and this one
1554                    let has_non_list_content = {
1555                        let mut found_non_list = false;
1556                        // Use the last item from the current block, not the global last_list_item_line
1557                        let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1558
1559                        // Debug: Special check for problematic line
1560                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1561                            let last_line = &lines[block_last_item_line - 1];
1562                            if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1563                                log::debug!(
1564                                    "After problematic line {}: checking lines {} to {} for non-list content",
1565                                    block_last_item_line,
1566                                    block_last_item_line + 1,
1567                                    line_num
1568                                );
1569                                // If they're consecutive list items, there's no content between
1570                                if line_num == block_last_item_line + 1 {
1571                                    log::debug!("Lines are consecutive, no content between");
1572                                }
1573                            }
1574                        }
1575
1576                        for check_line in (block_last_item_line + 1)..line_num {
1577                            let check_idx = check_line - 1;
1578                            if check_idx < lines.len() {
1579                                let check_info = &lines[check_idx];
1580                                // Check for content that breaks the list
1581                                let is_list_breaking_content = if check_info.in_code_block {
1582                                    // Use enhanced code block classification for list separation
1583                                    let last_item_marker_width =
1584                                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1585                                            lines[block_last_item_line - 1]
1586                                                .list_item
1587                                                .as_ref()
1588                                                .map(|li| {
1589                                                    if li.is_ordered {
1590                                                        li.marker.len() + 1 // Add 1 for the space after ordered list markers
1591                                                    } else {
1592                                                        li.marker.len()
1593                                                    }
1594                                                })
1595                                                .unwrap_or(3) // fallback to 3 if no list item found
1596                                        } else {
1597                                            3 // fallback
1598                                        };
1599
1600                                    let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1601
1602                                    // Analyze code block context using our enhanced classification
1603                                    let context = CodeBlockUtils::analyze_code_block_context(
1604                                        lines,
1605                                        check_line - 1,
1606                                        min_continuation,
1607                                    );
1608
1609                                    // Standalone code blocks break lists, indented ones continue them
1610                                    matches!(context, CodeBlockContext::Standalone)
1611                                } else if !check_info.is_blank && check_info.list_item.is_none() {
1612                                    // Check for structural separators that should break lists (from issue #42)
1613                                    let line_content = check_info.content.trim();
1614
1615                                    // Any of these structural separators break lists
1616                                    if check_info.heading.is_some()
1617                                        || line_content.starts_with("---")
1618                                        || line_content.starts_with("***")
1619                                        || line_content.starts_with("___")
1620                                        || (line_content.contains('|')
1621                                            && !line_content.contains("](")
1622                                            && !line_content.contains("http")
1623                                            && (line_content.matches('|').count() > 1
1624                                                || line_content.starts_with('|')
1625                                                || line_content.ends_with('|')))
1626                                        || line_content.starts_with(">")
1627                                    {
1628                                        true
1629                                    }
1630                                    // Other non-list content - check if properly indented
1631                                    else {
1632                                        let last_item_marker_width =
1633                                            if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1634                                                lines[block_last_item_line - 1]
1635                                                    .list_item
1636                                                    .as_ref()
1637                                                    .map(|li| {
1638                                                        if li.is_ordered {
1639                                                            li.marker.len() + 1 // Add 1 for the space after ordered list markers
1640                                                        } else {
1641                                                            li.marker.len()
1642                                                        }
1643                                                    })
1644                                                    .unwrap_or(3) // fallback to 3 if no list item found
1645                                            } else {
1646                                                3 // fallback
1647                                            };
1648
1649                                        let min_continuation =
1650                                            if block.is_ordered { last_item_marker_width } else { 2 };
1651                                        check_info.indent < min_continuation
1652                                    }
1653                                } else {
1654                                    false
1655                                };
1656
1657                                if is_list_breaking_content {
1658                                    // Not indented enough, so it breaks the list
1659                                    found_non_list = true;
1660                                    break;
1661                                }
1662                            }
1663                        }
1664                        found_non_list
1665                    };
1666
1667                    // A list continues if:
1668                    // 1. It's a nested item (indented more than the parent), OR
1669                    // 2. It's the same type at the same level with reasonable distance
1670                    let mut continues_list = if is_nested {
1671                        // Nested items always continue the list if they're in the same context
1672                        same_context && reasonable_distance && !has_non_list_content
1673                    } else {
1674                        // Same-level items need to match type and markers
1675                        let result = same_type
1676                            && same_context
1677                            && reasonable_distance
1678                            && marker_compatible
1679                            && !has_non_list_content;
1680
1681                        // Debug logging for lines after problematic content
1682                        if block.item_lines.last().is_some_and(|&last_line| {
1683                            last_line > 0
1684                                && last_line <= lines.len()
1685                                && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1686                                && lines[last_line - 1].content.contains(r"\`")
1687                        }) {
1688                            log::debug!(
1689                                "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1690                            );
1691                            if line_num > 0 && line_num <= lines.len() {
1692                                log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1693                            }
1694                        }
1695
1696                        result
1697                    };
1698
1699                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
1700                    // This handles edge cases where content patterns might otherwise split lists incorrectly
1701                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1702                        // Check if the previous line was a list item
1703                        if block.item_lines.contains(&(line_num - 1)) {
1704                            // They're consecutive list items - force them to be in the same list
1705                            continues_list = true;
1706                        }
1707                    }
1708
1709                    if continues_list {
1710                        // Extend current block
1711                        block.end_line = line_num;
1712                        block.item_lines.push(line_num);
1713
1714                        // Update max marker width
1715                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1716                            list_item.marker.len() + 1
1717                        } else {
1718                            list_item.marker.len()
1719                        });
1720
1721                        // Update marker consistency for unordered lists
1722                        if !block.is_ordered
1723                            && block.marker.is_some()
1724                            && block.marker.as_ref() != Some(&list_item.marker)
1725                        {
1726                            // Mixed markers, clear the marker field
1727                            block.marker = None;
1728                        }
1729                    } else {
1730                        // End current block and start a new one
1731
1732                        list_blocks.push(block.clone());
1733
1734                        *block = ListBlock {
1735                            start_line: line_num,
1736                            end_line: line_num,
1737                            is_ordered: list_item.is_ordered,
1738                            marker: if list_item.is_ordered {
1739                                None
1740                            } else {
1741                                Some(list_item.marker.clone())
1742                            },
1743                            blockquote_prefix: blockquote_prefix.clone(),
1744                            item_lines: vec![line_num],
1745                            nesting_level: nesting,
1746                            max_marker_width: if list_item.is_ordered {
1747                                list_item.marker.len() + 1
1748                            } else {
1749                                list_item.marker.len()
1750                            },
1751                        };
1752                    }
1753                } else {
1754                    // Start a new block
1755                    current_block = Some(ListBlock {
1756                        start_line: line_num,
1757                        end_line: line_num,
1758                        is_ordered: list_item.is_ordered,
1759                        marker: if list_item.is_ordered {
1760                            None
1761                        } else {
1762                            Some(list_item.marker.clone())
1763                        },
1764                        blockquote_prefix,
1765                        item_lines: vec![line_num],
1766                        nesting_level: nesting,
1767                        max_marker_width: list_item.marker.len(),
1768                    });
1769                }
1770
1771                last_list_item_line = line_num;
1772                current_indent_level = item_indent;
1773                last_marker_width = if list_item.is_ordered {
1774                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
1775                } else {
1776                    list_item.marker.len()
1777                };
1778            } else if let Some(ref mut block) = current_block {
1779                // Not a list item - check if it continues the current block
1780
1781                // For MD032 compatibility, we use a simple approach:
1782                // - Indented lines continue the list
1783                // - Blank lines followed by indented content continue the list
1784                // - Everything else ends the list
1785
1786                // Calculate minimum indentation for list continuation
1787                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
1788                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
1789                let min_continuation_indent = if block.is_ordered {
1790                    current_indent_level + last_marker_width
1791                } else {
1792                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
1793                };
1794
1795                if line_info.indent >= min_continuation_indent {
1796                    // Indented line continues the list
1797                    block.end_line = line_num;
1798                } else if line_info.is_blank {
1799                    // Blank line - check if it's internal to the list or ending it
1800                    // We only include blank lines that are followed by more list content
1801                    let mut check_idx = line_idx + 1;
1802                    let mut found_continuation = false;
1803
1804                    // Skip additional blank lines
1805                    while check_idx < lines.len() && lines[check_idx].is_blank {
1806                        check_idx += 1;
1807                    }
1808
1809                    if check_idx < lines.len() {
1810                        let next_line = &lines[check_idx];
1811                        // Check if followed by indented content (list continuation)
1812                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
1813                            found_continuation = true;
1814                        }
1815                        // Check if followed by another list item at the same level
1816                        else if !next_line.in_code_block
1817                            && next_line.list_item.is_some()
1818                            && let Some(item) = &next_line.list_item
1819                        {
1820                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
1821                                .find(&next_line.content)
1822                                .map_or(String::new(), |m| m.as_str().to_string());
1823                            if item.marker_column == current_indent_level
1824                                && item.is_ordered == block.is_ordered
1825                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
1826                            {
1827                                // Check if there was meaningful content between the list items (unused now)
1828                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
1829                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
1830                                    if let Some(between_line) = lines.get(idx) {
1831                                        let trimmed = between_line.content.trim();
1832                                        // Skip empty lines
1833                                        if trimmed.is_empty() {
1834                                            return false;
1835                                        }
1836                                        // Check for meaningful content
1837                                        let line_indent =
1838                                            between_line.content.len() - between_line.content.trim_start().len();
1839
1840                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
1841                                        if trimmed.starts_with("```")
1842                                            || trimmed.starts_with("~~~")
1843                                            || trimmed.starts_with("---")
1844                                            || trimmed.starts_with("***")
1845                                            || trimmed.starts_with("___")
1846                                            || trimmed.starts_with(">")
1847                                            || trimmed.contains('|') // Tables
1848                                            || between_line.heading.is_some()
1849                                        {
1850                                            return true; // These are structural separators - meaningful content that breaks lists
1851                                        }
1852
1853                                        // Only properly indented content continues the list
1854                                        line_indent >= min_continuation_indent
1855                                    } else {
1856                                        false
1857                                    }
1858                                });
1859
1860                                if block.is_ordered {
1861                                    // For ordered lists: don't continue if there are structural separators
1862                                    // Check if there are structural separators between the list items
1863                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1864                                        if let Some(between_line) = lines.get(idx) {
1865                                            let trimmed = between_line.content.trim();
1866                                            if trimmed.is_empty() {
1867                                                return false;
1868                                            }
1869                                            // Check for structural separators that break lists
1870                                            trimmed.starts_with("```")
1871                                                || trimmed.starts_with("~~~")
1872                                                || trimmed.starts_with("---")
1873                                                || trimmed.starts_with("***")
1874                                                || trimmed.starts_with("___")
1875                                                || trimmed.starts_with(">")
1876                                                || trimmed.contains('|') // Tables
1877                                                || between_line.heading.is_some()
1878                                        } else {
1879                                            false
1880                                        }
1881                                    });
1882                                    found_continuation = !has_structural_separators;
1883                                } else {
1884                                    // For unordered lists: also check for structural separators
1885                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1886                                        if let Some(between_line) = lines.get(idx) {
1887                                            let trimmed = between_line.content.trim();
1888                                            if trimmed.is_empty() {
1889                                                return false;
1890                                            }
1891                                            // Check for structural separators that break lists
1892                                            trimmed.starts_with("```")
1893                                                || trimmed.starts_with("~~~")
1894                                                || trimmed.starts_with("---")
1895                                                || trimmed.starts_with("***")
1896                                                || trimmed.starts_with("___")
1897                                                || trimmed.starts_with(">")
1898                                                || trimmed.contains('|') // Tables
1899                                                || between_line.heading.is_some()
1900                                        } else {
1901                                            false
1902                                        }
1903                                    });
1904                                    found_continuation = !has_structural_separators;
1905                                }
1906                            }
1907                        }
1908                    }
1909
1910                    if found_continuation {
1911                        // Include the blank line in the block
1912                        block.end_line = line_num;
1913                    } else {
1914                        // Blank line ends the list - don't include it
1915                        list_blocks.push(block.clone());
1916                        current_block = None;
1917                    }
1918                } else {
1919                    // Check for lazy continuation - non-indented line immediately after a list item
1920                    // But only if the line has sufficient indentation for the list type
1921                    let min_required_indent = if block.is_ordered {
1922                        current_indent_level + last_marker_width
1923                    } else {
1924                        current_indent_level + 2
1925                    };
1926
1927                    // For lazy continuation to apply, the line must either:
1928                    // 1. Have no indentation (true lazy continuation)
1929                    // 2. Have sufficient indentation for the list type
1930                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
1931                    let line_content = line_info.content.trim();
1932                    let is_structural_separator = line_info.heading.is_some()
1933                        || line_content.starts_with("```")
1934                        || line_content.starts_with("~~~")
1935                        || line_content.starts_with("---")
1936                        || line_content.starts_with("***")
1937                        || line_content.starts_with("___")
1938                        || line_content.starts_with(">")
1939                        || (line_content.contains('|')
1940                            && !line_content.contains("](")
1941                            && !line_content.contains("http")
1942                            && (line_content.matches('|').count() > 1
1943                                || line_content.starts_with('|')
1944                                || line_content.ends_with('|'))); // Tables
1945
1946                    // Allow lazy continuation if we're still within the same list block
1947                    // (not just immediately after a list item)
1948                    let is_lazy_continuation = !is_structural_separator
1949                        && !line_info.is_blank
1950                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
1951
1952                    if is_lazy_continuation {
1953                        // Additional check: if the line starts with uppercase and looks like a new sentence,
1954                        // it's probably not a continuation
1955                        let content_to_check = if !blockquote_prefix.is_empty() {
1956                            // Strip blockquote prefix to check the actual content
1957                            line_info
1958                                .content
1959                                .strip_prefix(&blockquote_prefix)
1960                                .unwrap_or(&line_info.content)
1961                                .trim()
1962                        } else {
1963                            line_info.content.trim()
1964                        };
1965
1966                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
1967
1968                        // If it starts with uppercase and the previous line ended with punctuation,
1969                        // it's likely a new paragraph, not a continuation
1970                        if starts_with_uppercase && last_list_item_line > 0 {
1971                            // This looks like a new paragraph
1972                            list_blocks.push(block.clone());
1973                            current_block = None;
1974                        } else {
1975                            // This is a lazy continuation line
1976                            block.end_line = line_num;
1977                        }
1978                    } else {
1979                        // Non-indented, non-blank line that's not a lazy continuation - end the block
1980                        list_blocks.push(block.clone());
1981                        current_block = None;
1982                    }
1983                }
1984            }
1985        }
1986
1987        // Don't forget the last block
1988        if let Some(block) = current_block {
1989            list_blocks.push(block);
1990        }
1991
1992        // Merge adjacent blocks that should be one
1993        merge_adjacent_list_blocks(&mut list_blocks, lines);
1994
1995        list_blocks
1996    }
1997
1998    /// Compute character frequency for fast content analysis
1999    fn compute_char_frequency(content: &str) -> CharFrequency {
2000        let mut frequency = CharFrequency::default();
2001
2002        for ch in content.chars() {
2003            match ch {
2004                '#' => frequency.hash_count += 1,
2005                '*' => frequency.asterisk_count += 1,
2006                '_' => frequency.underscore_count += 1,
2007                '-' => frequency.hyphen_count += 1,
2008                '+' => frequency.plus_count += 1,
2009                '>' => frequency.gt_count += 1,
2010                '|' => frequency.pipe_count += 1,
2011                '[' => frequency.bracket_count += 1,
2012                '`' => frequency.backtick_count += 1,
2013                '<' => frequency.lt_count += 1,
2014                '!' => frequency.exclamation_count += 1,
2015                '\n' => frequency.newline_count += 1,
2016                _ => {}
2017            }
2018        }
2019
2020        frequency
2021    }
2022
2023    /// Parse HTML tags in the content
2024    fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2025        lazy_static! {
2026            static ref HTML_TAG_REGEX: regex::Regex =
2027                regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2028        }
2029
2030        let mut html_tags = Vec::with_capacity(content.matches('<').count());
2031
2032        for cap in HTML_TAG_REGEX.captures_iter(content) {
2033            let full_match = cap.get(0).unwrap();
2034            let match_start = full_match.start();
2035            let match_end = full_match.end();
2036
2037            // Skip if in code block
2038            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2039                continue;
2040            }
2041
2042            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2043            let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2044            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2045
2046            // Find which line this tag is on
2047            let mut line_num = 1;
2048            let mut col_start = match_start;
2049            let mut col_end = match_end;
2050            for (idx, line_info) in lines.iter().enumerate() {
2051                if match_start >= line_info.byte_offset {
2052                    line_num = idx + 1;
2053                    col_start = match_start - line_info.byte_offset;
2054                    col_end = match_end - line_info.byte_offset;
2055                } else {
2056                    break;
2057                }
2058            }
2059
2060            html_tags.push(HtmlTag {
2061                line: line_num,
2062                start_col: col_start,
2063                end_col: col_end,
2064                byte_offset: match_start,
2065                byte_end: match_end,
2066                tag_name,
2067                is_closing,
2068                is_self_closing,
2069                raw_content: full_match.as_str().to_string(),
2070            });
2071        }
2072
2073        html_tags
2074    }
2075
2076    /// Parse emphasis spans in the content
2077    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2078        lazy_static! {
2079            static ref EMPHASIS_REGEX: regex::Regex =
2080                regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2081        }
2082
2083        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2084
2085        for cap in EMPHASIS_REGEX.captures_iter(content) {
2086            let full_match = cap.get(0).unwrap();
2087            let match_start = full_match.start();
2088            let match_end = full_match.end();
2089
2090            // Skip if in code block
2091            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2092                continue;
2093            }
2094
2095            let opening_markers = cap.get(1).unwrap().as_str();
2096            let content_part = cap.get(2).unwrap().as_str();
2097            let closing_markers = cap.get(3).unwrap().as_str();
2098
2099            // Validate matching markers
2100            if opening_markers.chars().next() != closing_markers.chars().next()
2101                || opening_markers.len() != closing_markers.len()
2102            {
2103                continue;
2104            }
2105
2106            let marker = opening_markers.chars().next().unwrap();
2107            let marker_count = opening_markers.len();
2108
2109            // Find which line this emphasis is on
2110            let mut line_num = 1;
2111            let mut col_start = match_start;
2112            let mut col_end = match_end;
2113            for (idx, line_info) in lines.iter().enumerate() {
2114                if match_start >= line_info.byte_offset {
2115                    line_num = idx + 1;
2116                    col_start = match_start - line_info.byte_offset;
2117                    col_end = match_end - line_info.byte_offset;
2118                } else {
2119                    break;
2120                }
2121            }
2122
2123            emphasis_spans.push(EmphasisSpan {
2124                line: line_num,
2125                start_col: col_start,
2126                end_col: col_end,
2127                byte_offset: match_start,
2128                byte_end: match_end,
2129                marker,
2130                marker_count,
2131                content: content_part.to_string(),
2132            });
2133        }
2134
2135        emphasis_spans
2136    }
2137
2138    /// Parse table rows in the content
2139    fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2140        let mut table_rows = Vec::with_capacity(lines.len() / 20);
2141
2142        for (line_idx, line_info) in lines.iter().enumerate() {
2143            // Skip lines in code blocks or blank lines
2144            if line_info.in_code_block || line_info.is_blank {
2145                continue;
2146            }
2147
2148            let line = &line_info.content;
2149            let line_num = line_idx + 1;
2150
2151            // Check if this line contains pipes (potential table row)
2152            if !line.contains('|') {
2153                continue;
2154            }
2155
2156            // Count columns by splitting on pipes
2157            let parts: Vec<&str> = line.split('|').collect();
2158            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2159
2160            // Check if this is a separator row
2161            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2162            let mut column_alignments = Vec::new();
2163
2164            if is_separator {
2165                for part in &parts[1..parts.len() - 1] {
2166                    // Skip first and last empty parts
2167                    let trimmed = part.trim();
2168                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2169                        "center".to_string()
2170                    } else if trimmed.ends_with(':') {
2171                        "right".to_string()
2172                    } else if trimmed.starts_with(':') {
2173                        "left".to_string()
2174                    } else {
2175                        "none".to_string()
2176                    };
2177                    column_alignments.push(alignment);
2178                }
2179            }
2180
2181            table_rows.push(TableRow {
2182                line: line_num,
2183                is_separator,
2184                column_count,
2185                column_alignments,
2186            });
2187        }
2188
2189        table_rows
2190    }
2191
2192    /// Parse bare URLs and emails in the content
2193    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2194        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2195
2196        // Check for bare URLs (not in angle brackets or markdown links)
2197        for cap in BARE_URL_PATTERN.captures_iter(content) {
2198            let full_match = cap.get(0).unwrap();
2199            let match_start = full_match.start();
2200            let match_end = full_match.end();
2201
2202            // Skip if in code block
2203            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2204                continue;
2205            }
2206
2207            // Skip if already in angle brackets or markdown links
2208            let preceding_char = if match_start > 0 {
2209                content.chars().nth(match_start - 1)
2210            } else {
2211                None
2212            };
2213            let following_char = content.chars().nth(match_end);
2214
2215            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2216                continue;
2217            }
2218            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2219                continue;
2220            }
2221
2222            let url = full_match.as_str();
2223            let url_type = if url.starts_with("https://") {
2224                "https"
2225            } else if url.starts_with("http://") {
2226                "http"
2227            } else if url.starts_with("ftp://") {
2228                "ftp"
2229            } else {
2230                "other"
2231            };
2232
2233            // Find which line this URL is on
2234            let mut line_num = 1;
2235            let mut col_start = match_start;
2236            let mut col_end = match_end;
2237            for (idx, line_info) in lines.iter().enumerate() {
2238                if match_start >= line_info.byte_offset {
2239                    line_num = idx + 1;
2240                    col_start = match_start - line_info.byte_offset;
2241                    col_end = match_end - line_info.byte_offset;
2242                } else {
2243                    break;
2244                }
2245            }
2246
2247            bare_urls.push(BareUrl {
2248                line: line_num,
2249                start_col: col_start,
2250                end_col: col_end,
2251                byte_offset: match_start,
2252                byte_end: match_end,
2253                url: url.to_string(),
2254                url_type: url_type.to_string(),
2255            });
2256        }
2257
2258        // Check for bare email addresses
2259        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2260            let full_match = cap.get(0).unwrap();
2261            let match_start = full_match.start();
2262            let match_end = full_match.end();
2263
2264            // Skip if in code block
2265            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2266                continue;
2267            }
2268
2269            // Skip if already in angle brackets or markdown links
2270            let preceding_char = if match_start > 0 {
2271                content.chars().nth(match_start - 1)
2272            } else {
2273                None
2274            };
2275            let following_char = content.chars().nth(match_end);
2276
2277            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2278                continue;
2279            }
2280            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2281                continue;
2282            }
2283
2284            let email = full_match.as_str();
2285
2286            // Find which line this email is on
2287            let mut line_num = 1;
2288            let mut col_start = match_start;
2289            let mut col_end = match_end;
2290            for (idx, line_info) in lines.iter().enumerate() {
2291                if match_start >= line_info.byte_offset {
2292                    line_num = idx + 1;
2293                    col_start = match_start - line_info.byte_offset;
2294                    col_end = match_end - line_info.byte_offset;
2295                } else {
2296                    break;
2297                }
2298            }
2299
2300            bare_urls.push(BareUrl {
2301                line: line_num,
2302                start_col: col_start,
2303                end_col: col_end,
2304                byte_offset: match_start,
2305                byte_end: match_end,
2306                url: email.to_string(),
2307                url_type: "email".to_string(),
2308            });
2309        }
2310
2311        bare_urls
2312    }
2313}
2314
2315/// Merge adjacent list blocks that should be treated as one
2316fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2317    if list_blocks.len() < 2 {
2318        return;
2319    }
2320
2321    let mut merger = ListBlockMerger::new(lines);
2322    *list_blocks = merger.merge(list_blocks);
2323}
2324
2325/// Helper struct to manage the complex logic of merging list blocks
2326struct ListBlockMerger<'a> {
2327    lines: &'a [LineInfo],
2328}
2329
2330impl<'a> ListBlockMerger<'a> {
2331    fn new(lines: &'a [LineInfo]) -> Self {
2332        Self { lines }
2333    }
2334
2335    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2336        let mut merged = Vec::with_capacity(list_blocks.len());
2337        let mut current = list_blocks[0].clone();
2338
2339        for next in list_blocks.iter().skip(1) {
2340            if self.should_merge_blocks(&current, next) {
2341                current = self.merge_two_blocks(current, next);
2342            } else {
2343                merged.push(current);
2344                current = next.clone();
2345            }
2346        }
2347
2348        merged.push(current);
2349        merged
2350    }
2351
2352    /// Determine if two adjacent list blocks should be merged
2353    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2354        // Basic compatibility checks
2355        if !self.blocks_are_compatible(current, next) {
2356            return false;
2357        }
2358
2359        // Check spacing and content between blocks
2360        let spacing = self.analyze_spacing_between(current, next);
2361        match spacing {
2362            BlockSpacing::Consecutive => true,
2363            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2364            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2365                self.can_merge_with_content_between(current, next)
2366            }
2367        }
2368    }
2369
2370    /// Check if blocks have compatible structure for merging
2371    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2372        current.is_ordered == next.is_ordered
2373            && current.blockquote_prefix == next.blockquote_prefix
2374            && current.nesting_level == next.nesting_level
2375    }
2376
2377    /// Analyze the spacing between two list blocks
2378    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2379        let gap = next.start_line - current.end_line;
2380
2381        match gap {
2382            1 => BlockSpacing::Consecutive,
2383            2 => BlockSpacing::SingleBlank,
2384            _ if gap > 2 => {
2385                if self.has_only_blank_lines_between(current, next) {
2386                    BlockSpacing::MultipleBlanks
2387                } else {
2388                    BlockSpacing::ContentBetween
2389                }
2390            }
2391            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
2392        }
2393    }
2394
2395    /// Check if unordered lists can be merged with a single blank line between
2396    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2397        // Check if there are structural separators between the blocks
2398        // If has_meaningful_content_between returns true, it means there are structural separators
2399        if has_meaningful_content_between(current, next, self.lines) {
2400            return false; // Structural separators prevent merging
2401        }
2402
2403        // Only merge unordered lists with same marker across single blank
2404        !current.is_ordered && current.marker == next.marker
2405    }
2406
2407    /// Check if ordered lists can be merged when there's content between them
2408    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2409        // Do not merge lists if there are structural separators between them
2410        if has_meaningful_content_between(current, next, self.lines) {
2411            return false; // Structural separators prevent merging
2412        }
2413
2414        // Only consider merging ordered lists if there's no structural content between
2415        current.is_ordered && next.is_ordered
2416    }
2417
2418    /// Check if there are only blank lines between blocks
2419    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2420        for line_num in (current.end_line + 1)..next.start_line {
2421            if let Some(line_info) = self.lines.get(line_num - 1)
2422                && !line_info.content.trim().is_empty()
2423            {
2424                return false;
2425            }
2426        }
2427        true
2428    }
2429
2430    /// Merge two compatible list blocks into one
2431    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2432        current.end_line = next.end_line;
2433        current.item_lines.extend_from_slice(&next.item_lines);
2434
2435        // Update max marker width
2436        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2437
2438        // Handle marker consistency for unordered lists
2439        if !current.is_ordered && self.markers_differ(&current, next) {
2440            current.marker = None; // Mixed markers
2441        }
2442
2443        current
2444    }
2445
2446    /// Check if two blocks have different markers
2447    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2448        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2449    }
2450}
2451
2452/// Types of spacing between list blocks
2453#[derive(Debug, PartialEq)]
2454enum BlockSpacing {
2455    Consecutive,    // No gap between blocks
2456    SingleBlank,    // One blank line between blocks
2457    MultipleBlanks, // Multiple blank lines but no content
2458    ContentBetween, // Content exists between blocks
2459}
2460
2461/// Check if there's meaningful content (not just blank lines) between two list blocks
2462fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2463    // Check lines between current.end_line and next.start_line
2464    for line_num in (current.end_line + 1)..next.start_line {
2465        if let Some(line_info) = lines.get(line_num - 1) {
2466            // Convert to 0-indexed
2467            let trimmed = line_info.content.trim();
2468
2469            // Skip empty lines
2470            if trimmed.is_empty() {
2471                continue;
2472            }
2473
2474            // Check for structural separators that should separate lists (CommonMark compliant)
2475
2476            // Headings separate lists
2477            if line_info.heading.is_some() {
2478                return true; // Has meaningful content - headings separate lists
2479            }
2480
2481            // Horizontal rules separate lists (---, ***, ___)
2482            if is_horizontal_rule(trimmed) {
2483                return true; // Has meaningful content - horizontal rules separate lists
2484            }
2485
2486            // Tables separate lists (lines containing | but not in URLs or code)
2487            // Simple heuristic: tables typically have | at start/end or multiple |
2488            if trimmed.contains('|') && trimmed.len() > 1 {
2489                // Don't treat URLs with | as tables
2490                if !trimmed.contains("](") && !trimmed.contains("http") {
2491                    // More robust check: tables usually have multiple | or | at edges
2492                    let pipe_count = trimmed.matches('|').count();
2493                    if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2494                        return true; // Has meaningful content - tables separate lists
2495                    }
2496                }
2497            }
2498
2499            // Blockquotes separate lists
2500            if trimmed.starts_with('>') {
2501                return true; // Has meaningful content - blockquotes separate lists
2502            }
2503
2504            // Code block fences separate lists (unless properly indented as list content)
2505            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2506                let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2507
2508                // Check if this code block is properly indented as list continuation
2509                let min_continuation_indent = if current.is_ordered {
2510                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
2511                } else {
2512                    current.nesting_level + 2
2513                };
2514
2515                if line_indent < min_continuation_indent {
2516                    // This is a standalone code block that separates lists
2517                    return true; // Has meaningful content - standalone code blocks separate lists
2518                }
2519            }
2520
2521            // Check if this line has proper indentation for list continuation
2522            let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2523
2524            // Calculate minimum indentation needed to be list continuation
2525            let min_indent = if current.is_ordered {
2526                current.nesting_level + current.max_marker_width
2527            } else {
2528                current.nesting_level + 2
2529            };
2530
2531            // If the line is not indented enough to be list continuation, it's meaningful content
2532            if line_indent < min_indent {
2533                return true; // Has meaningful content - content not indented as list continuation
2534            }
2535
2536            // If we reach here, the line is properly indented as list continuation
2537            // Continue checking other lines
2538        }
2539    }
2540
2541    // Only blank lines or properly indented list continuation content between blocks
2542    false
2543}
2544
/// Check if a line is a horizontal rule (---, ***, ___)
///
/// `trimmed` must start with `-`, `*` or `_`, contain that character at least
/// three times, and contain nothing else except spaces and tabs.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character decides which rule character the line must use.
    let rule_char = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Count rule characters, bailing out on anything that is neither the rule
    // character nor horizontal whitespace.
    trimmed
        .chars()
        .try_fold(0usize, |seen, ch| {
            if ch == rule_char {
                Some(seen + 1)
            } else if ch == ' ' || ch == '\t' {
                Some(seen)
            } else {
                None
            }
        })
        .map_or(false, |seen| seen >= 3)
}
2568
/// Unit tests for `LintContext`: construction, offset-to-line/column mapping,
/// per-line info and list item detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input has a single line offset (0) and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without trailing newline maps offsets to 1-based (line, col).
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset mapping across several lines, including a blank one.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    /// Per-line metadata (content, byte offset, indent, blank flag) and the
    /// `line_to_byte_offset` / `line_info` helpers, which take 1-based line numbers.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content, "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content, "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content, "");
        assert!(line3.is_blank);

        // Test helper methods
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Unordered and ordered list markers are detected with their columns;
    /// plain text is not reported as a list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets that fall on newline characters and one past the final character.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }
}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs