rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::utils::ast_utils::get_cached_ast;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use lazy_static::lazy_static;
5use markdown::mdast::Node;
6use regex::Regex;
7
lazy_static! {
    // Matches inline links `[text](url)` and reference links `[text][ref]`.
    // Flags: `s` lets `.` match newlines; `x` is verbose mode, so whitespace and
    // `# ...` comments inside the pattern are ignored by the regex engine.
    // Group 1 = link text (allows backslash escapes and one level of nested `[...]`),
    // group 2 = inline URL, group 3 = reference ID. Exactly one of groups 2/3 participates.
    static ref LINK_PATTERN: Regex = Regex::new(
        r"(?sx)
        \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]          # Link text in group 1 (handles nested brackets)
        (?:
            \(([^)]*)\)       # Inline URL in group 2 (can be empty)
            |
            \[([^\]]*)\]      # Reference ID in group 3
        )"
    ).unwrap();

    // Image pattern: identical to LINK_PATTERN except for the mandatory leading `!`.
    // Same `s` (dot matches newline) and `x` (verbose) flags; group 1 = alt text,
    // group 2 = inline URL, group 3 = reference ID.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r"(?sx)
        !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]         # Alt text in group 1 (handles nested brackets)
        (?:
            \(([^)]*)\)       # Inline URL in group 2 (can be empty)
            |
            \[([^\]]*)\]      # Reference ID in group 3
        )"
    ).unwrap();

    // Reference definition pattern: `[label]: url "title"` with up to 3 leading spaces.
    // `(?m)` makes `^`/`$` match at line boundaries. Group 1 = label, group 2 = URL,
    // group 3 = double-quoted title, group 4 = single-quoted title (title is optional).
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // Code span delimiter pattern: matches a run of one or more backticks.
    // It has no capture groups and does not match the span content itself.
    // NOTE(review): pairing opening/closing runs of equal length presumably happens
    // in the code that consumes this pattern - confirm at the use site.
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Pattern for bare URLs. Group 1 = scheme (`http`, `https` or `ftp`); the rest covers
    // host, optional port, path, query and fragment, stopping at whitespace, brackets,
    // parentheses, quotes, backslashes and backticks.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Pattern for email addresses: local part, `@`, domain, and a TLD of at least 2 ASCII letters.
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // Pattern for angle bracket links `<...>` (to exclude from bare URL detection).
    // Group 1 = the content between the brackets: either an http/https/ftp URL
    // or an email-like `local@domain.tld` string.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Pattern for blockquote prefix in parse_list_blocks.
    // Anchored at the start of the input; group 1 = optional leading whitespace,
    // one or more consecutive `>` characters, and any whitespace that follows them.
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
62
63/// Pre-computed information about a line
64#[derive(Debug, Clone)]
65pub struct LineInfo {
66    /// The actual line content (without newline)
67    pub content: String,
68    /// Byte offset where this line starts in the document
69    pub byte_offset: usize,
70    /// Number of leading spaces/tabs
71    pub indent: usize,
72    /// Whether the line is blank (empty or only whitespace)
73    pub is_blank: bool,
74    /// Whether this line is inside a code block
75    pub in_code_block: bool,
76    /// Whether this line is inside front matter
77    pub in_front_matter: bool,
78    /// List item information if this line starts a list item
79    pub list_item: Option<ListItemInfo>,
80    /// Heading information if this line is a heading
81    pub heading: Option<HeadingInfo>,
82    /// Blockquote information if this line is a blockquote
83    pub blockquote: Option<BlockquoteInfo>,
84}
85
/// Details of the list marker on a line that begins a list item.
#[derive(Clone, Debug)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`.
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one.
    pub is_ordered: bool,
    /// Item number, for ordered lists.
    pub number: Option<usize>,
    /// 0-based column at which the marker begins.
    pub marker_column: usize,
    /// Column at which the content following the marker begins.
    pub content_column: usize,
}
100
/// Heading style type.
///
/// Equality on this fieldless enum is total, so `Eq` is derived alongside
/// `PartialEq`; this lets the type be used wherever a full equivalence
/// relation is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
111
/// A link located in the document, either inline (`[text](url)`) or
/// reference-style (`[text][ref]`).
#[derive(Clone, Debug)]
pub struct ParsedLink {
    /// 1-indexed line on which the link starts.
    pub line: usize,
    /// 0-indexed start column, measured in bytes from the start of `line`.
    pub start_col: usize,
    /// 0-indexed end column, measured in bytes from the start of the line on
    /// which the link ends (which differs from `line` for multi-line links).
    pub end_col: usize,
    /// Byte position of the link's first character within the document.
    pub byte_offset: usize,
    /// Byte position just past the link within the document.
    pub byte_end: usize,
    /// The link text.
    pub text: String,
    /// Link URL; left empty for reference links, which are resolved through
    /// the reference definitions.
    pub url: String,
    /// `true` for a reference link `[text][ref]`, `false` for inline `[text](url)`.
    pub is_reference: bool,
    /// Lower-cased reference ID, set only for reference links.
    pub reference_id: Option<String>,
}
134
/// An image located in the document, either inline (`![alt](url)`) or
/// reference-style (`![alt][ref]`).
#[derive(Clone, Debug)]
pub struct ParsedImage {
    /// 1-indexed line on which the image starts.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line on which the image ends.
    pub end_col: usize,
    /// Byte position of the image's first character within the document.
    pub byte_offset: usize,
    /// Byte position at which the image ends within the document.
    pub byte_end: usize,
    /// The alt text.
    pub alt_text: String,
    /// Image URL or reference.
    pub url: String,
    /// `true` for a reference image `![alt][ref]`, `false` for inline `![alt](url)`.
    pub is_reference: bool,
    /// Reference ID, for reference images.
    pub reference_id: Option<String>,
}
157
/// A reference definition of the form `[ref]: url "title"`.
#[derive(Clone, Debug)]
pub struct ReferenceDef {
    /// 1-indexed line of the definition.
    pub line: usize,
    /// Reference ID, normalized to lowercase.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Title, when one is given.
    pub title: Option<String>,
}
170
/// An inline code span located in the document.
#[derive(Clone, Debug)]
pub struct CodeSpan {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte position of the span's start within the document.
    pub byte_offset: usize,
    /// Byte position of the span's end within the document.
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, ...).
    pub backtick_count: usize,
    /// Text between the delimiters, backticks excluded.
    pub content: String,
}
189
190/// Information about a heading
191#[derive(Debug, Clone)]
192pub struct HeadingInfo {
193    /// Heading level (1-6 for ATX, 1-2 for Setext)
194    pub level: u8,
195    /// Style of heading
196    pub style: HeadingStyle,
197    /// The heading marker (# characters or underline)
198    pub marker: String,
199    /// Column where the marker starts (0-based)
200    pub marker_column: usize,
201    /// Column where heading text starts
202    pub content_column: usize,
203    /// The heading text (without markers and without custom ID syntax)
204    pub text: String,
205    /// Custom header ID if present (e.g., from {#custom-id} syntax)
206    pub custom_id: Option<String>,
207    /// Original heading text including custom ID syntax
208    pub raw_text: String,
209    /// Whether it has a closing sequence (for ATX)
210    pub has_closing_sequence: bool,
211    /// The closing sequence if present
212    pub closing_sequence: String,
213}
214
/// Everything recorded about a blockquote line.
#[derive(Clone, Debug)]
pub struct BlockquoteInfo {
    /// Depth of nesting: 1 for `>`, 2 for `>>`, and so on.
    pub nesting_level: usize,
    /// Indentation that precedes the blockquote marker.
    pub indent: String,
    /// 0-based column of the first `>`.
    pub marker_column: usize,
    /// The blockquote prefix, e.g. `"> "` or `">> "`.
    pub prefix: String,
    /// Text that follows the blockquote marker(s).
    pub content: String,
    /// `true` when no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// `true` when more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// `true` for an empty blockquote line that needs the MD028 fix.
    pub needs_md028_fix: bool,
}
235
/// A contiguous list in the document, described by its line range and items.
#[derive(Clone, Debug)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins.
    pub start_line: usize,
    /// 1-indexed line on which the list ends.
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one.
    pub is_ordered: bool,
    /// For unordered lists: the consistent marker, if there is one.
    pub marker: Option<String>,
    /// Blockquote prefix of the list; empty when the list is not in a blockquote.
    pub blockquote_prefix: String,
    /// Lines within this block that are list items.
    pub item_lines: Vec<usize>,
    /// Depth of nesting; 0 means a top-level list.
    pub nesting_level: usize,
    /// Widest marker seen in the block (e.g. 3 for `"1. "`, 4 for `"10. "`).
    pub max_marker_width: usize,
}
256
257use std::sync::{Arc, Mutex};
258
/// Tallies of Markdown-significant characters, used for quick content checks.
#[derive(Clone, Debug, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings).
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules).
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules).
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings).
    pub hyphen_count: usize,
    /// Number of `+` characters (lists).
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes).
    pub gt_count: usize,
    /// Number of `|` characters (tables).
    pub pipe_count: usize,
    /// Number of `[` characters (links, images).
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks).
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks).
    pub lt_count: usize,
    /// Number of `!` characters (images).
    pub exclamation_count: usize,
    /// Number of newline characters.
    pub newline_count: usize,
}
287
/// An HTML tag located in the document.
#[derive(Clone, Debug)]
pub struct HtmlTag {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte position of the tag's start within the document.
    pub byte_offset: usize,
    /// Byte position of the tag's end within the document.
    pub byte_end: usize,
    /// Name of the tag, e.g. `"div"`, `"img"`, `"br"`.
    pub tag_name: String,
    /// `true` for a closing tag (`</tag>`).
    pub is_closing: bool,
    /// `true` for a self-closing tag (`<tag />`).
    pub is_self_closing: bool,
    /// The raw tag content.
    pub raw_content: String,
}
310
/// An emphasis span located in the document.
#[derive(Clone, Debug)]
pub struct EmphasisSpan {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte position of the span's start within the document.
    pub byte_offset: usize,
    /// Byte position of the span's end within the document.
    pub byte_end: usize,
    /// Emphasis character: `'*'` or `'_'`.
    pub marker: char,
    /// How many markers are used: 1 for italic, 2 for bold, 3+ for bold+italic.
    pub marker_count: usize,
    /// Text inside the emphasis.
    pub content: String,
}
331
/// A table row located in the document.
#[derive(Clone, Debug)]
pub struct TableRow {
    /// 1-indexed line number.
    pub line: usize,
    /// `true` for a separator row (only `|`, `-`, `:` and spaces).
    pub is_separator: bool,
    /// How many columns (pipe-separated cells) the row has.
    pub column_count: usize,
    /// Alignment taken from the separator row; each entry is one of
    /// `"left"`, `"center"`, `"right"`, `"none"`.
    pub column_alignments: Vec<String>,
}
344
/// A bare URL (one that is not part of a link) located in the document.
#[derive(Clone, Debug)]
pub struct BareUrl {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte position of the URL's start within the document.
    pub byte_offset: usize,
    /// Byte position of the URL's end within the document.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL: `"http"`, `"https"`, `"ftp"` or `"email"`.
    pub url_type: String,
}
363
364pub struct LintContext<'a> {
365    pub content: &'a str,
366    pub line_offsets: Vec<usize>,
367    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
368    pub lines: Vec<LineInfo>,             // Pre-computed line information
369    pub links: Vec<ParsedLink>,           // Pre-parsed links
370    pub images: Vec<ParsedImage>,         // Pre-parsed images
371    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
372    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>, // Lazy-loaded inline code spans
373    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
374    pub char_frequency: CharFrequency,    // Character frequency analysis
375    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>, // Lazy-loaded HTML tags
376    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>, // Lazy-loaded emphasis spans
377    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>, // Lazy-loaded table rows
378    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>, // Lazy-loaded bare URLs
379    ast_cache: Mutex<Option<Arc<Node>>>,  // Lazy-loaded AST
380    pub flavor: MarkdownFlavor,           // Markdown flavor being used
381}
382
383impl<'a> LintContext<'a> {
384    pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
385        let mut line_offsets = vec![0];
386        for (i, c) in content.char_indices() {
387            if c == '\n' {
388                line_offsets.push(i + 1);
389            }
390        }
391
392        // Detect code blocks once and cache them
393        let code_blocks = CodeBlockUtils::detect_code_blocks(content);
394
395        // Pre-compute line information
396        let lines = Self::compute_line_info(content, &line_offsets, &code_blocks, flavor);
397
398        // Parse code spans early so we can exclude them from link/image parsing
399        let ast = get_cached_ast(content);
400        let code_spans = Self::parse_code_spans(content, &lines, &ast);
401
402        // Parse links, images, references, and list blocks
403        let links = Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor);
404        let images = Self::parse_images(content, &lines, &code_blocks, &code_spans);
405        let reference_defs = Self::parse_reference_defs(content, &lines);
406        let list_blocks = Self::parse_list_blocks(&lines);
407
408        // Compute character frequency for fast content analysis
409        let char_frequency = Self::compute_char_frequency(content);
410
411        Self {
412            content,
413            line_offsets,
414            code_blocks,
415            lines,
416            links,
417            images,
418            reference_defs,
419            code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
420            list_blocks,
421            char_frequency,
422            html_tags_cache: Mutex::new(None),
423            emphasis_spans_cache: Mutex::new(None),
424            table_rows_cache: Mutex::new(None),
425            bare_urls_cache: Mutex::new(None),
426            ast_cache: Mutex::new(None),
427            flavor,
428        }
429    }
430
431    /// Get AST - uses global cache for deduplication
432    pub fn get_ast(&self) -> Arc<Node> {
433        let mut cache = self.ast_cache.lock().unwrap();
434
435        if cache.is_none() {
436            // Use global AST cache to avoid duplicate parsing
437            // MarkdownAst is just a type alias for Node, so no conversion needed
438            *cache = Some(get_cached_ast(self.content));
439        }
440
441        cache.as_ref().unwrap().clone()
442    }
443
444    /// Get code spans - computed lazily on first access
445    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
446        let mut cache = self.code_spans_cache.lock().unwrap();
447
448        // Check if we need to compute code spans
449        if cache.is_none() {
450            let ast = self.get_ast();
451            let code_spans = Self::parse_code_spans(self.content, &self.lines, &ast);
452            *cache = Some(Arc::new(code_spans));
453        }
454
455        // Return a reference to the cached code spans
456        cache.as_ref().unwrap().clone()
457    }
458
459    /// Get HTML tags - computed lazily on first access
460    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
461        let mut cache = self.html_tags_cache.lock().unwrap();
462
463        if cache.is_none() {
464            let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks);
465            *cache = Some(Arc::new(html_tags));
466        }
467
468        cache.as_ref().unwrap().clone()
469    }
470
471    /// Get emphasis spans - computed lazily on first access
472    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
473        let mut cache = self.emphasis_spans_cache.lock().unwrap();
474
475        if cache.is_none() {
476            let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
477            *cache = Some(Arc::new(emphasis_spans));
478        }
479
480        cache.as_ref().unwrap().clone()
481    }
482
483    /// Get table rows - computed lazily on first access
484    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
485        let mut cache = self.table_rows_cache.lock().unwrap();
486
487        if cache.is_none() {
488            let table_rows = Self::parse_table_rows(&self.lines);
489            *cache = Some(Arc::new(table_rows));
490        }
491
492        cache.as_ref().unwrap().clone()
493    }
494
495    /// Get bare URLs - computed lazily on first access
496    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
497        let mut cache = self.bare_urls_cache.lock().unwrap();
498
499        if cache.is_none() {
500            let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
501            *cache = Some(Arc::new(bare_urls));
502        }
503
504        cache.as_ref().unwrap().clone()
505    }
506
507    /// Map a byte offset to (line, column)
508    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
509        match self.line_offsets.binary_search(&offset) {
510            Ok(line) => (line + 1, 1),
511            Err(line) => {
512                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
513                (line, offset - line_start + 1)
514            }
515        }
516    }
517
518    /// Check if a position is within a code block or code span
519    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
520        // Check code blocks first
521        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
522            return true;
523        }
524
525        // Check inline code spans (lazy load if needed)
526        self.code_spans()
527            .iter()
528            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
529    }
530
531    /// Get line information by line number (1-indexed)
532    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
533        if line_num > 0 {
534            self.lines.get(line_num - 1)
535        } else {
536            None
537        }
538    }
539
540    /// Get byte offset for a line number (1-indexed)
541    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
542        self.line_info(line_num).map(|info| info.byte_offset)
543    }
544
545    /// Get URL for a reference link/image by its ID
546    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
547        let normalized_id = ref_id.to_lowercase();
548        self.reference_defs
549            .iter()
550            .find(|def| def.id == normalized_id)
551            .map(|def| def.url.as_str())
552    }
553
554    /// Get links on a specific line
555    pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
556        self.links.iter().filter(|link| link.line == line_num).collect()
557    }
558
559    /// Get images on a specific line
560    pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
561        self.images.iter().filter(|img| img.line == line_num).collect()
562    }
563
564    /// Check if a line is part of a list block
565    pub fn is_in_list_block(&self, line_num: usize) -> bool {
566        self.list_blocks
567            .iter()
568            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
569    }
570
571    /// Get the list block containing a specific line
572    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
573        self.list_blocks
574            .iter()
575            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
576    }
577
578    /// Check if content has any instances of a specific character (fast)
579    pub fn has_char(&self, ch: char) -> bool {
580        match ch {
581            '#' => self.char_frequency.hash_count > 0,
582            '*' => self.char_frequency.asterisk_count > 0,
583            '_' => self.char_frequency.underscore_count > 0,
584            '-' => self.char_frequency.hyphen_count > 0,
585            '+' => self.char_frequency.plus_count > 0,
586            '>' => self.char_frequency.gt_count > 0,
587            '|' => self.char_frequency.pipe_count > 0,
588            '[' => self.char_frequency.bracket_count > 0,
589            '`' => self.char_frequency.backtick_count > 0,
590            '<' => self.char_frequency.lt_count > 0,
591            '!' => self.char_frequency.exclamation_count > 0,
592            '\n' => self.char_frequency.newline_count > 0,
593            _ => self.content.contains(ch), // Fallback for other characters
594        }
595    }
596
597    /// Get count of a specific character (fast)
598    pub fn char_count(&self, ch: char) -> usize {
599        match ch {
600            '#' => self.char_frequency.hash_count,
601            '*' => self.char_frequency.asterisk_count,
602            '_' => self.char_frequency.underscore_count,
603            '-' => self.char_frequency.hyphen_count,
604            '+' => self.char_frequency.plus_count,
605            '>' => self.char_frequency.gt_count,
606            '|' => self.char_frequency.pipe_count,
607            '[' => self.char_frequency.bracket_count,
608            '`' => self.char_frequency.backtick_count,
609            '<' => self.char_frequency.lt_count,
610            '!' => self.char_frequency.exclamation_count,
611            '\n' => self.char_frequency.newline_count,
612            _ => self.content.matches(ch).count(), // Fallback for other characters
613        }
614    }
615
616    /// Check if content likely contains headings (fast)
617    pub fn likely_has_headings(&self) -> bool {
618        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
619    }
620
621    /// Check if content likely contains lists (fast)
622    pub fn likely_has_lists(&self) -> bool {
623        self.char_frequency.asterisk_count > 0
624            || self.char_frequency.hyphen_count > 0
625            || self.char_frequency.plus_count > 0
626    }
627
628    /// Check if content likely contains emphasis (fast)
629    pub fn likely_has_emphasis(&self) -> bool {
630        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
631    }
632
633    /// Check if content likely contains tables (fast)
634    pub fn likely_has_tables(&self) -> bool {
635        self.char_frequency.pipe_count > 2
636    }
637
638    /// Check if content likely contains blockquotes (fast)
639    pub fn likely_has_blockquotes(&self) -> bool {
640        self.char_frequency.gt_count > 0
641    }
642
643    /// Check if content likely contains code (fast)
644    pub fn likely_has_code(&self) -> bool {
645        self.char_frequency.backtick_count > 0
646    }
647
648    /// Check if content likely contains links or images (fast)
649    pub fn likely_has_links_or_images(&self) -> bool {
650        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
651    }
652
653    /// Check if content likely contains HTML (fast)
654    pub fn likely_has_html(&self) -> bool {
655        self.char_frequency.lt_count > 0
656    }
657
658    /// Get HTML tags on a specific line
659    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
660        self.html_tags()
661            .iter()
662            .filter(|tag| tag.line == line_num)
663            .cloned()
664            .collect()
665    }
666
667    /// Get emphasis spans on a specific line
668    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
669        self.emphasis_spans()
670            .iter()
671            .filter(|span| span.line == line_num)
672            .cloned()
673            .collect()
674    }
675
676    /// Get table rows on a specific line
677    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
678        self.table_rows()
679            .iter()
680            .filter(|row| row.line == line_num)
681            .cloned()
682            .collect()
683    }
684
685    /// Get bare URLs on a specific line
686    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
687        self.bare_urls()
688            .iter()
689            .filter(|url| url.line == line_num)
690            .cloned()
691            .collect()
692    }
693
694    /// Parse all links in the content
695    fn parse_links(
696        content: &str,
697        lines: &[LineInfo],
698        code_blocks: &[(usize, usize)],
699        code_spans: &[CodeSpan],
700        flavor: MarkdownFlavor,
701    ) -> Vec<ParsedLink> {
702        use crate::utils::skip_context::is_mkdocs_snippet_line;
703
704        // Pre-size based on a heuristic: most markdown files have relatively few links
705        let mut links = Vec::with_capacity(content.len() / 500); // ~1 link per 500 chars
706
707        // Parse links across the entire content, not line by line
708        for cap in LINK_PATTERN.captures_iter(content) {
709            let full_match = cap.get(0).unwrap();
710            let match_start = full_match.start();
711            let match_end = full_match.end();
712
713            // Skip if the opening bracket is escaped (preceded by \)
714            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
715                continue;
716            }
717
718            // Skip if this is actually an image (preceded by !)
719            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
720                continue;
721            }
722
723            // Skip if in code block
724            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
725                continue;
726            }
727
728            // Skip if in code span
729            if code_spans
730                .iter()
731                .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
732            {
733                continue;
734            }
735
736            // Skip if this link is on a MkDocs snippet line
737            // Find which line this link is on
738            let line_idx = lines
739                .iter()
740                .position(|line| {
741                    match_start >= line.byte_offset && (match_start < line.byte_offset + line.content.len() + 1)
742                })
743                .unwrap_or(0);
744
745            if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
746                continue;
747            }
748
749            // Find which line this link starts on
750            let mut line_num = 1;
751            let mut col_start = match_start;
752            for (idx, line_info) in lines.iter().enumerate() {
753                if match_start >= line_info.byte_offset {
754                    line_num = idx + 1;
755                    col_start = match_start - line_info.byte_offset;
756                } else {
757                    break;
758                }
759            }
760
761            // Find which line this link ends on (and calculate column on that line)
762            let mut end_line_num = 1;
763            let mut col_end = match_end;
764            for (idx, line_info) in lines.iter().enumerate() {
765                if match_end > line_info.byte_offset {
766                    end_line_num = idx + 1;
767                    col_end = match_end - line_info.byte_offset;
768                } else {
769                    break;
770                }
771            }
772
773            // For single-line links, use the same approach as before
774            if line_num == end_line_num {
775                // col_end is already correct
776            } else {
777                // For multi-line links, col_end represents the column on the ending line
778                // which is what we want
779            }
780
781            let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
782
783            if let Some(inline_url) = cap.get(2) {
784                // Inline link
785                links.push(ParsedLink {
786                    line: line_num,
787                    start_col: col_start,
788                    end_col: col_end,
789                    byte_offset: match_start,
790                    byte_end: match_end,
791                    text,
792                    url: inline_url.as_str().to_string(),
793                    is_reference: false,
794                    reference_id: None,
795                });
796            } else if let Some(ref_id) = cap.get(3) {
797                // Reference link
798                let ref_id_str = ref_id.as_str();
799                let normalized_ref = if ref_id_str.is_empty() {
800                    text.to_lowercase() // Implicit reference
801                } else {
802                    ref_id_str.to_lowercase()
803                };
804
805                links.push(ParsedLink {
806                    line: line_num,
807                    start_col: col_start,
808                    end_col: col_end,
809                    byte_offset: match_start,
810                    byte_end: match_end,
811                    text,
812                    url: String::new(), // Will be resolved with reference_defs
813                    is_reference: true,
814                    reference_id: Some(normalized_ref),
815                });
816            }
817        }
818
819        links
820    }
821
822    /// Parse all images in the content
823    fn parse_images(
824        content: &str,
825        lines: &[LineInfo],
826        code_blocks: &[(usize, usize)],
827        code_spans: &[CodeSpan],
828    ) -> Vec<ParsedImage> {
829        // Pre-size based on a heuristic: images are less common than links
830        let mut images = Vec::with_capacity(content.len() / 1000); // ~1 image per 1000 chars
831
832        // Parse images across the entire content, not line by line
833        for cap in IMAGE_PATTERN.captures_iter(content) {
834            let full_match = cap.get(0).unwrap();
835            let match_start = full_match.start();
836            let match_end = full_match.end();
837
838            // Skip if the ! is escaped (preceded by \)
839            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
840                continue;
841            }
842
843            // Skip if in code block
844            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
845                continue;
846            }
847
848            // Skip if in code span
849            if code_spans
850                .iter()
851                .any(|span| match_start >= span.byte_offset && match_start < span.byte_end)
852            {
853                continue;
854            }
855
856            // Find which line this image starts on
857            let mut line_num = 1;
858            let mut col_start = match_start;
859            for (idx, line_info) in lines.iter().enumerate() {
860                if match_start >= line_info.byte_offset {
861                    line_num = idx + 1;
862                    col_start = match_start - line_info.byte_offset;
863                } else {
864                    break;
865                }
866            }
867
868            // Find which line this image ends on (and calculate column on that line)
869            let mut end_line_num = 1;
870            let mut col_end = match_end;
871            for (idx, line_info) in lines.iter().enumerate() {
872                if match_end > line_info.byte_offset {
873                    end_line_num = idx + 1;
874                    col_end = match_end - line_info.byte_offset;
875                } else {
876                    break;
877                }
878            }
879
880            // For single-line images, use the same approach as before
881            if line_num == end_line_num {
882                // col_end is already correct
883            } else {
884                // For multi-line images, col_end represents the column on the ending line
885                // which is what we want
886            }
887
888            let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
889
890            if let Some(inline_url) = cap.get(2) {
891                // Inline image
892                images.push(ParsedImage {
893                    line: line_num,
894                    start_col: col_start,
895                    end_col: col_end,
896                    byte_offset: match_start,
897                    byte_end: match_end,
898                    alt_text,
899                    url: inline_url.as_str().to_string(),
900                    is_reference: false,
901                    reference_id: None,
902                });
903            } else if let Some(ref_id) = cap.get(3) {
904                // Reference image
905                let ref_id_str = ref_id.as_str();
906                let normalized_ref = if ref_id_str.is_empty() {
907                    alt_text.to_lowercase() // Implicit reference
908                } else {
909                    ref_id_str.to_lowercase()
910                };
911
912                images.push(ParsedImage {
913                    line: line_num,
914                    start_col: col_start,
915                    end_col: col_end,
916                    byte_offset: match_start,
917                    byte_end: match_end,
918                    alt_text,
919                    url: String::new(), // Will be resolved with reference_defs
920                    is_reference: true,
921                    reference_id: Some(normalized_ref),
922                });
923            }
924        }
925
926        images
927    }
928
929    /// Parse reference definitions
930    fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
931        // Pre-size based on lines count as reference definitions are line-based
932        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
933
934        for (line_idx, line_info) in lines.iter().enumerate() {
935            // Skip lines in code blocks
936            if line_info.in_code_block {
937                continue;
938            }
939
940            let line = &line_info.content;
941            let line_num = line_idx + 1;
942
943            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
944                let id = cap.get(1).unwrap().as_str().to_lowercase();
945                let url = cap.get(2).unwrap().as_str().to_string();
946                let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
947
948                refs.push(ReferenceDef {
949                    line: line_num,
950                    id,
951                    url,
952                    title,
953                });
954            }
955        }
956
957        refs
958    }
959
960    /// Pre-compute line information
961    fn compute_line_info(
962        content: &str,
963        line_offsets: &[usize],
964        code_blocks: &[(usize, usize)],
965        flavor: MarkdownFlavor,
966    ) -> Vec<LineInfo> {
967        lazy_static! {
968            // Regex for list detection - allow any whitespace including no space (to catch malformed lists)
969            static ref UNORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)([-*+])([ \t]*)(.*)").unwrap();
970            static ref ORDERED_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(\d+)([.)])([ \t]*)(.*)").unwrap();
971
972            // Regex for blockquote prefix
973            static ref BLOCKQUOTE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*>\s*)(.*)").unwrap();
974
975            // Regex for heading detection
976            static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
977            static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
978
979            // Regex for blockquote detection
980            static ref BLOCKQUOTE_REGEX_FULL: regex::Regex = regex::Regex::new(r"^(\s*)(>+)(\s*)(.*)$").unwrap();
981        }
982
983        let content_lines: Vec<&str> = content.lines().collect();
984        let mut lines = Vec::with_capacity(content_lines.len());
985
986        // Detect front matter boundaries FIRST, before any other parsing
987        let mut in_front_matter = false;
988        let mut front_matter_end = 0;
989        if content_lines.first().map(|l| l.trim()) == Some("---") {
990            in_front_matter = true;
991            for (idx, line) in content_lines.iter().enumerate().skip(1) {
992                if line.trim() == "---" {
993                    front_matter_end = idx;
994                    break;
995                }
996            }
997        }
998
999        for (i, line) in content_lines.iter().enumerate() {
1000            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1001            let indent = line.len() - line.trim_start().len();
1002            // For blank detection, consider blockquote context
1003            let is_blank = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1004                // In blockquote context, check if content after prefix is blank
1005                let after_prefix = caps.get(2).map_or("", |m| m.as_str());
1006                after_prefix.trim().is_empty()
1007            } else {
1008                line.trim().is_empty()
1009            };
1010            // Check if this line is inside a code block (not inline code span)
1011            // We only want to check for fenced/indented code blocks, not inline code
1012            let in_code_block = code_blocks.iter().any(|&(start, end)| {
1013                // Only consider ranges that span multiple lines (code blocks)
1014                // Inline code spans are typically on a single line
1015
1016                // Ensure we're at valid UTF-8 boundaries
1017                let safe_start = if start > 0 && !content.is_char_boundary(start) {
1018                    // Find the nearest valid boundary before start
1019                    let mut boundary = start;
1020                    while boundary > 0 && !content.is_char_boundary(boundary) {
1021                        boundary -= 1;
1022                    }
1023                    boundary
1024                } else {
1025                    start
1026                };
1027
1028                let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1029                    // Find the nearest valid boundary after end
1030                    let mut boundary = end;
1031                    while boundary < content.len() && !content.is_char_boundary(boundary) {
1032                        boundary += 1;
1033                    }
1034                    boundary
1035                } else {
1036                    end.min(content.len())
1037                };
1038
1039                let block_content = &content[safe_start..safe_end];
1040                let is_multiline = block_content.contains('\n');
1041                let is_fenced = block_content.starts_with("```") || block_content.starts_with("~~~");
1042                let is_indented = !is_fenced
1043                    && block_content
1044                        .lines()
1045                        .all(|l| l.starts_with("    ") || l.starts_with("\t") || l.trim().is_empty());
1046
1047                byte_offset >= start && byte_offset < end && (is_multiline || is_fenced || is_indented)
1048            });
1049
1050            // Detect list items (skip if in frontmatter)
1051            let list_item = if !(in_code_block || is_blank || in_front_matter && i <= front_matter_end) {
1052                // Strip blockquote prefix if present for list detection
1053                let (line_for_list_check, blockquote_prefix_len) = if let Some(caps) = BLOCKQUOTE_REGEX.captures(line) {
1054                    let prefix = caps.get(1).unwrap().as_str();
1055                    let content = caps.get(2).unwrap().as_str();
1056                    (content, prefix.len())
1057                } else {
1058                    (&**line, 0)
1059                };
1060
1061                if let Some(caps) = UNORDERED_REGEX.captures(line_for_list_check) {
1062                    let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1063                    let marker = caps.get(2).map_or("", |m| m.as_str());
1064                    let spacing = caps.get(3).map_or("", |m| m.as_str());
1065                    let _content = caps.get(4).map_or("", |m| m.as_str());
1066                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1067                    let content_column = marker_column + marker.len() + spacing.len();
1068
1069                    // According to CommonMark spec, unordered list items MUST have at least one space
1070                    // after the marker (-, *, or +). Without a space, it's not a list item.
1071                    // This also naturally handles cases like:
1072                    // - *emphasis* (not a list)
1073                    // - **bold** (not a list)
1074                    // - --- (horizontal rule, not a list)
1075                    if spacing.is_empty() {
1076                        None
1077                    } else {
1078                        Some(ListItemInfo {
1079                            marker: marker.to_string(),
1080                            is_ordered: false,
1081                            number: None,
1082                            marker_column,
1083                            content_column,
1084                        })
1085                    }
1086                } else if let Some(caps) = ORDERED_REGEX.captures(line_for_list_check) {
1087                    let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1088                    let number_str = caps.get(2).map_or("", |m| m.as_str());
1089                    let delimiter = caps.get(3).map_or("", |m| m.as_str());
1090                    let spacing = caps.get(4).map_or("", |m| m.as_str());
1091                    let _content = caps.get(5).map_or("", |m| m.as_str());
1092                    let marker = format!("{number_str}{delimiter}");
1093                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1094                    let content_column = marker_column + marker.len() + spacing.len();
1095
1096                    // According to CommonMark spec, ordered list items MUST have at least one space
1097                    // after the marker (period or parenthesis). Without a space, it's not a list item.
1098                    if spacing.is_empty() {
1099                        None
1100                    } else {
1101                        Some(ListItemInfo {
1102                            marker,
1103                            is_ordered: true,
1104                            number: number_str.parse().ok(),
1105                            marker_column,
1106                            content_column,
1107                        })
1108                    }
1109                } else {
1110                    None
1111                }
1112            } else {
1113                None
1114            };
1115
1116            lines.push(LineInfo {
1117                content: line.to_string(),
1118                byte_offset,
1119                indent,
1120                is_blank,
1121                in_code_block,
1122                in_front_matter: in_front_matter && i <= front_matter_end,
1123                list_item,
1124                heading: None,    // Will be populated in second pass for Setext headings
1125                blockquote: None, // Will be populated after line creation
1126            });
1127        }
1128
1129        // Second pass: detect headings (including Setext which needs look-ahead) and blockquotes
1130        for i in 0..content_lines.len() {
1131            if lines[i].in_code_block {
1132                continue;
1133            }
1134
1135            // Skip lines in front matter
1136            if in_front_matter && i <= front_matter_end {
1137                continue;
1138            }
1139
1140            let line = content_lines[i];
1141
1142            // Check for blockquotes (even on blank lines within blockquotes)
1143            if let Some(caps) = BLOCKQUOTE_REGEX_FULL.captures(line) {
1144                let indent_str = caps.get(1).map_or("", |m| m.as_str());
1145                let markers = caps.get(2).map_or("", |m| m.as_str());
1146                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1147                let content = caps.get(4).map_or("", |m| m.as_str());
1148
1149                let nesting_level = markers.chars().filter(|&c| c == '>').count();
1150                let marker_column = indent_str.len();
1151
1152                // Build the prefix (indentation + markers + space)
1153                let prefix = format!("{indent_str}{markers}{spaces_after}");
1154
1155                // Check for various blockquote issues
1156                let has_no_space = spaces_after.is_empty() && !content.is_empty();
1157                // Consider tabs as multiple spaces, or actual multiple spaces
1158                let has_multiple_spaces = spaces_after.len() > 1 || spaces_after.contains('\t');
1159
1160                // Check if needs MD028 fix (empty blockquote line without proper spacing)
1161                // MD028 flags empty blockquote lines that don't have a single space after the marker
1162                // Lines like "> " or ">> " are already correct and don't need fixing
1163                let needs_md028_fix = content.is_empty() && spaces_after.is_empty();
1164
1165                lines[i].blockquote = Some(BlockquoteInfo {
1166                    nesting_level,
1167                    indent: indent_str.to_string(),
1168                    marker_column,
1169                    prefix,
1170                    content: content.to_string(),
1171                    has_no_space_after_marker: has_no_space,
1172                    has_multiple_spaces_after_marker: has_multiple_spaces,
1173                    needs_md028_fix,
1174                });
1175            }
1176
1177            // Skip heading detection for blank lines
1178            if lines[i].is_blank {
1179                continue;
1180            }
1181
1182            // Check for ATX headings (but skip MkDocs snippet lines)
1183            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
1184            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1185                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1186                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1187            } else {
1188                false
1189            };
1190
1191            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1192                // Skip headings inside HTML comments
1193                if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1194                    continue;
1195                }
1196                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1197                let hashes = caps.get(2).map_or("", |m| m.as_str());
1198                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1199                let rest = caps.get(4).map_or("", |m| m.as_str());
1200
1201                let level = hashes.len() as u8;
1202                let marker_column = leading_spaces.len();
1203
1204                // Check for closing sequence, but handle custom IDs that might come after
1205                let (text, has_closing, closing_seq) = {
1206                    // First check if there's a custom ID at the end
1207                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1208                        // Check if this looks like a valid custom ID (ends with })
1209                        if rest[id_start..].trim_end().ends_with('}') {
1210                            // Split off the custom ID
1211                            (&rest[..id_start], &rest[id_start..])
1212                        } else {
1213                            (rest, "")
1214                        }
1215                    } else {
1216                        (rest, "")
1217                    };
1218
1219                    // Now look for closing hashes in the part before the custom ID
1220                    let trimmed_rest = rest_without_id.trim_end();
1221                    if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1222                        // Look for the start of the hash sequence
1223                        let mut start_of_hashes = last_hash_pos;
1224                        while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1225                            start_of_hashes -= 1;
1226                        }
1227
1228                        // Check if there's at least one space before the closing hashes
1229                        let has_space_before = start_of_hashes == 0
1230                            || trimmed_rest
1231                                .chars()
1232                                .nth(start_of_hashes - 1)
1233                                .is_some_and(|c| c.is_whitespace());
1234
1235                        // Check if this is a valid closing sequence (all hashes to end of trimmed part)
1236                        let potential_closing = &trimmed_rest[start_of_hashes..];
1237                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1238
1239                        if is_all_hashes && has_space_before {
1240                            // This is a closing sequence
1241                            let closing_hashes = potential_closing.to_string();
1242                            // The text is everything before the closing hashes
1243                            // Don't include the custom ID here - it will be extracted later
1244                            let text_part = if !custom_id_part.is_empty() {
1245                                // If we have a custom ID, append it back to get the full rest
1246                                // This allows the extract_header_id function to handle it properly
1247                                format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1248                            } else {
1249                                rest_without_id[..start_of_hashes].trim_end().to_string()
1250                            };
1251                            (text_part, true, closing_hashes)
1252                        } else {
1253                            // Not a valid closing sequence, return the full content
1254                            (rest.to_string(), false, String::new())
1255                        }
1256                    } else {
1257                        // No hashes found, return the full content
1258                        (rest.to_string(), false, String::new())
1259                    }
1260                };
1261
1262                let content_column = marker_column + hashes.len() + spaces_after.len();
1263
1264                // Extract custom header ID if present
1265                let raw_text = text.trim().to_string();
1266                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1267
1268                // If no custom ID was found on the header line, check the next line for standalone attr-list
1269                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1270                    let next_line = content_lines[i + 1];
1271                    if !lines[i + 1].in_code_block
1272                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1273                        && let Some(next_line_id) =
1274                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1275                    {
1276                        custom_id = Some(next_line_id);
1277                    }
1278                }
1279
1280                lines[i].heading = Some(HeadingInfo {
1281                    level,
1282                    style: HeadingStyle::ATX,
1283                    marker: hashes.to_string(),
1284                    marker_column,
1285                    content_column,
1286                    text: clean_text,
1287                    custom_id,
1288                    raw_text,
1289                    has_closing_sequence: has_closing,
1290                    closing_sequence: closing_seq,
1291                });
1292            }
1293            // Check for Setext headings (need to look at next line)
1294            else if i + 1 < content_lines.len() {
1295                let next_line = content_lines[i + 1];
1296                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1297                    // Skip if next line is front matter delimiter
1298                    if in_front_matter && i < front_matter_end {
1299                        continue;
1300                    }
1301
1302                    // Skip Setext headings inside HTML comments
1303                    if crate::utils::skip_context::is_in_html_comment(content, lines[i].byte_offset) {
1304                        continue;
1305                    }
1306
1307                    let underline = next_line.trim();
1308
1309                    // Skip if the underline looks like YAML delimiter (exactly 3 or more dashes)
1310                    // YAML uses exactly `---` while Setext headings typically use longer underlines
1311                    if underline == "---" {
1312                        continue;
1313                    }
1314
1315                    // Skip if the current line looks like YAML key-value syntax
1316                    let current_line_trimmed = line.trim();
1317                    if current_line_trimmed.contains(':')
1318                        && !current_line_trimmed.starts_with('#')
1319                        && !current_line_trimmed.contains('[')
1320                        && !current_line_trimmed.contains("](")
1321                    {
1322                        // This looks like "key: value" which suggests YAML, not a heading
1323                        continue;
1324                    }
1325
1326                    let level = if underline.starts_with('=') { 1 } else { 2 };
1327                    let style = if level == 1 {
1328                        HeadingStyle::Setext1
1329                    } else {
1330                        HeadingStyle::Setext2
1331                    };
1332
1333                    // Extract custom header ID if present
1334                    let raw_text = line.trim().to_string();
1335                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1336
1337                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
1338                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1339                        let attr_line = content_lines[i + 2];
1340                        if !lines[i + 2].in_code_block
1341                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1342                            && let Some(attr_line_id) =
1343                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1344                        {
1345                            custom_id = Some(attr_line_id);
1346                        }
1347                    }
1348
1349                    lines[i].heading = Some(HeadingInfo {
1350                        level,
1351                        style,
1352                        marker: underline.to_string(),
1353                        marker_column: next_line.len() - next_line.trim_start().len(),
1354                        content_column: lines[i].indent,
1355                        text: clean_text,
1356                        custom_id,
1357                        raw_text,
1358                        has_closing_sequence: false,
1359                        closing_sequence: String::new(),
1360                    });
1361                }
1362            }
1363        }
1364
1365        lines
1366    }
1367
1368    /// Parse all inline code spans in the content using AST
1369    fn parse_code_spans(content: &str, lines: &[LineInfo], ast: &Node) -> Vec<CodeSpan> {
1370        let mut code_spans = Vec::new();
1371
1372        // Quick check - if no backticks, no code spans
1373        if !content.contains('`') {
1374            return code_spans;
1375        }
1376
1377        // Helper function to recursively extract inline code spans from AST nodes
1378        fn extract_code_spans(node: &Node, content: &str, lines: &[LineInfo], spans: &mut Vec<CodeSpan>) {
1379            match node {
1380                Node::InlineCode(inline_code) => {
1381                    if let Some(pos) = &inline_code.position {
1382                        let start_pos = pos.start.offset;
1383                        let end_pos = pos.end.offset;
1384
1385                        // The position includes the backticks, extract the actual content
1386                        let full_span = &content[start_pos..end_pos];
1387                        let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
1388
1389                        // Extract content between backticks, preserving spaces
1390                        let content_start = start_pos + backtick_count;
1391                        let content_end = end_pos - backtick_count;
1392                        let span_content = if content_start < content_end {
1393                            content[content_start..content_end].to_string()
1394                        } else {
1395                            String::new()
1396                        };
1397
1398                        // Find which line this code span starts on
1399                        let mut line_num = 1;
1400                        let mut col_start = start_pos;
1401                        for (idx, line_info) in lines.iter().enumerate() {
1402                            if start_pos >= line_info.byte_offset {
1403                                line_num = idx + 1;
1404                                col_start = start_pos - line_info.byte_offset;
1405                            } else {
1406                                break;
1407                            }
1408                        }
1409
1410                        // Find end column
1411                        let mut col_end = end_pos;
1412                        for line_info in lines.iter() {
1413                            if end_pos > line_info.byte_offset {
1414                                col_end = end_pos - line_info.byte_offset;
1415                            } else {
1416                                break;
1417                            }
1418                        }
1419
1420                        spans.push(CodeSpan {
1421                            line: line_num,
1422                            start_col: col_start,
1423                            end_col: col_end,
1424                            byte_offset: start_pos,
1425                            byte_end: end_pos,
1426                            backtick_count,
1427                            content: span_content,
1428                        });
1429                    }
1430                }
1431                // Recursively process children
1432                Node::Root(root) => {
1433                    for child in &root.children {
1434                        extract_code_spans(child, content, lines, spans);
1435                    }
1436                }
1437                Node::Paragraph(para) => {
1438                    for child in &para.children {
1439                        extract_code_spans(child, content, lines, spans);
1440                    }
1441                }
1442                Node::Heading(heading) => {
1443                    for child in &heading.children {
1444                        extract_code_spans(child, content, lines, spans);
1445                    }
1446                }
1447                Node::List(list) => {
1448                    for child in &list.children {
1449                        extract_code_spans(child, content, lines, spans);
1450                    }
1451                }
1452                Node::ListItem(item) => {
1453                    for child in &item.children {
1454                        extract_code_spans(child, content, lines, spans);
1455                    }
1456                }
1457                Node::Blockquote(blockquote) => {
1458                    for child in &blockquote.children {
1459                        extract_code_spans(child, content, lines, spans);
1460                    }
1461                }
1462                Node::Table(table) => {
1463                    for child in &table.children {
1464                        extract_code_spans(child, content, lines, spans);
1465                    }
1466                }
1467                Node::TableRow(row) => {
1468                    for child in &row.children {
1469                        extract_code_spans(child, content, lines, spans);
1470                    }
1471                }
1472                Node::TableCell(cell) => {
1473                    for child in &cell.children {
1474                        extract_code_spans(child, content, lines, spans);
1475                    }
1476                }
1477                Node::Emphasis(emphasis) => {
1478                    for child in &emphasis.children {
1479                        extract_code_spans(child, content, lines, spans);
1480                    }
1481                }
1482                Node::Strong(strong) => {
1483                    for child in &strong.children {
1484                        extract_code_spans(child, content, lines, spans);
1485                    }
1486                }
1487                Node::Link(link) => {
1488                    for child in &link.children {
1489                        extract_code_spans(child, content, lines, spans);
1490                    }
1491                }
1492                Node::LinkReference(link_ref) => {
1493                    for child in &link_ref.children {
1494                        extract_code_spans(child, content, lines, spans);
1495                    }
1496                }
1497                Node::FootnoteDefinition(footnote) => {
1498                    for child in &footnote.children {
1499                        extract_code_spans(child, content, lines, spans);
1500                    }
1501                }
1502                Node::Delete(delete) => {
1503                    for child in &delete.children {
1504                        extract_code_spans(child, content, lines, spans);
1505                    }
1506                }
1507                // Terminal nodes or nodes without relevant children
1508                Node::Code(_)
1509                | Node::Text(_)
1510                | Node::Html(_)
1511                | Node::Image(_)
1512                | Node::ImageReference(_)
1513                | Node::FootnoteReference(_)
1514                | Node::Break(_)
1515                | Node::ThematicBreak(_)
1516                | Node::Definition(_)
1517                | Node::Yaml(_)
1518                | Node::Toml(_)
1519                | Node::Math(_)
1520                | Node::InlineMath(_)
1521                | Node::MdxJsxFlowElement(_)
1522                | Node::MdxFlowExpression(_)
1523                | Node::MdxJsxTextElement(_)
1524                | Node::MdxTextExpression(_)
1525                | Node::MdxjsEsm(_) => {
1526                    // No children to process or not relevant for code spans
1527                }
1528            }
1529        }
1530
1531        // Extract all code spans from the AST
1532        extract_code_spans(ast, content, lines, &mut code_spans);
1533
1534        // Sort by position to ensure consistent ordering
1535        code_spans.sort_by_key(|span| span.byte_offset);
1536
1537        code_spans
1538    }
1539
1540    /// Parse all list blocks in the content
1541    fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1542        // Pre-size based on lines that could be list items
1543        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
1544        let mut current_block: Option<ListBlock> = None;
1545        let mut last_list_item_line = 0;
1546        let mut current_indent_level = 0;
1547        let mut last_marker_width = 0;
1548
1549        for (line_idx, line_info) in lines.iter().enumerate() {
1550            let line_num = line_idx + 1;
1551
1552            // Enhanced code block handling using Design #3's context analysis
1553            if line_info.in_code_block {
1554                if let Some(ref mut block) = current_block {
1555                    // Calculate minimum indentation for list continuation
1556                    let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1557
1558                    // Analyze code block context using the three-tier classification
1559                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1560
1561                    match context {
1562                        CodeBlockContext::Indented => {
1563                            // Code block is properly indented - continues the list
1564                            block.end_line = line_num;
1565                            continue;
1566                        }
1567                        CodeBlockContext::Standalone => {
1568                            // Code block separates lists - end current block
1569                            let completed_block = current_block.take().unwrap();
1570                            list_blocks.push(completed_block);
1571                            continue;
1572                        }
1573                        CodeBlockContext::Adjacent => {
1574                            // Edge case - use conservative behavior (continue list)
1575                            block.end_line = line_num;
1576                            continue;
1577                        }
1578                    }
1579                } else {
1580                    // No current list block - skip code block lines
1581                    continue;
1582                }
1583            }
1584
1585            // Extract blockquote prefix if any
1586            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
1587                caps.get(0).unwrap().as_str().to_string()
1588            } else {
1589                String::new()
1590            };
1591
1592            // Check if this line is a list item
1593            if let Some(list_item) = &line_info.list_item {
1594                // Calculate nesting level based on indentation
1595                let item_indent = list_item.marker_column;
1596                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
1597
1598                if let Some(ref mut block) = current_block {
1599                    // Check if this continues the current block
1600                    // For nested lists, we need to check if this is a nested item (higher nesting level)
1601                    // or a continuation at the same or lower level
1602                    let is_nested = nesting > block.nesting_level;
1603                    let same_type =
1604                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
1605                    let same_context = block.blockquote_prefix == blockquote_prefix;
1606                    let reasonable_distance = line_num <= last_list_item_line + 2; // Allow one blank line
1607
1608                    // For unordered lists, also check marker consistency
1609                    let marker_compatible =
1610                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
1611
1612                    // Check if there's non-list content between the last item and this one
1613                    let has_non_list_content = {
1614                        let mut found_non_list = false;
1615                        // Use the last item from the current block, not the global last_list_item_line
1616                        let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
1617
1618                        // Debug: Special check for problematic line
1619                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1620                            let last_line = &lines[block_last_item_line - 1];
1621                            if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
1622                                log::debug!(
1623                                    "After problematic line {}: checking lines {} to {} for non-list content",
1624                                    block_last_item_line,
1625                                    block_last_item_line + 1,
1626                                    line_num
1627                                );
1628                                // If they're consecutive list items, there's no content between
1629                                if line_num == block_last_item_line + 1 {
1630                                    log::debug!("Lines are consecutive, no content between");
1631                                }
1632                            }
1633                        }
1634
1635                        for check_line in (block_last_item_line + 1)..line_num {
1636                            let check_idx = check_line - 1;
1637                            if check_idx < lines.len() {
1638                                let check_info = &lines[check_idx];
1639                                // Check for content that breaks the list
1640                                let is_list_breaking_content = if check_info.in_code_block {
1641                                    // Use enhanced code block classification for list separation
1642                                    let last_item_marker_width =
1643                                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1644                                            lines[block_last_item_line - 1]
1645                                                .list_item
1646                                                .as_ref()
1647                                                .map(|li| {
1648                                                    if li.is_ordered {
1649                                                        li.marker.len() + 1 // Add 1 for the space after ordered list markers
1650                                                    } else {
1651                                                        li.marker.len()
1652                                                    }
1653                                                })
1654                                                .unwrap_or(3) // fallback to 3 if no list item found
1655                                        } else {
1656                                            3 // fallback
1657                                        };
1658
1659                                    let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
1660
1661                                    // Analyze code block context using our enhanced classification
1662                                    let context = CodeBlockUtils::analyze_code_block_context(
1663                                        lines,
1664                                        check_line - 1,
1665                                        min_continuation,
1666                                    );
1667
1668                                    // Standalone code blocks break lists, indented ones continue them
1669                                    matches!(context, CodeBlockContext::Standalone)
1670                                } else if !check_info.is_blank && check_info.list_item.is_none() {
1671                                    // Check for structural separators that should break lists (from issue #42)
1672                                    let line_content = check_info.content.trim();
1673
1674                                    // Any of these structural separators break lists
1675                                    if check_info.heading.is_some()
1676                                        || line_content.starts_with("---")
1677                                        || line_content.starts_with("***")
1678                                        || line_content.starts_with("___")
1679                                        || (line_content.contains('|')
1680                                            && !line_content.contains("](")
1681                                            && !line_content.contains("http")
1682                                            && (line_content.matches('|').count() > 1
1683                                                || line_content.starts_with('|')
1684                                                || line_content.ends_with('|')))
1685                                        || line_content.starts_with(">")
1686                                    {
1687                                        true
1688                                    }
1689                                    // Other non-list content - check if properly indented
1690                                    else {
1691                                        let last_item_marker_width =
1692                                            if block_last_item_line > 0 && block_last_item_line <= lines.len() {
1693                                                lines[block_last_item_line - 1]
1694                                                    .list_item
1695                                                    .as_ref()
1696                                                    .map(|li| {
1697                                                        if li.is_ordered {
1698                                                            li.marker.len() + 1 // Add 1 for the space after ordered list markers
1699                                                        } else {
1700                                                            li.marker.len()
1701                                                        }
1702                                                    })
1703                                                    .unwrap_or(3) // fallback to 3 if no list item found
1704                                            } else {
1705                                                3 // fallback
1706                                            };
1707
1708                                        let min_continuation =
1709                                            if block.is_ordered { last_item_marker_width } else { 2 };
1710                                        check_info.indent < min_continuation
1711                                    }
1712                                } else {
1713                                    false
1714                                };
1715
1716                                if is_list_breaking_content {
1717                                    // Not indented enough, so it breaks the list
1718                                    found_non_list = true;
1719                                    break;
1720                                }
1721                            }
1722                        }
1723                        found_non_list
1724                    };
1725
1726                    // A list continues if:
1727                    // 1. It's a nested item (indented more than the parent), OR
1728                    // 2. It's the same type at the same level with reasonable distance
1729                    let mut continues_list = if is_nested {
1730                        // Nested items always continue the list if they're in the same context
1731                        same_context && reasonable_distance && !has_non_list_content
1732                    } else {
1733                        // Same-level items need to match type and markers
1734                        let result = same_type
1735                            && same_context
1736                            && reasonable_distance
1737                            && marker_compatible
1738                            && !has_non_list_content;
1739
1740                        // Debug logging for lines after problematic content
1741                        if block.item_lines.last().is_some_and(|&last_line| {
1742                            last_line > 0
1743                                && last_line <= lines.len()
1744                                && lines[last_line - 1].content.contains(r"`sqlalchemy`")
1745                                && lines[last_line - 1].content.contains(r"\`")
1746                        }) {
1747                            log::debug!(
1748                                "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
1749                            );
1750                            if line_num > 0 && line_num <= lines.len() {
1751                                log::debug!("Current line content: {:?}", lines[line_num - 1].content);
1752                            }
1753                        }
1754
1755                        result
1756                    };
1757
1758                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
1759                    // This handles edge cases where content patterns might otherwise split lists incorrectly
1760                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
1761                        // Check if the previous line was a list item
1762                        if block.item_lines.contains(&(line_num - 1)) {
1763                            // They're consecutive list items - force them to be in the same list
1764                            continues_list = true;
1765                        }
1766                    }
1767
1768                    if continues_list {
1769                        // Extend current block
1770                        block.end_line = line_num;
1771                        block.item_lines.push(line_num);
1772
1773                        // Update max marker width
1774                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
1775                            list_item.marker.len() + 1
1776                        } else {
1777                            list_item.marker.len()
1778                        });
1779
1780                        // Update marker consistency for unordered lists
1781                        if !block.is_ordered
1782                            && block.marker.is_some()
1783                            && block.marker.as_ref() != Some(&list_item.marker)
1784                        {
1785                            // Mixed markers, clear the marker field
1786                            block.marker = None;
1787                        }
1788                    } else {
1789                        // End current block and start a new one
1790
1791                        list_blocks.push(block.clone());
1792
1793                        *block = ListBlock {
1794                            start_line: line_num,
1795                            end_line: line_num,
1796                            is_ordered: list_item.is_ordered,
1797                            marker: if list_item.is_ordered {
1798                                None
1799                            } else {
1800                                Some(list_item.marker.clone())
1801                            },
1802                            blockquote_prefix: blockquote_prefix.clone(),
1803                            item_lines: vec![line_num],
1804                            nesting_level: nesting,
1805                            max_marker_width: if list_item.is_ordered {
1806                                list_item.marker.len() + 1
1807                            } else {
1808                                list_item.marker.len()
1809                            },
1810                        };
1811                    }
1812                } else {
1813                    // Start a new block
1814                    current_block = Some(ListBlock {
1815                        start_line: line_num,
1816                        end_line: line_num,
1817                        is_ordered: list_item.is_ordered,
1818                        marker: if list_item.is_ordered {
1819                            None
1820                        } else {
1821                            Some(list_item.marker.clone())
1822                        },
1823                        blockquote_prefix,
1824                        item_lines: vec![line_num],
1825                        nesting_level: nesting,
1826                        max_marker_width: list_item.marker.len(),
1827                    });
1828                }
1829
1830                last_list_item_line = line_num;
1831                current_indent_level = item_indent;
1832                last_marker_width = if list_item.is_ordered {
1833                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
1834                } else {
1835                    list_item.marker.len()
1836                };
1837            } else if let Some(ref mut block) = current_block {
1838                // Not a list item - check if it continues the current block
1839
1840                // For MD032 compatibility, we use a simple approach:
1841                // - Indented lines continue the list
1842                // - Blank lines followed by indented content continue the list
1843                // - Everything else ends the list
1844
1845                // Calculate minimum indentation for list continuation
1846                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
1847                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
1848                let min_continuation_indent = if block.is_ordered {
1849                    current_indent_level + last_marker_width
1850                } else {
1851                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
1852                };
1853
1854                if line_info.indent >= min_continuation_indent {
1855                    // Indented line continues the list
1856                    block.end_line = line_num;
1857                } else if line_info.is_blank {
1858                    // Blank line - check if it's internal to the list or ending it
1859                    // We only include blank lines that are followed by more list content
1860                    let mut check_idx = line_idx + 1;
1861                    let mut found_continuation = false;
1862
1863                    // Skip additional blank lines
1864                    while check_idx < lines.len() && lines[check_idx].is_blank {
1865                        check_idx += 1;
1866                    }
1867
1868                    if check_idx < lines.len() {
1869                        let next_line = &lines[check_idx];
1870                        // Check if followed by indented content (list continuation)
1871                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
1872                            found_continuation = true;
1873                        }
1874                        // Check if followed by another list item at the same level
1875                        else if !next_line.in_code_block
1876                            && next_line.list_item.is_some()
1877                            && let Some(item) = &next_line.list_item
1878                        {
1879                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
1880                                .find(&next_line.content)
1881                                .map_or(String::new(), |m| m.as_str().to_string());
1882                            if item.marker_column == current_indent_level
1883                                && item.is_ordered == block.is_ordered
1884                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
1885                            {
1886                                // Check if there was meaningful content between the list items (unused now)
1887                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
1888                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
1889                                    if let Some(between_line) = lines.get(idx) {
1890                                        let trimmed = between_line.content.trim();
1891                                        // Skip empty lines
1892                                        if trimmed.is_empty() {
1893                                            return false;
1894                                        }
1895                                        // Check for meaningful content
1896                                        let line_indent =
1897                                            between_line.content.len() - between_line.content.trim_start().len();
1898
1899                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
1900                                        if trimmed.starts_with("```")
1901                                            || trimmed.starts_with("~~~")
1902                                            || trimmed.starts_with("---")
1903                                            || trimmed.starts_with("***")
1904                                            || trimmed.starts_with("___")
1905                                            || trimmed.starts_with(">")
1906                                            || trimmed.contains('|') // Tables
1907                                            || between_line.heading.is_some()
1908                                        {
1909                                            return true; // These are structural separators - meaningful content that breaks lists
1910                                        }
1911
1912                                        // Only properly indented content continues the list
1913                                        line_indent >= min_continuation_indent
1914                                    } else {
1915                                        false
1916                                    }
1917                                });
1918
1919                                if block.is_ordered {
1920                                    // For ordered lists: don't continue if there are structural separators
1921                                    // Check if there are structural separators between the list items
1922                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1923                                        if let Some(between_line) = lines.get(idx) {
1924                                            let trimmed = between_line.content.trim();
1925                                            if trimmed.is_empty() {
1926                                                return false;
1927                                            }
1928                                            // Check for structural separators that break lists
1929                                            trimmed.starts_with("```")
1930                                                || trimmed.starts_with("~~~")
1931                                                || trimmed.starts_with("---")
1932                                                || trimmed.starts_with("***")
1933                                                || trimmed.starts_with("___")
1934                                                || trimmed.starts_with(">")
1935                                                || trimmed.contains('|') // Tables
1936                                                || between_line.heading.is_some()
1937                                        } else {
1938                                            false
1939                                        }
1940                                    });
1941                                    found_continuation = !has_structural_separators;
1942                                } else {
1943                                    // For unordered lists: also check for structural separators
1944                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
1945                                        if let Some(between_line) = lines.get(idx) {
1946                                            let trimmed = between_line.content.trim();
1947                                            if trimmed.is_empty() {
1948                                                return false;
1949                                            }
1950                                            // Check for structural separators that break lists
1951                                            trimmed.starts_with("```")
1952                                                || trimmed.starts_with("~~~")
1953                                                || trimmed.starts_with("---")
1954                                                || trimmed.starts_with("***")
1955                                                || trimmed.starts_with("___")
1956                                                || trimmed.starts_with(">")
1957                                                || trimmed.contains('|') // Tables
1958                                                || between_line.heading.is_some()
1959                                        } else {
1960                                            false
1961                                        }
1962                                    });
1963                                    found_continuation = !has_structural_separators;
1964                                }
1965                            }
1966                        }
1967                    }
1968
1969                    if found_continuation {
1970                        // Include the blank line in the block
1971                        block.end_line = line_num;
1972                    } else {
1973                        // Blank line ends the list - don't include it
1974                        list_blocks.push(block.clone());
1975                        current_block = None;
1976                    }
1977                } else {
1978                    // Check for lazy continuation - non-indented line immediately after a list item
1979                    // But only if the line has sufficient indentation for the list type
1980                    let min_required_indent = if block.is_ordered {
1981                        current_indent_level + last_marker_width
1982                    } else {
1983                        current_indent_level + 2
1984                    };
1985
1986                    // For lazy continuation to apply, the line must either:
1987                    // 1. Have no indentation (true lazy continuation)
1988                    // 2. Have sufficient indentation for the list type
1989                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
1990                    let line_content = line_info.content.trim();
1991                    let is_structural_separator = line_info.heading.is_some()
1992                        || line_content.starts_with("```")
1993                        || line_content.starts_with("~~~")
1994                        || line_content.starts_with("---")
1995                        || line_content.starts_with("***")
1996                        || line_content.starts_with("___")
1997                        || line_content.starts_with(">")
1998                        || (line_content.contains('|')
1999                            && !line_content.contains("](")
2000                            && !line_content.contains("http")
2001                            && (line_content.matches('|').count() > 1
2002                                || line_content.starts_with('|')
2003                                || line_content.ends_with('|'))); // Tables
2004
2005                    // Allow lazy continuation if we're still within the same list block
2006                    // (not just immediately after a list item)
2007                    let is_lazy_continuation = !is_structural_separator
2008                        && !line_info.is_blank
2009                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2010
2011                    if is_lazy_continuation {
2012                        // Additional check: if the line starts with uppercase and looks like a new sentence,
2013                        // it's probably not a continuation
2014                        let content_to_check = if !blockquote_prefix.is_empty() {
2015                            // Strip blockquote prefix to check the actual content
2016                            line_info
2017                                .content
2018                                .strip_prefix(&blockquote_prefix)
2019                                .unwrap_or(&line_info.content)
2020                                .trim()
2021                        } else {
2022                            line_info.content.trim()
2023                        };
2024
2025                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2026
2027                        // If it starts with uppercase and the previous line ended with punctuation,
2028                        // it's likely a new paragraph, not a continuation
2029                        if starts_with_uppercase && last_list_item_line > 0 {
2030                            // This looks like a new paragraph
2031                            list_blocks.push(block.clone());
2032                            current_block = None;
2033                        } else {
2034                            // This is a lazy continuation line
2035                            block.end_line = line_num;
2036                        }
2037                    } else {
2038                        // Non-indented, non-blank line that's not a lazy continuation - end the block
2039                        list_blocks.push(block.clone());
2040                        current_block = None;
2041                    }
2042                }
2043            }
2044        }
2045
2046        // Don't forget the last block
2047        if let Some(block) = current_block {
2048            list_blocks.push(block);
2049        }
2050
2051        // Merge adjacent blocks that should be one
2052        merge_adjacent_list_blocks(&mut list_blocks, lines);
2053
2054        list_blocks
2055    }
2056
2057    /// Compute character frequency for fast content analysis
2058    fn compute_char_frequency(content: &str) -> CharFrequency {
2059        let mut frequency = CharFrequency::default();
2060
2061        for ch in content.chars() {
2062            match ch {
2063                '#' => frequency.hash_count += 1,
2064                '*' => frequency.asterisk_count += 1,
2065                '_' => frequency.underscore_count += 1,
2066                '-' => frequency.hyphen_count += 1,
2067                '+' => frequency.plus_count += 1,
2068                '>' => frequency.gt_count += 1,
2069                '|' => frequency.pipe_count += 1,
2070                '[' => frequency.bracket_count += 1,
2071                '`' => frequency.backtick_count += 1,
2072                '<' => frequency.lt_count += 1,
2073                '!' => frequency.exclamation_count += 1,
2074                '\n' => frequency.newline_count += 1,
2075                _ => {}
2076            }
2077        }
2078
2079        frequency
2080    }
2081
2082    /// Parse HTML tags in the content
2083    fn parse_html_tags(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<HtmlTag> {
2084        lazy_static! {
2085            static ref HTML_TAG_REGEX: regex::Regex =
2086                regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2087        }
2088
2089        let mut html_tags = Vec::with_capacity(content.matches('<').count());
2090
2091        for cap in HTML_TAG_REGEX.captures_iter(content) {
2092            let full_match = cap.get(0).unwrap();
2093            let match_start = full_match.start();
2094            let match_end = full_match.end();
2095
2096            // Skip if in code block
2097            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2098                continue;
2099            }
2100
2101            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2102            let tag_name = cap.get(2).unwrap().as_str().to_lowercase();
2103            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2104
2105            // Find which line this tag is on
2106            let mut line_num = 1;
2107            let mut col_start = match_start;
2108            let mut col_end = match_end;
2109            for (idx, line_info) in lines.iter().enumerate() {
2110                if match_start >= line_info.byte_offset {
2111                    line_num = idx + 1;
2112                    col_start = match_start - line_info.byte_offset;
2113                    col_end = match_end - line_info.byte_offset;
2114                } else {
2115                    break;
2116                }
2117            }
2118
2119            html_tags.push(HtmlTag {
2120                line: line_num,
2121                start_col: col_start,
2122                end_col: col_end,
2123                byte_offset: match_start,
2124                byte_end: match_end,
2125                tag_name,
2126                is_closing,
2127                is_self_closing,
2128                raw_content: full_match.as_str().to_string(),
2129            });
2130        }
2131
2132        html_tags
2133    }
2134
2135    /// Parse emphasis spans in the content
2136    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2137        lazy_static! {
2138            static ref EMPHASIS_REGEX: regex::Regex =
2139                regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2140        }
2141
2142        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2143
2144        for cap in EMPHASIS_REGEX.captures_iter(content) {
2145            let full_match = cap.get(0).unwrap();
2146            let match_start = full_match.start();
2147            let match_end = full_match.end();
2148
2149            // Skip if in code block
2150            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2151                continue;
2152            }
2153
2154            let opening_markers = cap.get(1).unwrap().as_str();
2155            let content_part = cap.get(2).unwrap().as_str();
2156            let closing_markers = cap.get(3).unwrap().as_str();
2157
2158            // Validate matching markers
2159            if opening_markers.chars().next() != closing_markers.chars().next()
2160                || opening_markers.len() != closing_markers.len()
2161            {
2162                continue;
2163            }
2164
2165            let marker = opening_markers.chars().next().unwrap();
2166            let marker_count = opening_markers.len();
2167
2168            // Find which line this emphasis is on
2169            let mut line_num = 1;
2170            let mut col_start = match_start;
2171            let mut col_end = match_end;
2172            for (idx, line_info) in lines.iter().enumerate() {
2173                if match_start >= line_info.byte_offset {
2174                    line_num = idx + 1;
2175                    col_start = match_start - line_info.byte_offset;
2176                    col_end = match_end - line_info.byte_offset;
2177                } else {
2178                    break;
2179                }
2180            }
2181
2182            emphasis_spans.push(EmphasisSpan {
2183                line: line_num,
2184                start_col: col_start,
2185                end_col: col_end,
2186                byte_offset: match_start,
2187                byte_end: match_end,
2188                marker,
2189                marker_count,
2190                content: content_part.to_string(),
2191            });
2192        }
2193
2194        emphasis_spans
2195    }
2196
2197    /// Parse table rows in the content
2198    fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2199        let mut table_rows = Vec::with_capacity(lines.len() / 20);
2200
2201        for (line_idx, line_info) in lines.iter().enumerate() {
2202            // Skip lines in code blocks or blank lines
2203            if line_info.in_code_block || line_info.is_blank {
2204                continue;
2205            }
2206
2207            let line = &line_info.content;
2208            let line_num = line_idx + 1;
2209
2210            // Check if this line contains pipes (potential table row)
2211            if !line.contains('|') {
2212                continue;
2213            }
2214
2215            // Count columns by splitting on pipes
2216            let parts: Vec<&str> = line.split('|').collect();
2217            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2218
2219            // Check if this is a separator row
2220            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2221            let mut column_alignments = Vec::new();
2222
2223            if is_separator {
2224                for part in &parts[1..parts.len() - 1] {
2225                    // Skip first and last empty parts
2226                    let trimmed = part.trim();
2227                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2228                        "center".to_string()
2229                    } else if trimmed.ends_with(':') {
2230                        "right".to_string()
2231                    } else if trimmed.starts_with(':') {
2232                        "left".to_string()
2233                    } else {
2234                        "none".to_string()
2235                    };
2236                    column_alignments.push(alignment);
2237                }
2238            }
2239
2240            table_rows.push(TableRow {
2241                line: line_num,
2242                is_separator,
2243                column_count,
2244                column_alignments,
2245            });
2246        }
2247
2248        table_rows
2249    }
2250
2251    /// Parse bare URLs and emails in the content
2252    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2253        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2254
2255        // Check for bare URLs (not in angle brackets or markdown links)
2256        for cap in BARE_URL_PATTERN.captures_iter(content) {
2257            let full_match = cap.get(0).unwrap();
2258            let match_start = full_match.start();
2259            let match_end = full_match.end();
2260
2261            // Skip if in code block
2262            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2263                continue;
2264            }
2265
2266            // Skip if already in angle brackets or markdown links
2267            let preceding_char = if match_start > 0 {
2268                content.chars().nth(match_start - 1)
2269            } else {
2270                None
2271            };
2272            let following_char = content.chars().nth(match_end);
2273
2274            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2275                continue;
2276            }
2277            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2278                continue;
2279            }
2280
2281            let url = full_match.as_str();
2282            let url_type = if url.starts_with("https://") {
2283                "https"
2284            } else if url.starts_with("http://") {
2285                "http"
2286            } else if url.starts_with("ftp://") {
2287                "ftp"
2288            } else {
2289                "other"
2290            };
2291
2292            // Find which line this URL is on
2293            let mut line_num = 1;
2294            let mut col_start = match_start;
2295            let mut col_end = match_end;
2296            for (idx, line_info) in lines.iter().enumerate() {
2297                if match_start >= line_info.byte_offset {
2298                    line_num = idx + 1;
2299                    col_start = match_start - line_info.byte_offset;
2300                    col_end = match_end - line_info.byte_offset;
2301                } else {
2302                    break;
2303                }
2304            }
2305
2306            bare_urls.push(BareUrl {
2307                line: line_num,
2308                start_col: col_start,
2309                end_col: col_end,
2310                byte_offset: match_start,
2311                byte_end: match_end,
2312                url: url.to_string(),
2313                url_type: url_type.to_string(),
2314            });
2315        }
2316
2317        // Check for bare email addresses
2318        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2319            let full_match = cap.get(0).unwrap();
2320            let match_start = full_match.start();
2321            let match_end = full_match.end();
2322
2323            // Skip if in code block
2324            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2325                continue;
2326            }
2327
2328            // Skip if already in angle brackets or markdown links
2329            let preceding_char = if match_start > 0 {
2330                content.chars().nth(match_start - 1)
2331            } else {
2332                None
2333            };
2334            let following_char = content.chars().nth(match_end);
2335
2336            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2337                continue;
2338            }
2339            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2340                continue;
2341            }
2342
2343            let email = full_match.as_str();
2344
2345            // Find which line this email is on
2346            let mut line_num = 1;
2347            let mut col_start = match_start;
2348            let mut col_end = match_end;
2349            for (idx, line_info) in lines.iter().enumerate() {
2350                if match_start >= line_info.byte_offset {
2351                    line_num = idx + 1;
2352                    col_start = match_start - line_info.byte_offset;
2353                    col_end = match_end - line_info.byte_offset;
2354                } else {
2355                    break;
2356                }
2357            }
2358
2359            bare_urls.push(BareUrl {
2360                line: line_num,
2361                start_col: col_start,
2362                end_col: col_end,
2363                byte_offset: match_start,
2364                byte_end: match_end,
2365                url: email.to_string(),
2366                url_type: "email".to_string(),
2367            });
2368        }
2369
2370        bare_urls
2371    }
2372}
2373
2374/// Merge adjacent list blocks that should be treated as one
2375fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2376    if list_blocks.len() < 2 {
2377        return;
2378    }
2379
2380    let mut merger = ListBlockMerger::new(lines);
2381    *list_blocks = merger.merge(list_blocks);
2382}
2383
2384/// Helper struct to manage the complex logic of merging list blocks
2385struct ListBlockMerger<'a> {
2386    lines: &'a [LineInfo],
2387}
2388
2389impl<'a> ListBlockMerger<'a> {
2390    fn new(lines: &'a [LineInfo]) -> Self {
2391        Self { lines }
2392    }
2393
2394    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2395        let mut merged = Vec::with_capacity(list_blocks.len());
2396        let mut current = list_blocks[0].clone();
2397
2398        for next in list_blocks.iter().skip(1) {
2399            if self.should_merge_blocks(&current, next) {
2400                current = self.merge_two_blocks(current, next);
2401            } else {
2402                merged.push(current);
2403                current = next.clone();
2404            }
2405        }
2406
2407        merged.push(current);
2408        merged
2409    }
2410
2411    /// Determine if two adjacent list blocks should be merged
2412    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2413        // Basic compatibility checks
2414        if !self.blocks_are_compatible(current, next) {
2415            return false;
2416        }
2417
2418        // Check spacing and content between blocks
2419        let spacing = self.analyze_spacing_between(current, next);
2420        match spacing {
2421            BlockSpacing::Consecutive => true,
2422            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2423            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2424                self.can_merge_with_content_between(current, next)
2425            }
2426        }
2427    }
2428
2429    /// Check if blocks have compatible structure for merging
2430    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2431        current.is_ordered == next.is_ordered
2432            && current.blockquote_prefix == next.blockquote_prefix
2433            && current.nesting_level == next.nesting_level
2434    }
2435
2436    /// Analyze the spacing between two list blocks
2437    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2438        let gap = next.start_line - current.end_line;
2439
2440        match gap {
2441            1 => BlockSpacing::Consecutive,
2442            2 => BlockSpacing::SingleBlank,
2443            _ if gap > 2 => {
2444                if self.has_only_blank_lines_between(current, next) {
2445                    BlockSpacing::MultipleBlanks
2446                } else {
2447                    BlockSpacing::ContentBetween
2448                }
2449            }
2450            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
2451        }
2452    }
2453
2454    /// Check if unordered lists can be merged with a single blank line between
2455    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2456        // Check if there are structural separators between the blocks
2457        // If has_meaningful_content_between returns true, it means there are structural separators
2458        if has_meaningful_content_between(current, next, self.lines) {
2459            return false; // Structural separators prevent merging
2460        }
2461
2462        // Only merge unordered lists with same marker across single blank
2463        !current.is_ordered && current.marker == next.marker
2464    }
2465
2466    /// Check if ordered lists can be merged when there's content between them
2467    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2468        // Do not merge lists if there are structural separators between them
2469        if has_meaningful_content_between(current, next, self.lines) {
2470            return false; // Structural separators prevent merging
2471        }
2472
2473        // Only consider merging ordered lists if there's no structural content between
2474        current.is_ordered && next.is_ordered
2475    }
2476
2477    /// Check if there are only blank lines between blocks
2478    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2479        for line_num in (current.end_line + 1)..next.start_line {
2480            if let Some(line_info) = self.lines.get(line_num - 1)
2481                && !line_info.content.trim().is_empty()
2482            {
2483                return false;
2484            }
2485        }
2486        true
2487    }
2488
2489    /// Merge two compatible list blocks into one
2490    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2491        current.end_line = next.end_line;
2492        current.item_lines.extend_from_slice(&next.item_lines);
2493
2494        // Update max marker width
2495        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2496
2497        // Handle marker consistency for unordered lists
2498        if !current.is_ordered && self.markers_differ(&current, next) {
2499            current.marker = None; // Mixed markers
2500        }
2501
2502        current
2503    }
2504
2505    /// Check if two blocks have different markers
2506    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2507        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2508    }
2509}
2510
2511/// Types of spacing between list blocks
2512#[derive(Debug, PartialEq)]
2513enum BlockSpacing {
2514    Consecutive,    // No gap between blocks
2515    SingleBlank,    // One blank line between blocks
2516    MultipleBlanks, // Multiple blank lines but no content
2517    ContentBetween, // Content exists between blocks
2518}
2519
2520/// Check if there's meaningful content (not just blank lines) between two list blocks
2521fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2522    // Check lines between current.end_line and next.start_line
2523    for line_num in (current.end_line + 1)..next.start_line {
2524        if let Some(line_info) = lines.get(line_num - 1) {
2525            // Convert to 0-indexed
2526            let trimmed = line_info.content.trim();
2527
2528            // Skip empty lines
2529            if trimmed.is_empty() {
2530                continue;
2531            }
2532
2533            // Check for structural separators that should separate lists (CommonMark compliant)
2534
2535            // Headings separate lists
2536            if line_info.heading.is_some() {
2537                return true; // Has meaningful content - headings separate lists
2538            }
2539
2540            // Horizontal rules separate lists (---, ***, ___)
2541            if is_horizontal_rule(trimmed) {
2542                return true; // Has meaningful content - horizontal rules separate lists
2543            }
2544
2545            // Tables separate lists (lines containing | but not in URLs or code)
2546            // Simple heuristic: tables typically have | at start/end or multiple |
2547            if trimmed.contains('|') && trimmed.len() > 1 {
2548                // Don't treat URLs with | as tables
2549                if !trimmed.contains("](") && !trimmed.contains("http") {
2550                    // More robust check: tables usually have multiple | or | at edges
2551                    let pipe_count = trimmed.matches('|').count();
2552                    if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
2553                        return true; // Has meaningful content - tables separate lists
2554                    }
2555                }
2556            }
2557
2558            // Blockquotes separate lists
2559            if trimmed.starts_with('>') {
2560                return true; // Has meaningful content - blockquotes separate lists
2561            }
2562
2563            // Code block fences separate lists (unless properly indented as list content)
2564            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2565                let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2566
2567                // Check if this code block is properly indented as list continuation
2568                let min_continuation_indent = if current.is_ordered {
2569                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
2570                } else {
2571                    current.nesting_level + 2
2572                };
2573
2574                if line_indent < min_continuation_indent {
2575                    // This is a standalone code block that separates lists
2576                    return true; // Has meaningful content - standalone code blocks separate lists
2577                }
2578            }
2579
2580            // Check if this line has proper indentation for list continuation
2581            let line_indent = line_info.content.len() - line_info.content.trim_start().len();
2582
2583            // Calculate minimum indentation needed to be list continuation
2584            let min_indent = if current.is_ordered {
2585                current.nesting_level + current.max_marker_width
2586            } else {
2587                current.nesting_level + 2
2588            };
2589
2590            // If the line is not indented enough to be list continuation, it's meaningful content
2591            if line_indent < min_indent {
2592                return true; // Has meaningful content - content not indented as list continuation
2593            }
2594
2595            // If we reach here, the line is properly indented as list continuation
2596            // Continue checking other lines
2597        }
2598    }
2599
2600    // Only blank lines or properly indented list continuation content between blocks
2601    false
2602}
2603
/// Decide whether an already-trimmed line is a horizontal rule.
///
/// The line must start with `-`, `*` or `_`, contain at least three of that same character,
/// and otherwise consist only of spaces and tabs.
fn is_horizontal_rule(trimmed: &str) -> bool {
    // Anything shorter than three bytes cannot hold three markers.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(first) if matches!(first, '-' | '*' | '_') => first,
        _ => return false,
    };

    let only_markers_and_gaps = trimmed.chars().all(|ch| ch == marker || ch == ' ' || ch == '\t');

    only_markers_and_gaps && trimmed.chars().filter(|&ch| ch == marker).count() >= 3
}
2627
/// Unit tests for `LintContext`: construction, offset-to-position mapping, per-line metadata
/// and list item detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let empty = LintContext::new("", MarkdownFlavor::Standard);

        assert_eq!(empty.content, "");
        assert_eq!(empty.line_offsets, vec![0]);
        assert_eq!(empty.offset_to_line_col(0), (1, 1));
        assert!(empty.lines.is_empty());
    }

    /// A document without a newline maps every offset onto line 1.
    #[test]
    fn test_single_line() {
        let heading = LintContext::new("# Hello", MarkdownFlavor::Standard);

        assert_eq!(heading.content, "# Hello");
        assert_eq!(heading.line_offsets, vec![0]);

        for (offset, expected) in [(0, (1, 1)), (3, (1, 4))] {
            assert_eq!(heading.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Offsets in a multi-line document resolve to 1-based line and column pairs.
    #[test]
    fn test_multi_line() {
        let ctx = LintContext::new("# Title\n\nSecond line\nThird line", MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        let cases = [
            (0, (1, 1)),  // start
            (8, (2, 1)),  // start of blank line
            (9, (3, 1)),  // start of 'Second line'
            (15, (3, 7)), // middle of 'Second line'
            (21, (4, 1)), // start of 'Third line'
        ];
        for (offset, expected) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }

    /// Per-line metadata (content, byte offset, indent, blank/code flags) and the lookup helpers.
    #[test]
    fn test_line_info() {
        let ctx = LintContext::new(
            "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```",
            MarkdownFlavor::Standard,
        );

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let title = &ctx.lines[0];
        assert_eq!(title.content, "# Title");
        assert_eq!(title.byte_offset, 0);
        assert_eq!(title.indent, 0);
        assert!(!title.is_blank);
        assert!(!title.in_code_block);
        assert!(title.list_item.is_none());

        // Line 2: "    indented"
        let indented = &ctx.lines[1];
        assert_eq!(indented.content, "    indented");
        assert_eq!(indented.byte_offset, 8);
        assert_eq!(indented.indent, 4);
        assert!(!indented.is_blank);

        // Line 3: "" (blank)
        let blank = &ctx.lines[2];
        assert_eq!(blank.content, "");
        assert!(blank.is_blank);

        // Helper methods take 1-based line numbers
        for (line_number, offset, indent) in [(1, 0, 0), (2, 8, 4)] {
            assert_eq!(ctx.line_to_byte_offset(line_number), Some(offset));
            assert_eq!(ctx.line_info(line_number).map(|info| info.indent), Some(indent));
        }
    }

    /// Unordered, nested and ordered markers are detected; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        let ctx = LintContext::new(
            "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list",
            MarkdownFlavor::Standard,
        );

        // Line 1: "- Unordered item"
        let dash = ctx.lines[0].list_item.as_ref().expect("line 1 should be a list item");
        assert_eq!(dash.marker, "-");
        assert!(!dash.is_ordered);
        assert_eq!(dash.marker_column, 0);
        assert_eq!(dash.content_column, 2);

        // Line 2: "  * Nested item"
        let star = ctx.lines[1].list_item.as_ref().expect("line 2 should be a list item");
        assert_eq!(star.marker, "*");
        assert_eq!(star.marker_column, 2);

        // Line 3: "1. Ordered item"
        let numbered = ctx.lines[2].list_item.as_ref().expect("line 3 should be a list item");
        assert_eq!(numbered.marker, "1.");
        assert!(numbered.is_ordered);
        assert_eq!(numbered.number, Some(1));

        // Line 6: "Not a list"
        assert!(ctx.lines[5].list_item.is_none());
    }

    /// Offsets at line starts, just past a character, and at end of input.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let ctx = LintContext::new("a\nb\nc", MarkdownFlavor::Standard);

        // line_offsets: [0, 2, 4]
        let cases = [
            (0, (1, 1)), // 'a'
            (1, (1, 2)), // after 'a'
            (2, (2, 1)), // 'b'
            (3, (2, 2)), // after 'b'
            (4, (3, 1)), // 'c'
            (5, (3, 2)), // after 'c'
        ];
        for (offset, expected) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), expected, "offset {offset}");
        }
    }
}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs