rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Macro for profiling sections - only active in non-WASM builds.
///
/// Evaluates `$code` exactly once and yields its value. When `$profile` is true the
/// elapsed wall-clock time of `$code` is written to stderr as
/// `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}

/// WASM variant of `profile_section!`: no timing, just evaluates `$code`.
///
/// `$name` and `$profile` are still evaluated (after `$code`, mirroring the non-WASM
/// variant) so that they are type-checked on every target and so that a caller's
/// `profile` binding does not become an unused variable on `wasm32`.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let result = $code;
        let _ = (&$name, &$profile);
        result
    }};
}
29
// Comprehensive link pattern that captures both inline links (`[text](url "title")`)
// and reference links (`[text][ref]`).
//
// Flags: `s` makes `.` match newlines (so an escaped character in the link text may be a
// newline) and `x` enables verbose mode, which is why the pattern below can contain
// insignificant whitespace and `#` comments.
//
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
43
// Image pattern: same shape as the link pattern but anchored on a leading `!`, matching
// inline images (`![alt](url "title")`) and reference images (`![alt][ref]`).
//
// Flags: `s` makes `.` match newlines and `x` enables verbose mode (whitespace and `#`
// comments inside the pattern are ignored).
//
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
57
// Reference definition pattern: `[label]: destination "optional title"`.
//
// `(?m)` makes `^`/`$` match at line boundaries, so the pattern is applied per line.
// Up to three leading spaces are allowed before the opening bracket.
// Capture groups: 1 = label (non-empty), 2 = destination (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs: no local pattern is defined here; the centralized `URL_SIMPLE_REGEX` imported from `regex_cache` is used instead.
63
// Pattern for email addresses: a local part of ASCII letters, digits and `._%+-`,
// an `@`, a domain of ASCII letters, digits, `.` and `-`, and a final dot followed by
// at least two ASCII letters. The pattern is unanchored, so it finds addresses anywhere
// in the searched text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures, from the start of the text: optional whitespace, one or more
// adjacent `>` characters, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Pre-computed information about a line
///
/// `LintContext::lines` holds one entry per line; the entry at index `i` describes
/// line `i + 1` (line numbers are 1-indexed throughout this module).
/// The line text itself is not stored: use [`LineInfo::content`] with the original
/// source to obtain it.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line.
    /// `LintContext::new` sets this on every line after the first line of a code span,
    /// up to and including the span's last line.
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
}
110
111impl LineInfo {
112    /// Get the line content as a string slice from the source document
113    pub fn content<'a>(&self, source: &'a str) -> &'a str {
114        &source[self.byte_offset..self.byte_offset + self.byte_len]
115    }
116}
117
/// Information about a list item
///
/// Attached to a [`LineInfo`] through its `list_item` field when the line starts a
/// list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// (presumably `None` for unordered items - confirm against the list parser)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
132
/// Heading style type
///
/// Implements `Eq` and `Hash` in addition to `PartialEq` so a style can be compared
/// exhaustively and used as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
143
/// Parsed link information
///
/// Text fields are `Cow<'a, str>` tied to the lifetime of the linted document, so they
/// can borrow from it instead of allocating.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
168
/// Information about a broken link reported by pulldown-cmark
///
/// Collected into `LintContext::broken_links` while links are parsed.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
177
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Collected into `LintContext::footnote_refs` while links are parsed.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
190
/// Parsed image information
///
/// Mirrors [`ParsedLink`] field for field, with `alt_text` in place of the link text.
/// Text fields are `Cow<'a, str>` tied to the lifetime of the linted document.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
215
/// Reference definition [ref]: url "title"
///
/// Unlike [`ParsedLink`] and [`ParsedImage`], all text is stored as owned `String`s.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
236
/// Parsed code span information
///
/// A span may cover several lines (`end_line > line`). `LintContext` treats
/// `byte_offset..byte_end` as a half-open byte range when testing whether a position
/// lies inside a span.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
257
/// Information about a heading
///
/// Attached to a [`LineInfo`] through its `heading` field. Entries with
/// `is_valid == false` are skipped by [`ValidHeadingsIter`].
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
285
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by [`ValidHeadingsIter`]; both references borrow from
/// the slice of [`LineInfo`] being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
299
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
///
/// Yields [`ValidHeading`] items in document order.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order.
    lines: &'a [LineInfo],
    /// Index into `lines` of the next line to examine (0-indexed).
    current_index: usize,
}
308
309impl<'a> ValidHeadingsIter<'a> {
310    fn new(lines: &'a [LineInfo]) -> Self {
311        Self {
312            lines,
313            current_index: 0,
314        }
315    }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319    type Item = ValidHeading<'a>;
320
321    fn next(&mut self) -> Option<Self::Item> {
322        while self.current_index < self.lines.len() {
323            let idx = self.current_index;
324            self.current_index += 1;
325
326            let line_info = &self.lines[idx];
327            if let Some(heading) = &line_info.heading
328                && heading.is_valid
329            {
330                return Some(ValidHeading {
331                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
332                    heading,
333                    line_info,
334                });
335            }
336        }
337        None
338    }
339}
340
/// Information about a blockquote line
///
/// Attached to a [`LineInfo`] through its `blockquote` field. All text is stored as
/// owned `String`s.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
361
/// Information about a list block
///
/// Collected into `LintContext::list_blocks` during construction.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed line numbers like `start_line`/`end_line` - confirm)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Character frequency data for fast content analysis
///
/// Each field counts occurrences of one character in the document; the parenthesised
/// notes name the Markdown constructs that character can introduce. `Default` yields
/// all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
413
/// Pre-parsed HTML tag information
///
/// Produced on first call to `LintContext::html_tags` and cached afterwards.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
436
/// Pre-parsed emphasis span information
///
/// Produced on first call to `LintContext::emphasis_spans` and cached afterwards.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
457
/// Pre-parsed table row information
///
/// Produced on first call to `LintContext::table_rows` and cached afterwards.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Each entry is one of the strings "left", "center", "right" or "none".
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
470
/// Pre-parsed bare URL information (not in links)
///
/// Produced on first call to `LintContext::bare_urls` and cached afterwards.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
489
/// Shared, pre-parsed view of one Markdown document.
///
/// Built once by `LintContext::new`, which computes most fields eagerly. Fields ending
/// in `_cache` are `OnceLock`s exposed through accessor methods; all of them except
/// `code_spans_cache` start empty and are filled on first access.
pub struct LintContext<'a> {
    /// The full document text.
    pub content: &'a str,
    /// Byte offset at which each line starts. The first entry is always 0, and one entry
    /// is added after every `\n` in `content`.
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    /// Pre-computed line information; index `i` describes line `i + 1`.
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    /// Inline code spans. `new` parses these eagerly and seeds the cell, so the lazy
    /// initialiser in `code_spans()` only runs if the cell is somehow still empty.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    /// Character frequency analysis
    pub char_frequency: CharFrequency,    // Character frequency analysis
    /// Lazy-loaded HTML tags
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    /// Pre-computed Jinja template ranges ({{ }}, {% %})
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
}
515
/// Detailed blockquote parse result with all components
///
/// Returned by `parse_blockquote_detailed`. The four slices borrow from the parsed line,
/// are adjacent, and concatenated in field order reproduce the whole line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The run of adjacent `>` characters (at least one).
    markers: &'a str,
    /// Spaces/tabs immediately following the markers (may be empty).
    spaces_after: &'a str,
    /// Everything after `spaces_after` up to the end of the line.
    content: &'a str,
}
523
524/// Parse blockquote prefix with detailed components using manual parsing
525#[inline]
526fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
527    let bytes = line.as_bytes();
528    let mut pos = 0;
529
530    // Parse leading whitespace (indent)
531    while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
532        pos += 1;
533    }
534    let indent_end = pos;
535
536    // Must have at least one '>' marker
537    if pos >= bytes.len() || bytes[pos] != b'>' {
538        return None;
539    }
540
541    // Parse '>' markers
542    while pos < bytes.len() && bytes[pos] == b'>' {
543        pos += 1;
544    }
545    let markers_end = pos;
546
547    // Parse spaces after markers
548    while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
549        pos += 1;
550    }
551    let spaces_end = pos;
552
553    Some(BlockquoteComponents {
554        indent: &line[0..indent_end],
555        markers: &line[indent_end..markers_end],
556        spaces_after: &line[markers_end..spaces_end],
557        content: &line[spaces_end..],
558    })
559}
560
561impl<'a> LintContext<'a> {
    /// Build a lint context for `content`, pre-computing the structures shared by rules.
    ///
    /// The stages run in a fixed order because later stages consume earlier results:
    /// line offsets, code blocks, HTML comment ranges, autodoc ranges (MkDocs flavor only),
    /// basic line info, HTML blocks, ESM blocks, link byte ranges, headings and
    /// blockquotes, code spans, links/images/reference definitions/list blocks,
    /// character frequency, table blocks, line index and Jinja ranges.
    ///
    /// Code spans are parsed here and stored in their cache up front. HTML tags, emphasis
    /// spans, table rows, bare URLs and the mixed-list-nesting flag are left empty and are
    /// computed on first access through their accessor methods.
    ///
    /// On non-WASM targets, per-stage timings are printed to stderr when the
    /// `RUMDL_PROFILE_QUADRATIC` environment variable is set to a valid Unicode value.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is driven by an environment variable, which is not read on WASM.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset of the start of every line: 0, then the byte after each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Detect code blocks once and cache them
        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        // Pre-compute HTML comment ranges ONCE for all operations
        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Pre-compute line information (without headings/blockquotes yet)
        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // Detect HTML blocks BEFORE heading detection
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        // Detect ESM import/export blocks in MDX files BEFORE heading detection
        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        // Now detect headings and blockquotes
        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        // Parse code spans early so we can exclude them from link/image parsing
        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark lines that are continuations of multi-line code spans
        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
        for span in &code_spans {
            if span.end_line > span.line {
                // Mark lines after the first line as continuations
                for line_num in (span.line + 1)..=span.end_line {
                    // Line numbers are 1-indexed; `lines` is 0-indexed.
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        // Parse links, images, references, and list blocks
        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        // Compute character frequency for fast content analysis
        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Already parsed above, so the cache starts out populated.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            // The remaining caches are filled on first access.
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
719
720    /// Get code spans - computed lazily on first access
721    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722        Arc::clone(
723            self.code_spans_cache
724                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725        )
726    }
727
728    /// Get HTML comment ranges - pre-computed during LintContext construction
729    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730        &self.html_comment_ranges
731    }
732
733    /// Get HTML tags - computed lazily on first access
734    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735        Arc::clone(self.html_tags_cache.get_or_init(|| {
736            Arc::new(Self::parse_html_tags(
737                self.content,
738                &self.lines,
739                &self.code_blocks,
740                self.flavor,
741            ))
742        }))
743    }
744
745    /// Get emphasis spans - computed lazily on first access
746    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747        Arc::clone(
748            self.emphasis_spans_cache
749                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750        )
751    }
752
753    /// Get table rows - computed lazily on first access
754    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755        Arc::clone(
756            self.table_rows_cache
757                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758        )
759    }
760
761    /// Get bare URLs - computed lazily on first access
762    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763        Arc::clone(
764            self.bare_urls_cache
765                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766        )
767    }
768
769    /// Check if document has mixed ordered/unordered list nesting.
770    /// Result is cached after first computation (document-level invariant).
771    /// This is used by MD007 for smart style auto-detection.
772    pub fn has_mixed_list_nesting(&self) -> bool {
773        *self
774            .has_mixed_list_nesting_cache
775            .get_or_init(|| self.compute_mixed_list_nesting())
776    }
777
778    /// Internal computation for mixed list nesting (only called once per LintContext).
779    fn compute_mixed_list_nesting(&self) -> bool {
780        // Track parent list items by their marker position and type
781        // Using marker_column instead of indent because it works correctly
782        // for blockquoted content where indent doesn't account for the prefix
783        // Stack stores: (marker_column, is_ordered)
784        let mut stack: Vec<(usize, bool)> = Vec::new();
785        let mut last_was_blank = false;
786
787        for line_info in &self.lines {
788            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
789            if line_info.in_code_block
790                || line_info.in_front_matter
791                || line_info.in_mkdocstrings
792                || line_info.in_html_comment
793                || line_info.in_esm_block
794            {
795                continue;
796            }
797
798            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
799            if line_info.is_blank {
800                last_was_blank = true;
801                continue;
802            }
803
804            if let Some(list_item) = &line_info.list_item {
805                // Normalize column 1 to column 0 (consistent with MD007 check function)
806                let current_pos = if list_item.marker_column == 1 {
807                    0
808                } else {
809                    list_item.marker_column
810                };
811
812                // If there was a blank line and this item is at root level, reset stack
813                if last_was_blank && current_pos == 0 {
814                    stack.clear();
815                }
816                last_was_blank = false;
817
818                // Pop items at same or greater position (they're siblings or deeper, not parents)
819                while let Some(&(pos, _)) = stack.last() {
820                    if pos >= current_pos {
821                        stack.pop();
822                    } else {
823                        break;
824                    }
825                }
826
827                // Check if immediate parent has different type - this is mixed nesting
828                if let Some(&(_, parent_is_ordered)) = stack.last()
829                    && parent_is_ordered != list_item.is_ordered
830                {
831                    return true; // Found mixed nesting - early exit
832                }
833
834                stack.push((current_pos, list_item.is_ordered));
835            } else {
836                // Non-list line (but not blank) - could be paragraph or other content
837                last_was_blank = false;
838            }
839        }
840
841        false
842    }
843
844    /// Map a byte offset to (line, column)
845    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846        match self.line_offsets.binary_search(&offset) {
847            Ok(line) => (line + 1, 1),
848            Err(line) => {
849                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850                (line, offset - line_start + 1)
851            }
852        }
853    }
854
855    /// Check if a position is within a code block or code span
856    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857        // Check code blocks first
858        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859            return true;
860        }
861
862        // Check inline code spans (lazy load if needed)
863        self.code_spans()
864            .iter()
865            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866    }
867
868    /// Get line information by line number (1-indexed)
869    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870        if line_num > 0 {
871            self.lines.get(line_num - 1)
872        } else {
873            None
874        }
875    }
876
877    /// Get byte offset for a line number (1-indexed)
878    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879        self.line_info(line_num).map(|info| info.byte_offset)
880    }
881
882    /// Get URL for a reference link/image by its ID
883    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884        let normalized_id = ref_id.to_lowercase();
885        self.reference_defs
886            .iter()
887            .find(|def| def.id == normalized_id)
888            .map(|def| def.url.as_str())
889    }
890
891    /// Check if a line is part of a list block
892    pub fn is_in_list_block(&self, line_num: usize) -> bool {
893        self.list_blocks
894            .iter()
895            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896    }
897
898    /// Get the list block containing a specific line
899    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900        self.list_blocks
901            .iter()
902            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903    }
904
905    // Compatibility methods for DocumentStructure migration
906
907    /// Check if a line is within a code block
908    pub fn is_in_code_block(&self, line_num: usize) -> bool {
909        if line_num == 0 || line_num > self.lines.len() {
910            return false;
911        }
912        self.lines[line_num - 1].in_code_block
913    }
914
915    /// Check if a line is within front matter
916    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917        if line_num == 0 || line_num > self.lines.len() {
918            return false;
919        }
920        self.lines[line_num - 1].in_front_matter
921    }
922
923    /// Check if a line is within an HTML block
924    pub fn is_in_html_block(&self, line_num: usize) -> bool {
925        if line_num == 0 || line_num > self.lines.len() {
926            return false;
927        }
928        self.lines[line_num - 1].in_html_block
929    }
930
931    /// Check if a line and column is within a code span
932    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933        if line_num == 0 || line_num > self.lines.len() {
934            return false;
935        }
936
937        // Use the code spans cache to check
938        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
939        // Convert col to 0-indexed for comparison
940        let col_0indexed = if col > 0 { col - 1 } else { 0 };
941        let code_spans = self.code_spans();
942        code_spans.iter().any(|span| {
943            // Check if line is within the span's line range
944            if line_num < span.line || line_num > span.end_line {
945                return false;
946            }
947
948            if span.line == span.end_line {
949                // Single-line span: check column bounds
950                col_0indexed >= span.start_col && col_0indexed < span.end_col
951            } else if line_num == span.line {
952                // First line of multi-line span: anything after start_col is in span
953                col_0indexed >= span.start_col
954            } else if line_num == span.end_line {
955                // Last line of multi-line span: anything before end_col is in span
956                col_0indexed < span.end_col
957            } else {
958                // Middle line of multi-line span: entire line is in span
959                true
960            }
961        })
962    }
963
964    /// Check if a byte offset is within a code span
965    #[inline]
966    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967        let code_spans = self.code_spans();
968        code_spans
969            .iter()
970            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971    }
972
973    /// Check if a byte position is within a reference definition
974    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
975    #[inline]
976    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977        self.reference_defs
978            .iter()
979            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980    }
981
982    /// Check if a byte position is within an HTML comment
983    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
984    /// where k is the number of HTML comments (typically very small)
985    #[inline]
986    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987        self.html_comment_ranges
988            .iter()
989            .any(|range| byte_pos >= range.start && byte_pos < range.end)
990    }
991
992    /// Check if a byte position is within an HTML tag (including multiline tags)
993    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
994    #[inline]
995    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996        self.html_tags()
997            .iter()
998            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999    }
1000
1001    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1002    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003        self.jinja_ranges
1004            .iter()
1005            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006    }
1007
1008    /// Check if a byte position is within a link reference definition title
1009    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010        self.reference_defs.iter().any(|def| {
1011            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012                byte_pos >= start && byte_pos < end
1013            } else {
1014                false
1015            }
1016        })
1017    }
1018
1019    /// Check if content has any instances of a specific character (fast)
1020    pub fn has_char(&self, ch: char) -> bool {
1021        match ch {
1022            '#' => self.char_frequency.hash_count > 0,
1023            '*' => self.char_frequency.asterisk_count > 0,
1024            '_' => self.char_frequency.underscore_count > 0,
1025            '-' => self.char_frequency.hyphen_count > 0,
1026            '+' => self.char_frequency.plus_count > 0,
1027            '>' => self.char_frequency.gt_count > 0,
1028            '|' => self.char_frequency.pipe_count > 0,
1029            '[' => self.char_frequency.bracket_count > 0,
1030            '`' => self.char_frequency.backtick_count > 0,
1031            '<' => self.char_frequency.lt_count > 0,
1032            '!' => self.char_frequency.exclamation_count > 0,
1033            '\n' => self.char_frequency.newline_count > 0,
1034            _ => self.content.contains(ch), // Fallback for other characters
1035        }
1036    }
1037
1038    /// Get count of a specific character (fast)
1039    pub fn char_count(&self, ch: char) -> usize {
1040        match ch {
1041            '#' => self.char_frequency.hash_count,
1042            '*' => self.char_frequency.asterisk_count,
1043            '_' => self.char_frequency.underscore_count,
1044            '-' => self.char_frequency.hyphen_count,
1045            '+' => self.char_frequency.plus_count,
1046            '>' => self.char_frequency.gt_count,
1047            '|' => self.char_frequency.pipe_count,
1048            '[' => self.char_frequency.bracket_count,
1049            '`' => self.char_frequency.backtick_count,
1050            '<' => self.char_frequency.lt_count,
1051            '!' => self.char_frequency.exclamation_count,
1052            '\n' => self.char_frequency.newline_count,
1053            _ => self.content.matches(ch).count(), // Fallback for other characters
1054        }
1055    }
1056
1057    /// Check if content likely contains headings (fast)
1058    pub fn likely_has_headings(&self) -> bool {
1059        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1060    }
1061
1062    /// Check if content likely contains lists (fast)
1063    pub fn likely_has_lists(&self) -> bool {
1064        self.char_frequency.asterisk_count > 0
1065            || self.char_frequency.hyphen_count > 0
1066            || self.char_frequency.plus_count > 0
1067    }
1068
1069    /// Check if content likely contains emphasis (fast)
1070    pub fn likely_has_emphasis(&self) -> bool {
1071        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072    }
1073
1074    /// Check if content likely contains tables (fast)
1075    pub fn likely_has_tables(&self) -> bool {
1076        self.char_frequency.pipe_count > 2
1077    }
1078
1079    /// Check if content likely contains blockquotes (fast)
1080    pub fn likely_has_blockquotes(&self) -> bool {
1081        self.char_frequency.gt_count > 0
1082    }
1083
1084    /// Check if content likely contains code (fast)
1085    pub fn likely_has_code(&self) -> bool {
1086        self.char_frequency.backtick_count > 0
1087    }
1088
1089    /// Check if content likely contains links or images (fast)
1090    pub fn likely_has_links_or_images(&self) -> bool {
1091        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092    }
1093
1094    /// Check if content likely contains HTML (fast)
1095    pub fn likely_has_html(&self) -> bool {
1096        self.char_frequency.lt_count > 0
1097    }
1098
1099    /// Get HTML tags on a specific line
1100    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101        self.html_tags()
1102            .iter()
1103            .filter(|tag| tag.line == line_num)
1104            .cloned()
1105            .collect()
1106    }
1107
1108    /// Get emphasis spans on a specific line
1109    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110        self.emphasis_spans()
1111            .iter()
1112            .filter(|span| span.line == line_num)
1113            .cloned()
1114            .collect()
1115    }
1116
1117    /// Get table rows on a specific line
1118    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119        self.table_rows()
1120            .iter()
1121            .filter(|row| row.line == line_num)
1122            .cloned()
1123            .collect()
1124    }
1125
1126    /// Get bare URLs on a specific line
1127    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128        self.bare_urls()
1129            .iter()
1130            .filter(|url| url.line == line_num)
1131            .cloned()
1132            .collect()
1133    }
1134
1135    /// Find the line index for a given byte offset using binary search.
1136    /// Returns (line_index, line_number, column) where:
1137    /// - line_index is the 0-based index in the lines array
1138    /// - line_number is the 1-based line number
1139    /// - column is the byte offset within that line
1140    #[inline]
1141    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142        // Binary search to find the line containing this byte offset
1143        let idx = match lines.binary_search_by(|line| {
1144            if byte_offset < line.byte_offset {
1145                std::cmp::Ordering::Greater
1146            } else if byte_offset > line.byte_offset + line.byte_len {
1147                std::cmp::Ordering::Less
1148            } else {
1149                std::cmp::Ordering::Equal
1150            }
1151        }) {
1152            Ok(idx) => idx,
1153            Err(idx) => idx.saturating_sub(1),
1154        };
1155
1156        let line = &lines[idx];
1157        let line_num = idx + 1;
1158        let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160        (idx, line_num, col)
1161    }
1162
1163    /// Check if a byte offset is within a code span using binary search
1164    #[inline]
1165    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166        // Since spans are sorted by byte_offset, use partition_point for binary search
1167        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169        // Check the span that starts at or before our offset
1170        if idx > 0 {
1171            let span = &code_spans[idx - 1];
1172            if offset >= span.byte_offset && offset < span.byte_end {
1173                return true;
1174            }
1175        }
1176
1177        false
1178    }
1179
1180    /// Collect byte ranges of all links using pulldown-cmark
1181    /// This is used to skip heading detection for lines that fall within link syntax
1182    /// (e.g., multiline links like `[text](url\n#fragment)`)
1183    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186        let mut link_ranges = Vec::new();
1187        let mut options = Options::empty();
1188        options.insert(Options::ENABLE_WIKILINKS);
1189        options.insert(Options::ENABLE_FOOTNOTES);
1190
1191        let parser = Parser::new_ext(content, options).into_offset_iter();
1192        let mut link_stack: Vec<usize> = Vec::new();
1193
1194        for (event, range) in parser {
1195            match event {
1196                Event::Start(Tag::Link { .. }) => {
1197                    link_stack.push(range.start);
1198                }
1199                Event::End(TagEnd::Link) => {
1200                    if let Some(start_pos) = link_stack.pop() {
1201                        link_ranges.push((start_pos, range.end));
1202                    }
1203                }
1204                _ => {}
1205            }
1206        }
1207
1208        link_ranges
1209    }
1210
1211    /// Parse all links in the content
1212    fn parse_links(
1213        content: &'a str,
1214        lines: &[LineInfo],
1215        code_blocks: &[(usize, usize)],
1216        code_spans: &[CodeSpan],
1217        flavor: MarkdownFlavor,
1218        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221        use std::collections::HashSet;
1222
1223        let mut links = Vec::with_capacity(content.len() / 500);
1224        let mut broken_links = Vec::new();
1225        let mut footnote_refs = Vec::new();
1226
1227        // Track byte positions of links found by pulldown-cmark
1228        let mut found_positions = HashSet::new();
1229
1230        // Use pulldown-cmark's streaming parser with BrokenLink callback
1231        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1232        // This automatically handles:
1233        // - Escaped links (won't generate events)
1234        // - Links in code blocks/spans (won't generate Link events)
1235        // - Images (generates Tag::Image instead)
1236        // - Reference resolution (dest_url is already resolved!)
1237        // - Broken references (callback is invoked)
1238        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1239        let mut options = Options::empty();
1240        options.insert(Options::ENABLE_WIKILINKS);
1241        options.insert(Options::ENABLE_FOOTNOTES);
1242
1243        let parser = Parser::new_with_broken_link_callback(
1244            content,
1245            options,
1246            Some(|link: BrokenLink<'_>| {
1247                broken_links.push(BrokenLinkInfo {
1248                    reference: link.reference.to_string(),
1249                    span: link.span.clone(),
1250                });
1251                None
1252            }),
1253        )
1254        .into_offset_iter();
1255
1256        let mut link_stack: Vec<(
1257            usize,
1258            usize,
1259            pulldown_cmark::CowStr<'a>,
1260            LinkType,
1261            pulldown_cmark::CowStr<'a>,
1262        )> = Vec::new();
1263        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1264
1265        for (event, range) in parser {
1266            match event {
1267                Event::Start(Tag::Link {
1268                    link_type,
1269                    dest_url,
1270                    id,
1271                    ..
1272                }) => {
1273                    // Link start - record position, URL, and reference ID
1274                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1275                    text_chunks.clear();
1276                }
1277                Event::Text(text) if !link_stack.is_empty() => {
1278                    // Track text content with its byte range
1279                    text_chunks.push((text.to_string(), range.start, range.end));
1280                }
1281                Event::Code(code) if !link_stack.is_empty() => {
1282                    // Include inline code in link text (with backticks)
1283                    let code_text = format!("`{code}`");
1284                    text_chunks.push((code_text, range.start, range.end));
1285                }
1286                Event::End(TagEnd::Link) => {
1287                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288                        // Skip if in HTML comment
1289                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290                            text_chunks.clear();
1291                            continue;
1292                        }
1293
1294                        // Find line and column information
1295                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297                        // Skip if this link is on a MkDocs snippet line
1298                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299                            text_chunks.clear();
1300                            continue;
1301                        }
1302
1303                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305                        let is_reference = matches!(
1306                            link_type,
1307                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308                        );
1309
1310                        // Extract link text directly from source bytes to preserve escaping
1311                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1312                        let link_text = if start_pos < content.len() {
1313                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315                            // Find MATCHING ] by tracking bracket depth for nested brackets
1316                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1317                            // Brackets inside code spans (between backticks) should be ignored
1318                            let mut close_pos = None;
1319                            let mut depth = 0;
1320                            let mut in_code_span = false;
1321
1322                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323                                // Count preceding backslashes
1324                                let mut backslash_count = 0;
1325                                let mut j = i;
1326                                while j > 0 && link_bytes[j - 1] == b'\\' {
1327                                    backslash_count += 1;
1328                                    j -= 1;
1329                                }
1330                                let is_escaped = backslash_count % 2 != 0;
1331
1332                                // Track code spans - backticks toggle in/out of code
1333                                if byte == b'`' && !is_escaped {
1334                                    in_code_span = !in_code_span;
1335                                }
1336
1337                                // Only count brackets when NOT in a code span
1338                                if !is_escaped && !in_code_span {
1339                                    if byte == b'[' {
1340                                        depth += 1;
1341                                    } else if byte == b']' {
1342                                        if depth == 0 {
1343                                            // Found the matching closing bracket
1344                                            close_pos = Some(i);
1345                                            break;
1346                                        } else {
1347                                            depth -= 1;
1348                                        }
1349                                    }
1350                                }
1351                            }
1352
1353                            if let Some(pos) = close_pos {
1354                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355                            } else {
1356                                Cow::Borrowed("")
1357                            }
1358                        } else {
1359                            Cow::Borrowed("")
1360                        };
1361
1362                        // For reference links, use the actual reference ID from pulldown-cmark
1363                        let reference_id = if is_reference && !ref_id.is_empty() {
1364                            Some(Cow::Owned(ref_id.to_lowercase()))
1365                        } else if is_reference {
1366                            // For collapsed/shortcut references without explicit ID, use the link text
1367                            Some(Cow::Owned(link_text.to_lowercase()))
1368                        } else {
1369                            None
1370                        };
1371
1372                        // Track this position as found
1373                        found_positions.insert(start_pos);
1374
1375                        links.push(ParsedLink {
1376                            line: line_num,
1377                            start_col: col_start,
1378                            end_col: col_end,
1379                            byte_offset: start_pos,
1380                            byte_end: range.end,
1381                            text: link_text,
1382                            url: Cow::Owned(url.to_string()),
1383                            is_reference,
1384                            reference_id,
1385                            link_type,
1386                        });
1387
1388                        text_chunks.clear();
1389                    }
1390                }
1391                Event::FootnoteReference(footnote_id) => {
1392                    // Capture footnote references like [^1], [^note]
1393                    // Skip if in HTML comment
1394                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395                        continue;
1396                    }
1397
1398                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399                    footnote_refs.push(FootnoteRef {
1400                        id: footnote_id.to_string(),
1401                        line: line_num,
1402                        byte_offset: range.start,
1403                        byte_end: range.end,
1404                    });
1405                }
1406                _ => {}
1407            }
1408        }
1409
1410        // Also find undefined references using regex
1411        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1412        // because the reference is undefined
1413        for cap in LINK_PATTERN.captures_iter(content) {
1414            let full_match = cap.get(0).unwrap();
1415            let match_start = full_match.start();
1416            let match_end = full_match.end();
1417
1418            // Skip if this was already found by pulldown-cmark (it's a valid link)
1419            if found_positions.contains(&match_start) {
1420                continue;
1421            }
1422
1423            // Skip if escaped
1424            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425                continue;
1426            }
1427
1428            // Skip if it's an image
1429            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430                continue;
1431            }
1432
1433            // Skip if in code block
1434            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435                continue;
1436            }
1437
1438            // Skip if in code span
1439            if Self::is_offset_in_code_span(code_spans, match_start) {
1440                continue;
1441            }
1442
1443            // Skip if in HTML comment
1444            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445                continue;
1446            }
1447
1448            // Find line and column information
1449            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451            // Skip if this link is on a MkDocs snippet line
1452            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453                continue;
1454            }
1455
1456            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458            let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460            // Only process reference links (group 6)
1461            if let Some(ref_id) = cap.get(6) {
1462                let ref_id_str = ref_id.as_str();
1463                let normalized_ref = if ref_id_str.is_empty() {
1464                    Cow::Owned(text.to_lowercase()) // Implicit reference
1465                } else {
1466                    Cow::Owned(ref_id_str.to_lowercase())
1467                };
1468
1469                // This is an undefined reference (pulldown-cmark didn't parse it)
1470                links.push(ParsedLink {
1471                    line: line_num,
1472                    start_col: col_start,
1473                    end_col: col_end,
1474                    byte_offset: match_start,
1475                    byte_end: match_end,
1476                    text: Cow::Borrowed(text),
1477                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1478                    is_reference: true,
1479                    reference_id: Some(normalized_ref),
1480                    link_type: LinkType::Reference, // Undefined references are reference-style
1481                });
1482            }
1483        }
1484
1485        (links, broken_links, footnote_refs)
1486    }
1487
1488    /// Parse all images in the content
1489    fn parse_images(
1490        content: &'a str,
1491        lines: &[LineInfo],
1492        code_blocks: &[(usize, usize)],
1493        code_spans: &[CodeSpan],
1494        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495    ) -> Vec<ParsedImage<'a>> {
1496        use crate::utils::skip_context::is_in_html_comment_ranges;
1497        use std::collections::HashSet;
1498
1499        // Pre-size based on a heuristic: images are less common than links
1500        let mut images = Vec::with_capacity(content.len() / 1000);
1501        let mut found_positions = HashSet::new();
1502
1503        // Use pulldown-cmark for parsing - more accurate and faster
1504        let parser = Parser::new(content).into_offset_iter();
1505        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506            Vec::new();
1507        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1508
1509        for (event, range) in parser {
1510            match event {
1511                Event::Start(Tag::Image {
1512                    link_type,
1513                    dest_url,
1514                    id,
1515                    ..
1516                }) => {
1517                    image_stack.push((range.start, dest_url, link_type, id));
1518                    text_chunks.clear();
1519                }
1520                Event::Text(text) if !image_stack.is_empty() => {
1521                    text_chunks.push((text.to_string(), range.start, range.end));
1522                }
1523                Event::Code(code) if !image_stack.is_empty() => {
1524                    let code_text = format!("`{code}`");
1525                    text_chunks.push((code_text, range.start, range.end));
1526                }
1527                Event::End(TagEnd::Image) => {
1528                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529                        // Skip if in code block
1530                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531                            continue;
1532                        }
1533
1534                        // Skip if in code span
1535                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1536                            continue;
1537                        }
1538
1539                        // Skip if in HTML comment
1540                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541                            continue;
1542                        }
1543
1544                        // Find line and column using binary search
1545                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548                        let is_reference = matches!(
1549                            link_type,
1550                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551                        );
1552
1553                        // Extract alt text directly from source bytes to preserve escaping
1554                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1555                        let alt_text = if start_pos < content.len() {
1556                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558                            // Find MATCHING ] by tracking bracket depth for nested brackets
1559                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1560                            let mut close_pos = None;
1561                            let mut depth = 0;
1562
1563                            if image_bytes.len() > 2 {
1564                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565                                    // Count preceding backslashes
1566                                    let mut backslash_count = 0;
1567                                    let mut j = i;
1568                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1569                                        backslash_count += 1;
1570                                        j -= 1;
1571                                    }
1572                                    let is_escaped = backslash_count % 2 != 0;
1573
1574                                    if !is_escaped {
1575                                        if byte == b'[' {
1576                                            depth += 1;
1577                                        } else if byte == b']' {
1578                                            if depth == 0 {
1579                                                // Found the matching closing bracket
1580                                                close_pos = Some(i);
1581                                                break;
1582                                            } else {
1583                                                depth -= 1;
1584                                            }
1585                                        }
1586                                    }
1587                                }
1588                            }
1589
1590                            if let Some(pos) = close_pos {
1591                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592                            } else {
1593                                Cow::Borrowed("")
1594                            }
1595                        } else {
1596                            Cow::Borrowed("")
1597                        };
1598
1599                        let reference_id = if is_reference && !ref_id.is_empty() {
1600                            Some(Cow::Owned(ref_id.to_lowercase()))
1601                        } else if is_reference {
1602                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1603                        } else {
1604                            None
1605                        };
1606
1607                        found_positions.insert(start_pos);
1608                        images.push(ParsedImage {
1609                            line: line_num,
1610                            start_col: col_start,
1611                            end_col: col_end,
1612                            byte_offset: start_pos,
1613                            byte_end: range.end,
1614                            alt_text,
1615                            url: Cow::Owned(url.to_string()),
1616                            is_reference,
1617                            reference_id,
1618                            link_type,
1619                        });
1620                    }
1621                }
1622                _ => {}
1623            }
1624        }
1625
1626        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1627        for cap in IMAGE_PATTERN.captures_iter(content) {
1628            let full_match = cap.get(0).unwrap();
1629            let match_start = full_match.start();
1630            let match_end = full_match.end();
1631
1632            // Skip if already found by pulldown-cmark
1633            if found_positions.contains(&match_start) {
1634                continue;
1635            }
1636
1637            // Skip if the ! is escaped
1638            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639                continue;
1640            }
1641
1642            // Skip if in code block, code span, or HTML comment
1643            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644                || Self::is_offset_in_code_span(code_spans, match_start)
1645                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646            {
1647                continue;
1648            }
1649
1650            // Only process reference images (undefined references not found by pulldown-cmark)
1651            if let Some(ref_id) = cap.get(6) {
1652                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655                let ref_id_str = ref_id.as_str();
1656                let normalized_ref = if ref_id_str.is_empty() {
1657                    Cow::Owned(alt_text.to_lowercase())
1658                } else {
1659                    Cow::Owned(ref_id_str.to_lowercase())
1660                };
1661
1662                images.push(ParsedImage {
1663                    line: line_num,
1664                    start_col: col_start,
1665                    end_col: col_end,
1666                    byte_offset: match_start,
1667                    byte_end: match_end,
1668                    alt_text: Cow::Borrowed(alt_text),
1669                    url: Cow::Borrowed(""),
1670                    is_reference: true,
1671                    reference_id: Some(normalized_ref),
1672                    link_type: LinkType::Reference, // Undefined references are reference-style
1673                });
1674            }
1675        }
1676
1677        images
1678    }
1679
1680    /// Parse reference definitions
1681    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682        // Pre-size based on lines count as reference definitions are line-based
1683        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1684
1685        for (line_idx, line_info) in lines.iter().enumerate() {
1686            // Skip lines in code blocks
1687            if line_info.in_code_block {
1688                continue;
1689            }
1690
1691            let line = line_info.content(content);
1692            let line_num = line_idx + 1;
1693
1694            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695                let id = cap.get(1).unwrap().as_str().to_lowercase();
1696                let url = cap.get(2).unwrap().as_str().to_string();
1697                let title_match = cap.get(3).or_else(|| cap.get(4));
1698                let title = title_match.map(|m| m.as_str().to_string());
1699
1700                // Calculate byte positions
1701                // The match starts at the beginning of the line (0) and extends to the end
1702                let match_obj = cap.get(0).unwrap();
1703                let byte_offset = line_info.byte_offset + match_obj.start();
1704                let byte_end = line_info.byte_offset + match_obj.end();
1705
1706                // Calculate title byte positions (includes the quote character before content)
1707                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708                    // The match is the content inside quotes, so we include the quote before
1709                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1710                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1711                    (Some(start), Some(end))
1712                } else {
1713                    (None, None)
1714                };
1715
1716                refs.push(ReferenceDef {
1717                    line: line_num,
1718                    id,
1719                    url,
1720                    title,
1721                    byte_offset,
1722                    byte_end,
1723                    title_byte_start,
1724                    title_byte_end,
1725                });
1726            }
1727        }
1728
1729        refs
1730    }
1731
1732    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1733    /// Handles nested blockquotes like `> > > content`
1734    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1735    #[inline]
1736    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1737        let trimmed_start = line.trim_start();
1738        if !trimmed_start.starts_with('>') {
1739            return None;
1740        }
1741
1742        // Track total prefix length to handle nested blockquotes
1743        let mut remaining = line;
1744        let mut total_prefix_len = 0;
1745
1746        loop {
1747            let trimmed = remaining.trim_start();
1748            if !trimmed.starts_with('>') {
1749                break;
1750            }
1751
1752            // Add leading whitespace + '>' to prefix
1753            let leading_ws_len = remaining.len() - trimmed.len();
1754            total_prefix_len += leading_ws_len + 1;
1755
1756            let after_gt = &trimmed[1..];
1757
1758            // Handle optional whitespace after '>' (space or tab)
1759            if let Some(stripped) = after_gt.strip_prefix(' ') {
1760                total_prefix_len += 1;
1761                remaining = stripped;
1762            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1763                total_prefix_len += 1;
1764                remaining = stripped;
1765            } else {
1766                remaining = after_gt;
1767            }
1768        }
1769
1770        Some((&line[..total_prefix_len], remaining))
1771    }
1772
1773    /// Fast unordered list parser - replaces regex for 5-10x speedup
1774    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1775    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1776    #[inline]
1777    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1778        let bytes = line.as_bytes();
1779        let mut i = 0;
1780
1781        // Skip leading whitespace
1782        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1783            i += 1;
1784        }
1785
1786        // Check for marker
1787        if i >= bytes.len() {
1788            return None;
1789        }
1790        let marker = bytes[i] as char;
1791        if marker != '-' && marker != '*' && marker != '+' {
1792            return None;
1793        }
1794        let marker_pos = i;
1795        i += 1;
1796
1797        // Collect spacing after marker (space or tab only)
1798        let spacing_start = i;
1799        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1800            i += 1;
1801        }
1802
1803        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1804    }
1805
1806    /// Fast ordered list parser - replaces regex for 5-10x speedup
1807    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1808    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1809    #[inline]
1810    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1811        let bytes = line.as_bytes();
1812        let mut i = 0;
1813
1814        // Skip leading whitespace
1815        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1816            i += 1;
1817        }
1818
1819        // Collect digits
1820        let number_start = i;
1821        while i < bytes.len() && bytes[i].is_ascii_digit() {
1822            i += 1;
1823        }
1824        if i == number_start {
1825            return None; // No digits found
1826        }
1827
1828        // Check for delimiter
1829        if i >= bytes.len() {
1830            return None;
1831        }
1832        let delimiter = bytes[i] as char;
1833        if delimiter != '.' && delimiter != ')' {
1834            return None;
1835        }
1836        let delimiter_pos = i;
1837        i += 1;
1838
1839        // Collect spacing after delimiter (space or tab only)
1840        let spacing_start = i;
1841        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1842            i += 1;
1843        }
1844
1845        Some((
1846            &line[..number_start],
1847            &line[number_start..delimiter_pos],
1848            delimiter,
1849            &line[spacing_start..i],
1850            &line[i..],
1851        ))
1852    }
1853
1854    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1855    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1856    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1857        let num_lines = line_offsets.len();
1858        let mut in_code_block = vec![false; num_lines];
1859
1860        // For each code block, mark all lines within it
1861        for &(start, end) in code_blocks {
1862            // Ensure we're at valid UTF-8 boundaries
1863            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1864                let mut boundary = start;
1865                while boundary > 0 && !content.is_char_boundary(boundary) {
1866                    boundary -= 1;
1867                }
1868                boundary
1869            } else {
1870                start
1871            };
1872
1873            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1874                let mut boundary = end;
1875                while boundary < content.len() && !content.is_char_boundary(boundary) {
1876                    boundary += 1;
1877                }
1878                boundary
1879            } else {
1880                end.min(content.len())
1881            };
1882
1883            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
1884            // That function now has proper list context awareness (see code_block_utils.rs)
1885            // and correctly distinguishes between:
1886            // - Fenced code blocks (``` or ~~~)
1887            // - Indented code blocks at document level (4 spaces + blank line before)
1888            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
1889            //
1890            // We no longer need to re-validate here. The original validation logic
1891            // was causing false positives by marking list continuation paragraphs as
1892            // code blocks when they have 4 spaces of indentation.
1893
1894            // Use binary search to find the first and last line indices
1895            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1896            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1897            //
1898            // Find the line that CONTAINS safe_start: the line with the largest
1899            // start offset that is <= safe_start. partition_point gives us the
1900            // first line that starts AFTER safe_start, so we subtract 1.
1901            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1902            let first_line = first_line_after.saturating_sub(1);
1903            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1904
1905            // Mark all lines in the range at once
1906            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1907                *flag = true;
1908            }
1909        }
1910
1911        in_code_block
1912    }
1913
1914    /// Pre-compute basic line information (without headings/blockquotes)
1915    fn compute_basic_line_info(
1916        content: &str,
1917        line_offsets: &[usize],
1918        code_blocks: &[(usize, usize)],
1919        flavor: MarkdownFlavor,
1920        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922    ) -> Vec<LineInfo> {
1923        let content_lines: Vec<&str> = content.lines().collect();
1924        let mut lines = Vec::with_capacity(content_lines.len());
1925
1926        // Pre-compute which lines are in code blocks
1927        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929        // Detect front matter boundaries FIRST, before any other parsing
1930        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1931        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933        for (i, line) in content_lines.iter().enumerate() {
1934            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935            let indent = line.len() - line.trim_start().len();
1936            // Compute visual indent with proper CommonMark tab expansion
1937            let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1940            let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942            // For blank detection, consider blockquote context
1943            let is_blank = if let Some((_, content)) = blockquote_parse {
1944                // In blockquote context, check if content after prefix is blank
1945                content.trim().is_empty()
1946            } else {
1947                line.trim().is_empty()
1948            };
1949
1950            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1951            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1954            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956            // Check if the ENTIRE line is within an HTML comment (not just the line start)
1957            // This ensures content after `-->` on the same line is not incorrectly skipped
1958            let line_end_offset = byte_offset + line.len();
1959            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960                html_comment_ranges,
1961                byte_offset,
1962                line_end_offset,
1963            );
1964            let list_item = if !(in_code_block
1965                || is_blank
1966                || in_mkdocstrings
1967                || in_html_comment
1968                || (front_matter_end > 0 && i < front_matter_end))
1969            {
1970                // Strip blockquote prefix if present for list detection (reuse cached result)
1971                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972                    (content, prefix.len())
1973                } else {
1974                    (&**line, 0)
1975                };
1976
1977                if let Some((leading_spaces, marker, spacing, _content)) =
1978                    Self::parse_unordered_list(line_for_list_check)
1979                {
1980                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1981                    let content_column = marker_column + 1 + spacing.len();
1982
1983                    // According to CommonMark spec, unordered list items MUST have at least one space
1984                    // after the marker (-, *, or +). Without a space, it's not a list item.
1985                    // This also naturally handles cases like:
1986                    // - *emphasis* (not a list)
1987                    // - **bold** (not a list)
1988                    // - --- (horizontal rule, not a list)
1989                    if spacing.is_empty() {
1990                        None
1991                    } else {
1992                        Some(ListItemInfo {
1993                            marker: marker.to_string(),
1994                            is_ordered: false,
1995                            number: None,
1996                            marker_column,
1997                            content_column,
1998                        })
1999                    }
2000                } else if let Some((leading_spaces, number_str, delimiter, spacing, content)) =
2001                    Self::parse_ordered_list(line_for_list_check)
2002                {
2003                    let marker = format!("{number_str}{delimiter}");
2004                    let marker_column = blockquote_prefix_len + leading_spaces.len();
2005                    let content_column = marker_column + marker.len() + spacing.len();
2006
2007                    // CommonMark spec: If content follows the marker, a space is required.
2008                    // But if the line ends after the marker (empty content or whitespace-only),
2009                    // no space is needed. Examples:
2010                    // - "1." (valid - no content after marker)
2011                    // - "1. " (valid - space before empty content)
2012                    // - "1. text" (valid - space before content)
2013                    // - "1.text" (INVALID - content without space)
2014                    let content_after_spacing = content.trim();
2015                    if spacing.is_empty() && !content_after_spacing.is_empty() {
2016                        None
2017                    } else {
2018                        Some(ListItemInfo {
2019                            marker,
2020                            is_ordered: true,
2021                            number: number_str.parse().ok(),
2022                            marker_column,
2023                            content_column,
2024                        })
2025                    }
2026                } else {
2027                    None
2028                }
2029            } else {
2030                None
2031            };
2032
2033            // Detect horizontal rules (only outside code blocks and frontmatter)
2034            // Uses CommonMark-compliant check including leading indentation validation
2035            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2036            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2037
2038            lines.push(LineInfo {
2039                byte_offset,
2040                byte_len: line.len(),
2041                indent,
2042                visual_indent,
2043                is_blank,
2044                in_code_block,
2045                in_front_matter,
2046                in_html_block: false, // Will be populated after line creation
2047                in_html_comment,
2048                list_item,
2049                heading: None,    // Will be populated in second pass for Setext headings
2050                blockquote: None, // Will be populated after line creation
2051                in_mkdocstrings,
2052                in_esm_block: false, // Will be populated after line creation for MDX files
2053                in_code_span_continuation: false, // Will be populated after code spans are parsed
2054                is_horizontal_rule: is_hr,
2055            });
2056        }
2057
2058        lines
2059    }
2060
2061    /// Detect headings and blockquotes (called after HTML block detection)
2062    fn detect_headings_and_blockquotes(
2063        content: &str,
2064        lines: &mut [LineInfo],
2065        flavor: MarkdownFlavor,
2066        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2067        link_byte_ranges: &[(usize, usize)],
2068    ) {
2069        // Regex for heading detection
2070        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2071            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2072        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2073            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2074
2075        let content_lines: Vec<&str> = content.lines().collect();
2076
2077        // Detect front matter boundaries to skip those lines
2078        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2079
2080        // Detect headings (including Setext which needs look-ahead) and blockquotes
2081        for i in 0..lines.len() {
2082            if lines[i].in_code_block {
2083                continue;
2084            }
2085
2086            // Skip lines in front matter
2087            if front_matter_end > 0 && i < front_matter_end {
2088                continue;
2089            }
2090
2091            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2092            if lines[i].in_html_block {
2093                continue;
2094            }
2095
2096            let line = content_lines[i];
2097
2098            // Check for blockquotes (even on blank lines within blockquotes)
2099            if let Some(bq) = parse_blockquote_detailed(line) {
2100                let nesting_level = bq.markers.len(); // Each '>' is one level
2101                let marker_column = bq.indent.len();
2102
2103                // Build the prefix (indentation + markers + space)
2104                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2105
2106                // Check for various blockquote issues
2107                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2108                // Only flag multiple literal spaces, not tabs
2109                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2110                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2111
2112                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2113                // MD028 flags empty blockquote lines that don't have a single space after the marker
2114                // Lines like "> " or ">> " are already correct and don't need fixing
2115                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2116
2117                lines[i].blockquote = Some(BlockquoteInfo {
2118                    nesting_level,
2119                    indent: bq.indent.to_string(),
2120                    marker_column,
2121                    prefix,
2122                    content: bq.content.to_string(),
2123                    has_no_space_after_marker: has_no_space,
2124                    has_multiple_spaces_after_marker: has_multiple_spaces,
2125                    needs_md028_fix,
2126                });
2127            }
2128
2129            // Skip heading detection for blank lines
2130            if lines[i].is_blank {
2131                continue;
2132            }
2133
2134            // Check for ATX headings (but skip MkDocs snippet lines)
2135            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2136            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2137                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2138                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2139            } else {
2140                false
2141            };
2142
2143            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2144                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2145                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2146                    continue;
2147                }
2148                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2149                // This prevents false positives where `#fragment` is detected as a heading
2150                let line_offset = lines[i].byte_offset;
2151                if link_byte_ranges
2152                    .iter()
2153                    .any(|&(start, end)| line_offset > start && line_offset < end)
2154                {
2155                    continue;
2156                }
2157                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2158                let hashes = caps.get(2).map_or("", |m| m.as_str());
2159                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2160                let rest = caps.get(4).map_or("", |m| m.as_str());
2161
2162                let level = hashes.len() as u8;
2163                let marker_column = leading_spaces.len();
2164
2165                // Check for closing sequence, but handle custom IDs that might come after
2166                let (text, has_closing, closing_seq) = {
2167                    // First check if there's a custom ID at the end
2168                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2169                        // Check if this looks like a valid custom ID (ends with })
2170                        if rest[id_start..].trim_end().ends_with('}') {
2171                            // Split off the custom ID
2172                            (&rest[..id_start], &rest[id_start..])
2173                        } else {
2174                            (rest, "")
2175                        }
2176                    } else {
2177                        (rest, "")
2178                    };
2179
2180                    // Now look for closing hashes in the part before the custom ID
2181                    let trimmed_rest = rest_without_id.trim_end();
2182                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2183                        // Find the start of the hash sequence by walking backwards
2184                        // Use char_indices to get byte positions at char boundaries
2185                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2186
2187                        // Find which char index corresponds to last_hash_byte_pos
2188                        let last_hash_char_idx = char_positions
2189                            .iter()
2190                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2191
2192                        if let Some(mut char_idx) = last_hash_char_idx {
2193                            // Walk backwards to find start of hash sequence
2194                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2195                                char_idx -= 1;
2196                            }
2197
2198                            // Get the byte position of the start of hashes
2199                            let start_of_hashes = char_positions[char_idx].0;
2200
2201                            // Check if there's at least one space before the closing hashes
2202                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2203
2204                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2205                            let potential_closing = &trimmed_rest[start_of_hashes..];
2206                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2207
2208                            if is_all_hashes && has_space_before {
2209                                // This is a closing sequence
2210                                let closing_hashes = potential_closing.to_string();
2211                                // The text is everything before the closing hashes
2212                                // Don't include the custom ID here - it will be extracted later
2213                                let text_part = if !custom_id_part.is_empty() {
2214                                    // If we have a custom ID, append it back to get the full rest
2215                                    // This allows the extract_header_id function to handle it properly
2216                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2217                                } else {
2218                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2219                                };
2220                                (text_part, true, closing_hashes)
2221                            } else {
2222                                // Not a valid closing sequence, return the full content
2223                                (rest.to_string(), false, String::new())
2224                            }
2225                        } else {
2226                            // Couldn't find char boundary, return the full content
2227                            (rest.to_string(), false, String::new())
2228                        }
2229                    } else {
2230                        // No hashes found, return the full content
2231                        (rest.to_string(), false, String::new())
2232                    }
2233                };
2234
2235                let content_column = marker_column + hashes.len() + spaces_after.len();
2236
2237                // Extract custom header ID if present
2238                let raw_text = text.trim().to_string();
2239                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2240
2241                // If no custom ID was found on the header line, check the next line for standalone attr-list
2242                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2243                    let next_line = content_lines[i + 1];
2244                    if !lines[i + 1].in_code_block
2245                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2246                        && let Some(next_line_id) =
2247                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2248                    {
2249                        custom_id = Some(next_line_id);
2250                    }
2251                }
2252
2253                // ATX heading is "valid" for processing by heading rules if:
2254                // 1. Has space after # (CommonMark compliant): `# Heading`
2255                // 2. Is empty (just hashes): `#`
2256                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2257                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2258                //
2259                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2260                // - `#tag` - single # with lowercase (social hashtag)
2261                // - `#123` - single # with number (GitHub issue ref)
2262                let is_valid = !spaces_after.is_empty()
2263                    || rest.is_empty()
2264                    || level > 1
2265                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2266
2267                lines[i].heading = Some(HeadingInfo {
2268                    level,
2269                    style: HeadingStyle::ATX,
2270                    marker: hashes.to_string(),
2271                    marker_column,
2272                    content_column,
2273                    text: clean_text,
2274                    custom_id,
2275                    raw_text,
2276                    has_closing_sequence: has_closing,
2277                    closing_sequence: closing_seq,
2278                    is_valid,
2279                });
2280            }
2281            // Check for Setext headings (need to look at next line)
2282            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2283                let next_line = content_lines[i + 1];
2284                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2285                    // Skip if next line is front matter delimiter
2286                    if front_matter_end > 0 && i < front_matter_end {
2287                        continue;
2288                    }
2289
2290                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2291                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2292                    {
2293                        continue;
2294                    }
2295
2296                    let underline = next_line.trim();
2297
2298                    let level = if underline.starts_with('=') { 1 } else { 2 };
2299                    let style = if level == 1 {
2300                        HeadingStyle::Setext1
2301                    } else {
2302                        HeadingStyle::Setext2
2303                    };
2304
2305                    // Extract custom header ID if present
2306                    let raw_text = line.trim().to_string();
2307                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2308
2309                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2310                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2311                        let attr_line = content_lines[i + 2];
2312                        if !lines[i + 2].in_code_block
2313                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2314                            && let Some(attr_line_id) =
2315                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2316                        {
2317                            custom_id = Some(attr_line_id);
2318                        }
2319                    }
2320
2321                    lines[i].heading = Some(HeadingInfo {
2322                        level,
2323                        style,
2324                        marker: underline.to_string(),
2325                        marker_column: next_line.len() - next_line.trim_start().len(),
2326                        content_column: lines[i].indent,
2327                        text: clean_text,
2328                        custom_id,
2329                        raw_text,
2330                        has_closing_sequence: false,
2331                        closing_sequence: String::new(),
2332                        is_valid: true, // Setext headings are always valid
2333                    });
2334                }
2335            }
2336        }
2337    }
2338
2339    /// Detect HTML blocks in the content
2340    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2341        // HTML block elements that trigger block context
2342        // Includes HTML5 media, embedded content, and interactive elements
2343        const BLOCK_ELEMENTS: &[&str] = &[
2344            "address",
2345            "article",
2346            "aside",
2347            "audio",
2348            "blockquote",
2349            "canvas",
2350            "details",
2351            "dialog",
2352            "dd",
2353            "div",
2354            "dl",
2355            "dt",
2356            "embed",
2357            "fieldset",
2358            "figcaption",
2359            "figure",
2360            "footer",
2361            "form",
2362            "h1",
2363            "h2",
2364            "h3",
2365            "h4",
2366            "h5",
2367            "h6",
2368            "header",
2369            "hr",
2370            "iframe",
2371            "li",
2372            "main",
2373            "menu",
2374            "nav",
2375            "noscript",
2376            "object",
2377            "ol",
2378            "p",
2379            "picture",
2380            "pre",
2381            "script",
2382            "search",
2383            "section",
2384            "source",
2385            "style",
2386            "summary",
2387            "svg",
2388            "table",
2389            "tbody",
2390            "td",
2391            "template",
2392            "textarea",
2393            "tfoot",
2394            "th",
2395            "thead",
2396            "tr",
2397            "track",
2398            "ul",
2399            "video",
2400        ];
2401
2402        let mut i = 0;
2403        while i < lines.len() {
2404            // Skip if already in code block or front matter
2405            if lines[i].in_code_block || lines[i].in_front_matter {
2406                i += 1;
2407                continue;
2408            }
2409
2410            let trimmed = lines[i].content(content).trim_start();
2411
2412            // Check if line starts with an HTML tag
2413            if trimmed.starts_with('<') && trimmed.len() > 1 {
2414                // Extract tag name safely
2415                let after_bracket = &trimmed[1..];
2416                let is_closing = after_bracket.starts_with('/');
2417                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2418
2419                // Extract tag name (stop at space, >, /, or end of string)
2420                let tag_name = tag_start
2421                    .chars()
2422                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2423                    .collect::<String>()
2424                    .to_lowercase();
2425
2426                // Check if it's a block element
2427                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2428                    // Mark this line as in HTML block
2429                    lines[i].in_html_block = true;
2430
2431                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2432                    // This avoids complex nesting logic that might cause infinite loops
2433                    if !is_closing {
2434                        let closing_tag = format!("</{tag_name}>");
2435                        // style and script tags can contain blank lines (CSS/JS formatting)
2436                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2437                        let mut j = i + 1;
2438                        while j < lines.len() && j < i + 100 {
2439                            // Limit search to 100 lines
2440                            // Stop at blank lines (except for style/script tags)
2441                            if !allow_blank_lines && lines[j].is_blank {
2442                                break;
2443                            }
2444
2445                            lines[j].in_html_block = true;
2446
2447                            // Check if this line contains the closing tag
2448                            if lines[j].content(content).contains(&closing_tag) {
2449                                break;
2450                            }
2451                            j += 1;
2452                        }
2453                    }
2454                }
2455            }
2456
2457            i += 1;
2458        }
2459    }
2460
2461    /// Detect ESM import/export blocks in MDX files
2462    /// ESM blocks consist of contiguous import/export statements at the top of the file
2463    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2464        // Only process MDX files
2465        if !flavor.supports_esm_blocks() {
2466            return;
2467        }
2468
2469        let mut in_multiline_comment = false;
2470
2471        for line in lines.iter_mut() {
2472            // Skip blank lines and HTML comments
2473            if line.is_blank || line.in_html_comment {
2474                continue;
2475            }
2476
2477            let trimmed = line.content(content).trim_start();
2478
2479            // Handle continuation of multi-line JS comments
2480            if in_multiline_comment {
2481                if trimmed.contains("*/") {
2482                    in_multiline_comment = false;
2483                }
2484                continue;
2485            }
2486
2487            // Skip single-line JS comments (// and ///)
2488            if trimmed.starts_with("//") {
2489                continue;
2490            }
2491
2492            // Handle start of multi-line JS comment
2493            if trimmed.starts_with("/*") {
2494                if !trimmed.contains("*/") {
2495                    in_multiline_comment = true;
2496                }
2497                continue;
2498            }
2499
2500            // Check if line starts with import or export
2501            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2502                line.in_esm_block = true;
2503            } else {
2504                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2505                break;
2506            }
2507        }
2508    }
2509
2510    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2511    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2512        let mut code_spans = Vec::new();
2513
2514        // Quick check - if no backticks, no code spans
2515        if !content.contains('`') {
2516            return code_spans;
2517        }
2518
2519        // Use pulldown-cmark's streaming parser with byte offsets
2520        let parser = Parser::new(content).into_offset_iter();
2521
2522        for (event, range) in parser {
2523            if let Event::Code(_) = event {
2524                let start_pos = range.start;
2525                let end_pos = range.end;
2526
2527                // The range includes the backticks, extract the actual content
2528                let full_span = &content[start_pos..end_pos];
2529                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2530
2531                // Extract content between backticks, preserving spaces
2532                let content_start = start_pos + backtick_count;
2533                let content_end = end_pos - backtick_count;
2534                let span_content = if content_start < content_end {
2535                    content[content_start..content_end].to_string()
2536                } else {
2537                    String::new()
2538                };
2539
2540                // Use binary search to find line number - O(log n) instead of O(n)
2541                // Find the rightmost line whose byte_offset <= start_pos
2542                let line_idx = lines
2543                    .partition_point(|line| line.byte_offset <= start_pos)
2544                    .saturating_sub(1);
2545                let line_num = line_idx + 1;
2546                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2547
2548                // Find end column using binary search
2549                let end_line_idx = lines
2550                    .partition_point(|line| line.byte_offset <= end_pos)
2551                    .saturating_sub(1);
2552                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2553
2554                // Convert byte offsets to character positions for correct Unicode handling
2555                // This ensures consistency with warning.column which uses character positions
2556                let line_content = lines[line_idx].content(content);
2557                let col_start = if byte_col_start <= line_content.len() {
2558                    line_content[..byte_col_start].chars().count()
2559                } else {
2560                    line_content.chars().count()
2561                };
2562
2563                let end_line_content = lines[end_line_idx].content(content);
2564                let col_end = if byte_col_end <= end_line_content.len() {
2565                    end_line_content[..byte_col_end].chars().count()
2566                } else {
2567                    end_line_content.chars().count()
2568                };
2569
2570                code_spans.push(CodeSpan {
2571                    line: line_num,
2572                    end_line: end_line_idx + 1,
2573                    start_col: col_start,
2574                    end_col: col_end,
2575                    byte_offset: start_pos,
2576                    byte_end: end_pos,
2577                    backtick_count,
2578                    content: span_content,
2579                });
2580            }
2581        }
2582
2583        // Sort by position to ensure consistent ordering
2584        code_spans.sort_by_key(|span| span.byte_offset);
2585
2586        code_spans
2587    }
2588
2589    /// Parse all list blocks in the content (legacy line-by-line approach)
2590    ///
2591    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2592    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2593    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2594    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2595    ///   treated as list continuation (based on the list marker width)
2596    ///
2597    /// When a new list item is encountered, we check if list-breaking content was seen
2598    /// since the last item. If so, we start a new list block.
2599    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2600        // Minimum indentation for unordered list continuation per CommonMark spec
2601        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2602
2603        /// Initialize or reset the forward-scanning tracking state.
2604        /// This helper eliminates code duplication across three initialization sites.
2605        #[inline]
2606        fn reset_tracking_state(
2607            list_item: &ListItemInfo,
2608            has_list_breaking_content: &mut bool,
2609            min_continuation: &mut usize,
2610        ) {
2611            *has_list_breaking_content = false;
2612            let marker_width = if list_item.is_ordered {
2613                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2614            } else {
2615                list_item.marker.len()
2616            };
2617            *min_continuation = if list_item.is_ordered {
2618                marker_width
2619            } else {
2620                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2621            };
2622        }
2623
2624        // Pre-size based on lines that could be list items
2625        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2626        let mut current_block: Option<ListBlock> = None;
2627        let mut last_list_item_line = 0;
2628        let mut current_indent_level = 0;
2629        let mut last_marker_width = 0;
2630
2631        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2632        let mut has_list_breaking_content_since_last_item = false;
2633        let mut min_continuation_for_tracking = 0;
2634
2635        for (line_idx, line_info) in lines.iter().enumerate() {
2636            let line_num = line_idx + 1;
2637
2638            // Enhanced code block handling using Design #3's context analysis
2639            if line_info.in_code_block {
2640                if let Some(ref mut block) = current_block {
2641                    // Calculate minimum indentation for list continuation
2642                    let min_continuation_indent =
2643                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2644
2645                    // Analyze code block context using the three-tier classification
2646                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2647
2648                    match context {
2649                        CodeBlockContext::Indented => {
2650                            // Code block is properly indented - continues the list
2651                            block.end_line = line_num;
2652                            continue;
2653                        }
2654                        CodeBlockContext::Standalone => {
2655                            // Code block separates lists - end current block
2656                            let completed_block = current_block.take().unwrap();
2657                            list_blocks.push(completed_block);
2658                            continue;
2659                        }
2660                        CodeBlockContext::Adjacent => {
2661                            // Edge case - use conservative behavior (continue list)
2662                            block.end_line = line_num;
2663                            continue;
2664                        }
2665                    }
2666                } else {
2667                    // No current list block - skip code block lines
2668                    continue;
2669                }
2670            }
2671
2672            // Extract blockquote prefix if any
2673            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2674                caps.get(0).unwrap().as_str().to_string()
2675            } else {
2676                String::new()
2677            };
2678
2679            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2680            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2681            if current_block.is_some()
2682                && line_info.list_item.is_none()
2683                && !line_info.is_blank
2684                && !line_info.in_code_span_continuation
2685            {
2686                let line_content = line_info.content(content).trim();
2687
2688                // Check for structural separators that break lists
2689                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2690                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2691                // as they indicate improper indentation rather than lazy continuation.
2692                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2693                let breaks_list = line_info.heading.is_some()
2694                    || line_content.starts_with("---")
2695                    || line_content.starts_with("***")
2696                    || line_content.starts_with("___")
2697                    || crate::utils::skip_context::is_table_line(line_content)
2698                    || line_content.starts_with(">")
2699                    || (line_info.indent > 0
2700                        && line_info.indent < min_continuation_for_tracking
2701                        && !is_lazy_continuation);
2702
2703                if breaks_list {
2704                    has_list_breaking_content_since_last_item = true;
2705                }
2706            }
2707
2708            // If this line is a code span continuation within an active list block,
2709            // extend the block's end_line to include this line (maintains list continuity)
2710            if line_info.in_code_span_continuation
2711                && line_info.list_item.is_none()
2712                && let Some(ref mut block) = current_block
2713            {
2714                block.end_line = line_num;
2715            }
2716
2717            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2718            // properly indented lines within the list). This ensures the workaround at line 2448
2719            // works correctly when there are multiple continuation lines before a nested list item.
2720            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2721            let is_valid_continuation =
2722                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2723            if !line_info.in_code_span_continuation
2724                && line_info.list_item.is_none()
2725                && !line_info.is_blank
2726                && !line_info.in_code_block
2727                && is_valid_continuation
2728                && let Some(ref mut block) = current_block
2729            {
2730                block.end_line = line_num;
2731            }
2732
2733            // Check if this line is a list item
2734            if let Some(list_item) = &line_info.list_item {
2735                // Calculate nesting level based on indentation
2736                let item_indent = list_item.marker_column;
2737                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2738
2739                if let Some(ref mut block) = current_block {
2740                    // Check if this continues the current block
2741                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2742                    // or a continuation at the same or lower level
2743                    let is_nested = nesting > block.nesting_level;
2744                    let same_type =
2745                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2746                    let same_context = block.blockquote_prefix == blockquote_prefix;
2747                    // Allow one blank line after last item, or lines immediately after block content
2748                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2749
2750                    // For unordered lists, also check marker consistency
2751                    let marker_compatible =
2752                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2753
2754                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2755                    // This eliminates the quadratic bottleneck from issue #148
2756                    let has_non_list_content = has_list_breaking_content_since_last_item;
2757
2758                    // A list continues if:
2759                    // 1. It's a nested item (indented more than the parent), OR
2760                    // 2. It's the same type at the same level with reasonable distance
2761                    let mut continues_list = if is_nested {
2762                        // Nested items always continue the list if they're in the same context
2763                        same_context && reasonable_distance && !has_non_list_content
2764                    } else {
2765                        // Same-level items need to match type and markers
2766                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2767                    };
2768
2769                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2770                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2771                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2772                        // Check if the previous line was a list item or a continuation of a list item
2773                        // (including lazy continuation lines)
2774                        if block.item_lines.contains(&(line_num - 1)) {
2775                            // They're consecutive list items - force them to be in the same list
2776                            continues_list = true;
2777                        } else {
2778                            // Previous line is a continuation line within this block
2779                            // (e.g., lazy continuation with indent=0)
2780                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2781                            continues_list = true;
2782                        }
2783                    }
2784
2785                    if continues_list {
2786                        // Extend current block
2787                        block.end_line = line_num;
2788                        block.item_lines.push(line_num);
2789
2790                        // Update max marker width
2791                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2792                            list_item.marker.len() + 1
2793                        } else {
2794                            list_item.marker.len()
2795                        });
2796
2797                        // Update marker consistency for unordered lists
2798                        if !block.is_ordered
2799                            && block.marker.is_some()
2800                            && block.marker.as_ref() != Some(&list_item.marker)
2801                        {
2802                            // Mixed markers, clear the marker field
2803                            block.marker = None;
2804                        }
2805
2806                        // Reset tracked state for issue #148 optimization
2807                        reset_tracking_state(
2808                            list_item,
2809                            &mut has_list_breaking_content_since_last_item,
2810                            &mut min_continuation_for_tracking,
2811                        );
2812                    } else {
2813                        // End current block and start a new one
2814
2815                        list_blocks.push(block.clone());
2816
2817                        *block = ListBlock {
2818                            start_line: line_num,
2819                            end_line: line_num,
2820                            is_ordered: list_item.is_ordered,
2821                            marker: if list_item.is_ordered {
2822                                None
2823                            } else {
2824                                Some(list_item.marker.clone())
2825                            },
2826                            blockquote_prefix: blockquote_prefix.clone(),
2827                            item_lines: vec![line_num],
2828                            nesting_level: nesting,
2829                            max_marker_width: if list_item.is_ordered {
2830                                list_item.marker.len() + 1
2831                            } else {
2832                                list_item.marker.len()
2833                            },
2834                        };
2835
2836                        // Initialize tracked state for new block (issue #148 optimization)
2837                        reset_tracking_state(
2838                            list_item,
2839                            &mut has_list_breaking_content_since_last_item,
2840                            &mut min_continuation_for_tracking,
2841                        );
2842                    }
2843                } else {
2844                    // Start a new block
2845                    current_block = Some(ListBlock {
2846                        start_line: line_num,
2847                        end_line: line_num,
2848                        is_ordered: list_item.is_ordered,
2849                        marker: if list_item.is_ordered {
2850                            None
2851                        } else {
2852                            Some(list_item.marker.clone())
2853                        },
2854                        blockquote_prefix,
2855                        item_lines: vec![line_num],
2856                        nesting_level: nesting,
2857                        max_marker_width: list_item.marker.len(),
2858                    });
2859
2860                    // Initialize tracked state for new block (issue #148 optimization)
2861                    reset_tracking_state(
2862                        list_item,
2863                        &mut has_list_breaking_content_since_last_item,
2864                        &mut min_continuation_for_tracking,
2865                    );
2866                }
2867
2868                last_list_item_line = line_num;
2869                current_indent_level = item_indent;
2870                last_marker_width = if list_item.is_ordered {
2871                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2872                } else {
2873                    list_item.marker.len()
2874                };
2875            } else if let Some(ref mut block) = current_block {
2876                // Not a list item - check if it continues the current block
2877
2878                // For MD032 compatibility, we use a simple approach:
2879                // - Indented lines continue the list
2880                // - Blank lines followed by indented content continue the list
2881                // - Everything else ends the list
2882
2883                // Check if the last line in the list block ended with a backslash (hard line break)
2884                // This handles cases where list items use backslash for hard line breaks
2885                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2886                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2887                } else {
2888                    false
2889                };
2890
2891                // Calculate minimum indentation for list continuation
2892                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2893                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2894                let min_continuation_indent = if block.is_ordered {
2895                    current_indent_level + last_marker_width
2896                } else {
2897                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2898                };
2899
2900                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2901                    // Indented line or backslash continuation continues the list
2902                    block.end_line = line_num;
2903                } else if line_info.is_blank {
2904                    // Blank line - check if it's internal to the list or ending it
2905                    // We only include blank lines that are followed by more list content
2906                    let mut check_idx = line_idx + 1;
2907                    let mut found_continuation = false;
2908
2909                    // Skip additional blank lines
2910                    while check_idx < lines.len() && lines[check_idx].is_blank {
2911                        check_idx += 1;
2912                    }
2913
2914                    if check_idx < lines.len() {
2915                        let next_line = &lines[check_idx];
2916                        // Check if followed by indented content (list continuation)
2917                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2918                            found_continuation = true;
2919                        }
2920                        // Check if followed by another list item at the same level
2921                        else if !next_line.in_code_block
2922                            && next_line.list_item.is_some()
2923                            && let Some(item) = &next_line.list_item
2924                        {
2925                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2926                                .find(next_line.content(content))
2927                                .map_or(String::new(), |m| m.as_str().to_string());
2928                            if item.marker_column == current_indent_level
2929                                && item.is_ordered == block.is_ordered
2930                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2931                            {
2932                                // Check if there was meaningful content between the list items (unused now)
2933                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2934                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2935                                    if let Some(between_line) = lines.get(idx) {
2936                                        let between_content = between_line.content(content);
2937                                        let trimmed = between_content.trim();
2938                                        // Skip empty lines
2939                                        if trimmed.is_empty() {
2940                                            return false;
2941                                        }
2942                                        // Check for meaningful content
2943                                        let line_indent = between_content.len() - between_content.trim_start().len();
2944
2945                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2946                                        if trimmed.starts_with("```")
2947                                            || trimmed.starts_with("~~~")
2948                                            || trimmed.starts_with("---")
2949                                            || trimmed.starts_with("***")
2950                                            || trimmed.starts_with("___")
2951                                            || trimmed.starts_with(">")
2952                                            || crate::utils::skip_context::is_table_line(trimmed)
2953                                            || between_line.heading.is_some()
2954                                        {
2955                                            return true; // These are structural separators - meaningful content that breaks lists
2956                                        }
2957
2958                                        // Only properly indented content continues the list
2959                                        line_indent >= min_continuation_indent
2960                                    } else {
2961                                        false
2962                                    }
2963                                });
2964
2965                                if block.is_ordered {
2966                                    // For ordered lists: don't continue if there are structural separators
2967                                    // Check if there are structural separators between the list items
2968                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2969                                        if let Some(between_line) = lines.get(idx) {
2970                                            let trimmed = between_line.content(content).trim();
2971                                            if trimmed.is_empty() {
2972                                                return false;
2973                                            }
2974                                            // Check for structural separators that break lists
2975                                            trimmed.starts_with("```")
2976                                                || trimmed.starts_with("~~~")
2977                                                || trimmed.starts_with("---")
2978                                                || trimmed.starts_with("***")
2979                                                || trimmed.starts_with("___")
2980                                                || trimmed.starts_with(">")
2981                                                || crate::utils::skip_context::is_table_line(trimmed)
2982                                                || between_line.heading.is_some()
2983                                        } else {
2984                                            false
2985                                        }
2986                                    });
2987                                    found_continuation = !has_structural_separators;
2988                                } else {
2989                                    // For unordered lists: also check for structural separators
2990                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2991                                        if let Some(between_line) = lines.get(idx) {
2992                                            let trimmed = between_line.content(content).trim();
2993                                            if trimmed.is_empty() {
2994                                                return false;
2995                                            }
2996                                            // Check for structural separators that break lists
2997                                            trimmed.starts_with("```")
2998                                                || trimmed.starts_with("~~~")
2999                                                || trimmed.starts_with("---")
3000                                                || trimmed.starts_with("***")
3001                                                || trimmed.starts_with("___")
3002                                                || trimmed.starts_with(">")
3003                                                || crate::utils::skip_context::is_table_line(trimmed)
3004                                                || between_line.heading.is_some()
3005                                        } else {
3006                                            false
3007                                        }
3008                                    });
3009                                    found_continuation = !has_structural_separators;
3010                                }
3011                            }
3012                        }
3013                    }
3014
3015                    if found_continuation {
3016                        // Include the blank line in the block
3017                        block.end_line = line_num;
3018                    } else {
3019                        // Blank line ends the list - don't include it
3020                        list_blocks.push(block.clone());
3021                        current_block = None;
3022                    }
3023                } else {
3024                    // Check for lazy continuation - non-indented line immediately after a list item
3025                    // But only if the line has sufficient indentation for the list type
3026                    let min_required_indent = if block.is_ordered {
3027                        current_indent_level + last_marker_width
3028                    } else {
3029                        current_indent_level + 2
3030                    };
3031
3032                    // For lazy continuation to apply, the line must either:
3033                    // 1. Have no indentation (true lazy continuation)
3034                    // 2. Have sufficient indentation for the list type
3035                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3036                    let line_content = line_info.content(content).trim();
3037
3038                    // Check for table-like patterns
3039                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3040
3041                    let is_structural_separator = line_info.heading.is_some()
3042                        || line_content.starts_with("```")
3043                        || line_content.starts_with("~~~")
3044                        || line_content.starts_with("---")
3045                        || line_content.starts_with("***")
3046                        || line_content.starts_with("___")
3047                        || line_content.starts_with(">")
3048                        || looks_like_table;
3049
3050                    // Allow lazy continuation if we're still within the same list block
3051                    // (not just immediately after a list item)
3052                    let is_lazy_continuation = !is_structural_separator
3053                        && !line_info.is_blank
3054                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3055
3056                    if is_lazy_continuation {
3057                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3058                        // it's probably not a continuation
3059                        let content_to_check = if !blockquote_prefix.is_empty() {
3060                            // Strip blockquote prefix to check the actual content
3061                            line_info
3062                                .content(content)
3063                                .strip_prefix(&blockquote_prefix)
3064                                .unwrap_or(line_info.content(content))
3065                                .trim()
3066                        } else {
3067                            line_info.content(content).trim()
3068                        };
3069
3070                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3071
3072                        // If it starts with uppercase and the previous line ended with punctuation,
3073                        // it's likely a new paragraph, not a continuation
3074                        if starts_with_uppercase && last_list_item_line > 0 {
3075                            // This looks like a new paragraph
3076                            list_blocks.push(block.clone());
3077                            current_block = None;
3078                        } else {
3079                            // This is a lazy continuation line
3080                            block.end_line = line_num;
3081                        }
3082                    } else {
3083                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3084                        list_blocks.push(block.clone());
3085                        current_block = None;
3086                    }
3087                }
3088            }
3089        }
3090
3091        // Don't forget the last block
3092        if let Some(block) = current_block {
3093            list_blocks.push(block);
3094        }
3095
3096        // Merge adjacent blocks that should be one
3097        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3098
3099        list_blocks
3100    }
3101
3102    /// Compute character frequency for fast content analysis
3103    fn compute_char_frequency(content: &str) -> CharFrequency {
3104        let mut frequency = CharFrequency::default();
3105
3106        for ch in content.chars() {
3107            match ch {
3108                '#' => frequency.hash_count += 1,
3109                '*' => frequency.asterisk_count += 1,
3110                '_' => frequency.underscore_count += 1,
3111                '-' => frequency.hyphen_count += 1,
3112                '+' => frequency.plus_count += 1,
3113                '>' => frequency.gt_count += 1,
3114                '|' => frequency.pipe_count += 1,
3115                '[' => frequency.bracket_count += 1,
3116                '`' => frequency.backtick_count += 1,
3117                '<' => frequency.lt_count += 1,
3118                '!' => frequency.exclamation_count += 1,
3119                '\n' => frequency.newline_count += 1,
3120                _ => {}
3121            }
3122        }
3123
3124        frequency
3125    }
3126
3127    /// Parse HTML tags in the content
3128    fn parse_html_tags(
3129        content: &str,
3130        lines: &[LineInfo],
3131        code_blocks: &[(usize, usize)],
3132        flavor: MarkdownFlavor,
3133    ) -> Vec<HtmlTag> {
3134        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3135            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3136
3137        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3138
3139        for cap in HTML_TAG_REGEX.captures_iter(content) {
3140            let full_match = cap.get(0).unwrap();
3141            let match_start = full_match.start();
3142            let match_end = full_match.end();
3143
3144            // Skip if in code block
3145            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3146                continue;
3147            }
3148
3149            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3150            let tag_name_original = cap.get(2).unwrap().as_str();
3151            let tag_name = tag_name_original.to_lowercase();
3152            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3153
3154            // Skip JSX components in MDX files (tags starting with uppercase letter)
3155            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3156            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3157                continue;
3158            }
3159
3160            // Find which line this tag is on
3161            let mut line_num = 1;
3162            let mut col_start = match_start;
3163            let mut col_end = match_end;
3164            for (idx, line_info) in lines.iter().enumerate() {
3165                if match_start >= line_info.byte_offset {
3166                    line_num = idx + 1;
3167                    col_start = match_start - line_info.byte_offset;
3168                    col_end = match_end - line_info.byte_offset;
3169                } else {
3170                    break;
3171                }
3172            }
3173
3174            html_tags.push(HtmlTag {
3175                line: line_num,
3176                start_col: col_start,
3177                end_col: col_end,
3178                byte_offset: match_start,
3179                byte_end: match_end,
3180                tag_name,
3181                is_closing,
3182                is_self_closing,
3183                raw_content: full_match.as_str().to_string(),
3184            });
3185        }
3186
3187        html_tags
3188    }
3189
3190    /// Parse emphasis spans in the content
3191    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3192        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3193            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3194
3195        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3196
3197        for cap in EMPHASIS_REGEX.captures_iter(content) {
3198            let full_match = cap.get(0).unwrap();
3199            let match_start = full_match.start();
3200            let match_end = full_match.end();
3201
3202            // Skip if in code block
3203            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3204                continue;
3205            }
3206
3207            let opening_markers = cap.get(1).unwrap().as_str();
3208            let content_part = cap.get(2).unwrap().as_str();
3209            let closing_markers = cap.get(3).unwrap().as_str();
3210
3211            // Validate matching markers
3212            if opening_markers.chars().next() != closing_markers.chars().next()
3213                || opening_markers.len() != closing_markers.len()
3214            {
3215                continue;
3216            }
3217
3218            let marker = opening_markers.chars().next().unwrap();
3219            let marker_count = opening_markers.len();
3220
3221            // Find which line this emphasis is on
3222            let mut line_num = 1;
3223            let mut col_start = match_start;
3224            let mut col_end = match_end;
3225            for (idx, line_info) in lines.iter().enumerate() {
3226                if match_start >= line_info.byte_offset {
3227                    line_num = idx + 1;
3228                    col_start = match_start - line_info.byte_offset;
3229                    col_end = match_end - line_info.byte_offset;
3230                } else {
3231                    break;
3232                }
3233            }
3234
3235            emphasis_spans.push(EmphasisSpan {
3236                line: line_num,
3237                start_col: col_start,
3238                end_col: col_end,
3239                byte_offset: match_start,
3240                byte_end: match_end,
3241                marker,
3242                marker_count,
3243                content: content_part.to_string(),
3244            });
3245        }
3246
3247        emphasis_spans
3248    }
3249
3250    /// Parse table rows in the content
3251    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3252        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3253
3254        for (line_idx, line_info) in lines.iter().enumerate() {
3255            // Skip lines in code blocks or blank lines
3256            if line_info.in_code_block || line_info.is_blank {
3257                continue;
3258            }
3259
3260            let line = line_info.content(content);
3261            let line_num = line_idx + 1;
3262
3263            // Check if this line contains pipes (potential table row)
3264            if !line.contains('|') {
3265                continue;
3266            }
3267
3268            // Count columns by splitting on pipes
3269            let parts: Vec<&str> = line.split('|').collect();
3270            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3271
3272            // Check if this is a separator row
3273            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3274            let mut column_alignments = Vec::new();
3275
3276            if is_separator {
3277                for part in &parts[1..parts.len() - 1] {
3278                    // Skip first and last empty parts
3279                    let trimmed = part.trim();
3280                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3281                        "center".to_string()
3282                    } else if trimmed.ends_with(':') {
3283                        "right".to_string()
3284                    } else if trimmed.starts_with(':') {
3285                        "left".to_string()
3286                    } else {
3287                        "none".to_string()
3288                    };
3289                    column_alignments.push(alignment);
3290                }
3291            }
3292
3293            table_rows.push(TableRow {
3294                line: line_num,
3295                is_separator,
3296                column_count,
3297                column_alignments,
3298            });
3299        }
3300
3301        table_rows
3302    }
3303
3304    /// Parse bare URLs and emails in the content
3305    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3306        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3307
3308        // Check for bare URLs (not in angle brackets or markdown links)
3309        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3310            let full_match = cap.get(0).unwrap();
3311            let match_start = full_match.start();
3312            let match_end = full_match.end();
3313
3314            // Skip if in code block
3315            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3316                continue;
3317            }
3318
3319            // Skip if already in angle brackets or markdown links
3320            let preceding_char = if match_start > 0 {
3321                content.chars().nth(match_start - 1)
3322            } else {
3323                None
3324            };
3325            let following_char = content.chars().nth(match_end);
3326
3327            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3328                continue;
3329            }
3330            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3331                continue;
3332            }
3333
3334            let url = full_match.as_str();
3335            let url_type = if url.starts_with("https://") {
3336                "https"
3337            } else if url.starts_with("http://") {
3338                "http"
3339            } else if url.starts_with("ftp://") {
3340                "ftp"
3341            } else {
3342                "other"
3343            };
3344
3345            // Find which line this URL is on
3346            let mut line_num = 1;
3347            let mut col_start = match_start;
3348            let mut col_end = match_end;
3349            for (idx, line_info) in lines.iter().enumerate() {
3350                if match_start >= line_info.byte_offset {
3351                    line_num = idx + 1;
3352                    col_start = match_start - line_info.byte_offset;
3353                    col_end = match_end - line_info.byte_offset;
3354                } else {
3355                    break;
3356                }
3357            }
3358
3359            bare_urls.push(BareUrl {
3360                line: line_num,
3361                start_col: col_start,
3362                end_col: col_end,
3363                byte_offset: match_start,
3364                byte_end: match_end,
3365                url: url.to_string(),
3366                url_type: url_type.to_string(),
3367            });
3368        }
3369
3370        // Check for bare email addresses
3371        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3372            let full_match = cap.get(0).unwrap();
3373            let match_start = full_match.start();
3374            let match_end = full_match.end();
3375
3376            // Skip if in code block
3377            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3378                continue;
3379            }
3380
3381            // Skip if already in angle brackets or markdown links
3382            let preceding_char = if match_start > 0 {
3383                content.chars().nth(match_start - 1)
3384            } else {
3385                None
3386            };
3387            let following_char = content.chars().nth(match_end);
3388
3389            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3390                continue;
3391            }
3392            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3393                continue;
3394            }
3395
3396            let email = full_match.as_str();
3397
3398            // Find which line this email is on
3399            let mut line_num = 1;
3400            let mut col_start = match_start;
3401            let mut col_end = match_end;
3402            for (idx, line_info) in lines.iter().enumerate() {
3403                if match_start >= line_info.byte_offset {
3404                    line_num = idx + 1;
3405                    col_start = match_start - line_info.byte_offset;
3406                    col_end = match_end - line_info.byte_offset;
3407                } else {
3408                    break;
3409                }
3410            }
3411
3412            bare_urls.push(BareUrl {
3413                line: line_num,
3414                start_col: col_start,
3415                end_col: col_end,
3416                byte_offset: match_start,
3417                byte_end: match_end,
3418                url: email.to_string(),
3419                url_type: "email".to_string(),
3420            });
3421        }
3422
3423        bare_urls
3424    }
3425
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is constructed over this context's `lines` and borrows from `self`,
    /// so the context must outlive it.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        // All filtering lives in `ValidHeadingsIter`; this only hands it the line table.
        ValidHeadingsIter::new(&self.lines)
    }
3449
3450    /// Check if the document contains any valid CommonMark headings
3451    ///
3452    /// Returns `true` if there is at least one heading with proper space after `#`.
3453    #[must_use]
3454    pub fn has_valid_headings(&self) -> bool {
3455        self.lines
3456            .iter()
3457            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3458    }
3459}
3460
3461/// Merge adjacent list blocks that should be treated as one
3462fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3463    if list_blocks.len() < 2 {
3464        return;
3465    }
3466
3467    let mut merger = ListBlockMerger::new(content, lines);
3468    *list_blocks = merger.merge(list_blocks);
3469}
3470
/// Helper struct to manage the complex logic of merging list blocks
///
/// Borrows the document text and its per-line metadata for the lifetime `'a`; it owns
/// no data of its own.
struct ListBlockMerger<'a> {
    // Document text, as passed in by `merge_adjacent_list_blocks`.
    content: &'a str,
    // Per-line metadata, as passed in by `merge_adjacent_list_blocks`.
    lines: &'a [LineInfo],
}
3476
3477impl<'a> ListBlockMerger<'a> {
    /// Create a merger that borrows `content` and `lines` for the lifetime `'a`.
    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
        Self { content, lines }
    }
3481
3482    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3483        let mut merged = Vec::with_capacity(list_blocks.len());
3484        let mut current = list_blocks[0].clone();
3485
3486        for next in list_blocks.iter().skip(1) {
3487            if self.should_merge_blocks(&current, next) {
3488                current = self.merge_two_blocks(current, next);
3489            } else {
3490                merged.push(current);
3491                current = next.clone();
3492            }
3493        }
3494
3495        merged.push(current);
3496        merged
3497    }
3498
3499    /// Determine if two adjacent list blocks should be merged
3500    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3501        // Basic compatibility checks
3502        if !self.blocks_are_compatible(current, next) {
3503            return false;
3504        }
3505
3506        // Check spacing and content between blocks
3507        let spacing = self.analyze_spacing_between(current, next);
3508        match spacing {
3509            BlockSpacing::Consecutive => true,
3510            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3511            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3512                self.can_merge_with_content_between(current, next)
3513            }
3514        }
3515    }
3516
3517    /// Check if blocks have compatible structure for merging
3518    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3519        current.is_ordered == next.is_ordered
3520            && current.blockquote_prefix == next.blockquote_prefix
3521            && current.nesting_level == next.nesting_level
3522    }
3523
3524    /// Analyze the spacing between two list blocks
3525    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3526        let gap = next.start_line - current.end_line;
3527
3528        match gap {
3529            1 => BlockSpacing::Consecutive,
3530            2 => BlockSpacing::SingleBlank,
3531            _ if gap > 2 => {
3532                if self.has_only_blank_lines_between(current, next) {
3533                    BlockSpacing::MultipleBlanks
3534                } else {
3535                    BlockSpacing::ContentBetween
3536                }
3537            }
3538            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3539        }
3540    }
3541
3542    /// Check if unordered lists can be merged with a single blank line between
3543    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3544        // Check if there are structural separators between the blocks
3545        // If has_meaningful_content_between returns true, it means there are structural separators
3546        if has_meaningful_content_between(self.content, current, next, self.lines) {
3547            return false; // Structural separators prevent merging
3548        }
3549
3550        // Only merge unordered lists with same marker across single blank
3551        !current.is_ordered && current.marker == next.marker
3552    }
3553
3554    /// Check if ordered lists can be merged when there's content between them
3555    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3556        // Do not merge lists if there are structural separators between them
3557        if has_meaningful_content_between(self.content, current, next, self.lines) {
3558            return false; // Structural separators prevent merging
3559        }
3560
3561        // Only consider merging ordered lists if there's no structural content between
3562        current.is_ordered && next.is_ordered
3563    }
3564
3565    /// Check if there are only blank lines between blocks
3566    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3567        for line_num in (current.end_line + 1)..next.start_line {
3568            if let Some(line_info) = self.lines.get(line_num - 1)
3569                && !line_info.content(self.content).trim().is_empty()
3570            {
3571                return false;
3572            }
3573        }
3574        true
3575    }
3576
3577    /// Merge two compatible list blocks into one
3578    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3579        current.end_line = next.end_line;
3580        current.item_lines.extend_from_slice(&next.item_lines);
3581
3582        // Update max marker width
3583        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3584
3585        // Handle marker consistency for unordered lists
3586        if !current.is_ordered && self.markers_differ(&current, next) {
3587            current.marker = None; // Mixed markers
3588        }
3589
3590        current
3591    }
3592
3593    /// Check if two blocks have different markers
3594    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3595        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3596    }
3597}
3598
/// Classification of what lies between two neighbouring list blocks
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks
    Consecutive,
    /// One blank line between blocks
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks
    ContentBetween,
}
3607
3608/// Check if there's meaningful content (not just blank lines) between two list blocks
3609fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3610    // Check lines between current.end_line and next.start_line
3611    for line_num in (current.end_line + 1)..next.start_line {
3612        if let Some(line_info) = lines.get(line_num - 1) {
3613            // Convert to 0-indexed
3614            let trimmed = line_info.content(content).trim();
3615
3616            // Skip empty lines
3617            if trimmed.is_empty() {
3618                continue;
3619            }
3620
3621            // Check for structural separators that should separate lists (CommonMark compliant)
3622
3623            // Headings separate lists
3624            if line_info.heading.is_some() {
3625                return true; // Has meaningful content - headings separate lists
3626            }
3627
3628            // Horizontal rules separate lists (---, ***, ___)
3629            if is_horizontal_rule(trimmed) {
3630                return true; // Has meaningful content - horizontal rules separate lists
3631            }
3632
3633            // Tables separate lists
3634            if crate::utils::skip_context::is_table_line(trimmed) {
3635                return true; // Has meaningful content - tables separate lists
3636            }
3637
3638            // Blockquotes separate lists
3639            if trimmed.starts_with('>') {
3640                return true; // Has meaningful content - blockquotes separate lists
3641            }
3642
3643            // Code block fences separate lists (unless properly indented as list content)
3644            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3645                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3646
3647                // Check if this code block is properly indented as list continuation
3648                let min_continuation_indent = if current.is_ordered {
3649                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3650                } else {
3651                    current.nesting_level + 2
3652                };
3653
3654                if line_indent < min_continuation_indent {
3655                    // This is a standalone code block that separates lists
3656                    return true; // Has meaningful content - standalone code blocks separate lists
3657                }
3658            }
3659
3660            // Check if this line has proper indentation for list continuation
3661            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3662
3663            // Calculate minimum indentation needed to be list continuation
3664            let min_indent = if current.is_ordered {
3665                current.nesting_level + current.max_marker_width
3666            } else {
3667                current.nesting_level + 2
3668            };
3669
3670            // If the line is not indented enough to be list continuation, it's meaningful content
3671            if line_indent < min_indent {
3672                return true; // Has meaningful content - content not indented as list continuation
3673            }
3674
3675            // If we reach here, the line is properly indented as list continuation
3676            // Continue checking other lines
3677        }
3678    }
3679
3680    // Only blank lines or properly indented list continuation content between blocks
3681    false
3682}
3683
3684/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
3685/// CommonMark rules for thematic breaks (horizontal rules):
3686/// - May have 0-3 spaces of leading indentation (but NOT tabs)
3687/// - Must have 3+ of the same character (-, *, or _)
3688/// - May have spaces between characters
3689/// - No other characters allowed
3690pub fn is_horizontal_rule_line(line: &str) -> bool {
3691    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
3692    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3693    if leading_spaces > 3 || line.starts_with('\t') {
3694        return false;
3695    }
3696
3697    is_horizontal_rule_content(line.trim())
3698}
3699
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` is expected to have no leading or trailing whitespace. Returns `true`
/// when it starts with `-`, `*` or `_`, contains at least three of that same
/// character, and otherwise consists only of spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three marker characters need at least three bytes
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which marker the whole line must use
    let Some(marker) = trimmed.chars().next().filter(|ch| matches!(ch, '-' | '*' | '_')) else {
        return false;
    };

    // Single pass over the text without an intermediate buffer
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}
3724
3725/// Backwards-compatible alias for `is_horizontal_rule_content`
3726pub fn is_horizontal_rule(trimmed: &str) -> bool {
3727    is_horizontal_rule_content(trimmed)
3728}
3729
/// Unit tests for `LintContext`: offset bookkeeping, per-line metadata, list item
/// detection, and MDX ESM block flagging.
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document still has one line offset but no line entries.
    #[test]
    fn test_empty_content() {
        let source = "";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline has a single line starting at offset 0.
    #[test]
    fn test_single_line() {
        let source = "# Hello";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);

        for (offset, line_col) in [(0, (1, 1)), (3, (1, 4))] {
            assert_eq!(ctx.offset_to_line_col(offset), line_col, "offset {offset}");
        }
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let source = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // (byte offset, expected (line, column))
        let cases = [
            (0, (1, 1)),  // start
            (8, (2, 1)),  // start of blank line
            (9, (3, 1)),  // start of 'Second line'
            (15, (3, 7)), // middle of 'Second line'
            (21, (4, 1)), // start of 'Third line'
        ];
        for (offset, line_col) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), line_col, "offset {offset}");
        }
    }

    /// Per-line metadata: content, byte offset, indent, blank/code/list flags.
    #[test]
    fn test_line_info() {
        let source = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let title = &ctx.lines[0];
        assert_eq!(title.content(ctx.content), "# Title");
        assert_eq!(title.byte_offset, 0);
        assert_eq!(title.indent, 0);
        assert!(!title.is_blank);
        assert!(!title.in_code_block);
        assert!(title.list_item.is_none());

        // Line 2: "    indented"
        let indented = &ctx.lines[1];
        assert_eq!(indented.content(ctx.content), "    indented");
        assert_eq!(indented.byte_offset, 8);
        assert_eq!(indented.indent, 4);
        assert!(!indented.is_blank);

        // Line 3: "" (blank)
        let blank = &ctx.lines[2];
        assert_eq!(blank.content(ctx.content), "");
        assert!(blank.is_blank);

        // Helper methods take 1-based line numbers: (line, byte offset, indent)
        for (line_num, byte_offset, indent) in [(1, 0, 0), (2, 8, 4)] {
            assert_eq!(ctx.line_to_byte_offset(line_num), Some(byte_offset));
            assert_eq!(ctx.line_info(line_num).map(|l| l.indent), Some(indent));
        }
    }

    /// Unordered, nested and ordered items are recognised; plain text is not.
    #[test]
    fn test_list_item_detection() {
        let source = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let dash_item = ctx.lines[0]
            .list_item
            .as_ref()
            .expect("line 1 should be a list item");
        assert_eq!(dash_item.marker, "-");
        assert!(!dash_item.is_ordered);
        assert_eq!(dash_item.marker_column, 0);
        assert_eq!(dash_item.content_column, 2);

        // Line 2: "  * Nested item"
        let star_item = ctx.lines[1]
            .list_item
            .as_ref()
            .expect("line 2 should be a list item");
        assert_eq!(star_item.marker, "*");
        assert_eq!(star_item.marker_column, 2);

        // Line 3: "1. Ordered item"
        let numbered_item = ctx.lines[2]
            .list_item
            .as_ref()
            .expect("line 3 should be a list item");
        assert_eq!(numbered_item.marker, "1.");
        assert!(numbered_item.is_ordered);
        assert_eq!(numbered_item.number, Some(1));

        // Line 6: "Not a list"
        assert!(ctx.lines[5].list_item.is_none());
    }

    /// Offsets on, and one past, every character of a three-line document.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let source = "a\nb\nc";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        // line_offsets: [0, 2, 4]
        let cases = [
            (0, (1, 1)), // 'a'
            (1, (1, 2)), // after 'a'
            (2, (2, 1)), // 'b'
            (3, (2, 2)), // after 'b'
            (4, (3, 1)), // 'c'
            (5, (3, 2)), // after 'c'
        ];
        for (offset, line_col) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), line_col, "offset {offset}");
        }
    }

    /// In MDX flavor, leading `import`/`export` lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let source = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(source, MarkdownFlavor::MDX, None);

        assert_eq!(ctx.lines.len(), 10);

        // (0-based line index, expected `in_esm_block`, failure message)
        let expectations = [
            (0, true, "Line 1 (import) should be in_esm_block"),
            (1, true, "Line 2 (export) should be in_esm_block"),
            (2, false, "Line 3 (blank) should NOT be in_esm_block"),
            (3, false, "Line 4 (heading) should NOT be in_esm_block"),
            (4, false, "Line 5 (blank) should NOT be in_esm_block"),
            (5, false, "Line 6 (text) should NOT be in_esm_block"),
        ];
        for (index, expected, message) in expectations {
            assert!(ctx.lines[index].in_esm_block == expected, "{message}");
        }
    }

    /// The same `import`/`export` lines are ordinary text in Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let source = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        let expectations = [
            (0, "Line 1 should NOT be in_esm_block in Standard flavor"),
            (1, "Line 2 should NOT be in_esm_block in Standard flavor"),
        ];
        for (index, message) in expectations {
            assert!(!ctx.lines[index].in_esm_block, "{message}");
        }
    }
}