rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and returns its value, optionally timing it.
///
/// Native (non-WASM) variant: the elapsed wall-clock time of `$code` is
/// printed to stderr as `[PROFILE] <name>: <duration>` when `$profile` is true.
///
/// * `$name`    - label printed next to the timing
/// * `$profile` - boolean expression that enables the report
/// * `$code`    - expression (or block) being measured; its value is returned
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}

// WASM variant: no timing is performed, the macro only yields `$code`.
// `$profile` is still evaluated so that a caller-side `profile` binding counts
// as used and does not trigger an `unused_variables` warning on wasm32 builds.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let _ = $profile;
        $code
    }};
}
29
// Comprehensive link pattern that captures both inline links (`[text](url "title")`)
// and reference links (`[text][ref]`).
// Flags: `s` makes `.` match newlines (so an escaped `\\.` pair may span a line break);
// `x` enables verbose mode, which is why the pattern can be laid out over several
// lines with `#` comments inside the string.
// Capture groups: 1 = text, 2 = `<...>` URL, 3 = bare URL, 4/5 = title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
43
// Image pattern: same shape as the link pattern but anchored on a leading `!`.
// Matches inline images (`![alt](url "title")`) and reference images (`![alt][ref]`).
// Flags: `s` makes `.` match newlines; `x` enables verbose mode (whitespace and
// `#` comments inside the pattern string are ignored by the regex engine).
// Capture groups: 1 = alt text, 2 = `<...>` URL, 3 = bare URL, 4/5 = title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
57
// Reference definition pattern: `[id]: url "title"` (title optional, single or double quoted).
// `(?m)` makes `^`/`$` match at line boundaries; up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL, 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs have no pattern of their own here: the centralized URL pattern
// (`URL_SIMPLE_REGEX`, imported from `regex_cache` above) is used instead.
63
// Pattern for email addresses: `local@domain.tld`, where the final label is
// at least two ASCII letters. The pattern is unanchored, so it finds addresses
// anywhere inside the searched text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the text; group 1 captures the optional leading
// whitespace, one or more `>` markers and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
71/// Pre-computed information about a line
72#[derive(Debug, Clone)]
73pub struct LineInfo {
74    /// Byte offset where this line starts in the document
75    pub byte_offset: usize,
76    /// Length of the line in bytes (without newline)
77    pub byte_len: usize,
78    /// Number of bytes of leading whitespace (for substring extraction)
79    pub indent: usize,
80    /// Visual column width of leading whitespace (with proper tab expansion)
81    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
82    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
83    pub visual_indent: usize,
84    /// Whether the line is blank (empty or only whitespace)
85    pub is_blank: bool,
86    /// Whether this line is inside a code block
87    pub in_code_block: bool,
88    /// Whether this line is inside front matter
89    pub in_front_matter: bool,
90    /// Whether this line is inside an HTML block
91    pub in_html_block: bool,
92    /// Whether this line is inside an HTML comment
93    pub in_html_comment: bool,
94    /// List item information if this line starts a list item
95    pub list_item: Option<ListItemInfo>,
96    /// Heading information if this line is a heading
97    pub heading: Option<HeadingInfo>,
98    /// Blockquote information if this line is a blockquote
99    pub blockquote: Option<BlockquoteInfo>,
100    /// Whether this line is inside a mkdocstrings autodoc block
101    pub in_mkdocstrings: bool,
102    /// Whether this line is part of an ESM import/export block (MDX only)
103    pub in_esm_block: bool,
104    /// Whether this line is a continuation of a multi-line code span from a previous line
105    pub in_code_span_continuation: bool,
106    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
107    /// Pre-computed for consistent detection across all rules
108    pub is_horizontal_rule: bool,
109}
110
111impl LineInfo {
112    /// Get the line content as a string slice from the source document
113    pub fn content<'a>(&self, source: &'a str) -> &'a str {
114        &source[self.byte_offset..self.byte_offset + self.byte_len]
115    }
116}
117
/// Information about a list item
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (`*`, `-`, `+`, or a number followed by `.` or `)`)
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// (presumably `None` for unordered items — TODO confirm against the parser)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    /// (NOTE(review): looks 0-based like `marker_column` — confirm)
    pub content_column: usize,
}
132
/// Heading style type
///
/// Distinguishes the syntactic form a heading was written in. The enum has no
/// payload, so equality is total and `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
143
/// Parsed link information
///
/// Text fields are `Cow<'a, str>` so they can borrow from the document when
/// possible. Instances are collected in `LintContext::links`.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    /// (NOTE(review): inclusive vs. exclusive is not visible here — confirm)
    pub end_col: usize,
    /// Byte offset in document where the link starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
168
/// Information about a broken link reported by pulldown-cmark
///
/// Collected in `LintContext::broken_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (half-open `start..end`, as `Range` implies)
    pub span: std::ops::Range<usize>,
}
177
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Collected in `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the `^` prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
190
/// Parsed image information
///
/// Mirrors `ParsedLink`, with `alt_text` in place of the link text.
/// Instances are collected in `LintContext::images`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    /// (NOTE(review): inclusive vs. exclusive is not visible here — confirm)
    pub end_col: usize,
    /// Byte offset in document where the image starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
215
/// Reference definition `[ref]: url "title"`
///
/// Collected in `LintContext::reference_defs`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
236
/// Parsed code span information
///
/// A span may cover several lines (`end_line > line`); `LintContext::new`
/// then flags every line after the first as `in_code_span_continuation`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document; treated as exclusive by
    /// `LintContext::is_in_code_block_or_span` (`pos < byte_end`)
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
257
/// Information about a heading
///
/// Stored in `LineInfo::heading`. `ValidHeadingsIter` yields only entries
/// whose `is_valid` flag is set.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from `{#custom-id}` syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    /// (presumably empty when `has_closing_sequence` is false — TODO confirm)
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
285
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// Produced by `ValidHeadingsIter`; both references borrow from the same
/// `LineInfo` slice the iterator walks.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
299
300/// Iterator over valid CommonMark headings in a document
301///
302/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
303/// but should not be processed by other heading rules.
304pub struct ValidHeadingsIter<'a> {
305    lines: &'a [LineInfo],
306    current_index: usize,
307}
308
309impl<'a> ValidHeadingsIter<'a> {
310    fn new(lines: &'a [LineInfo]) -> Self {
311        Self {
312            lines,
313            current_index: 0,
314        }
315    }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319    type Item = ValidHeading<'a>;
320
321    fn next(&mut self) -> Option<Self::Item> {
322        while self.current_index < self.lines.len() {
323            let idx = self.current_index;
324            self.current_index += 1;
325
326            let line_info = &self.lines[idx];
327            if let Some(heading) = &line_info.heading
328                && heading.is_valid
329            {
330                return Some(ValidHeading {
331                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
332                    heading,
333                    line_info,
334                });
335            }
336        }
337        None
338    }
339}
340
/// Information about a blockquote line
///
/// Stored in `LineInfo::blockquote`. The text fields are owned copies.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
361
/// Information about a list block
///
/// Collected in `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed like `start_line` — TODO confirm against the parser)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Character frequency data for fast content analysis
///
/// Each field counts one character; the parenthesised notes name the Markdown
/// constructs that character can introduce. `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#` characters (headings)
    pub hash_count: usize,
    /// Count of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of `+` characters (lists)
    pub plus_count: usize,
    /// Count of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Count of `|` characters (tables)
    pub pipe_count: usize,
    /// Count of `[` characters (links, images)
    pub bracket_count: usize,
    /// Count of backtick characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of `!` characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
413
/// Pre-parsed HTML tag information
///
/// Returned (behind an `Arc<Vec<_>>`) by `LintContext::html_tags`.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
436
/// Pre-parsed emphasis span information
///
/// Returned (behind an `Arc<Vec<_>>`) by `LintContext::emphasis_spans`.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis (`'*'` or `'_'`)
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
457
/// Pre-parsed table row information
///
/// Returned (behind an `Arc<Vec<_>>`) by `LintContext::table_rows`.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row; each entry is one of the strings
    /// listed in the trailing comment below
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
470
/// Pre-parsed bare URL information (not in links)
///
/// Returned (behind an `Arc<Vec<_>>`) by `LintContext::bare_urls`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL, stored as a string ("http", "https", "ftp", "email")
    pub url_type: String,
}
489
/// Shared, pre-parsed view of one Markdown document.
///
/// `LintContext::new` computes most fields eagerly. The private `*_cache`
/// fields are `OnceLock`s read through accessor methods: all of them start
/// empty and are filled on first access, except `code_spans_cache`, which
/// `new` fills immediately.
pub struct LintContext<'a> {
    /// The document text this context was built from
    pub content: &'a str,
    /// Byte offset of the start of every line: `0`, then one past each `\n`
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information, one entry per line
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// Inline code spans. Filled by `new` (the spans are needed while parsing
    /// links and images), so the lazy path in `code_spans()` is only a fallback.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges (`{{ }}`, `{% %}`)
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
}
515
/// The four consecutive pieces of a blockquote line, all borrowed from it.
///
/// Concatenating `indent`, `markers`, `spaces_after` and `content` gives back
/// the original line.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Split a line into blockquote components without using a regex.
///
/// The line is read left to right as: a run of spaces/tabs (`indent`), a run
/// of one or more `>` (`markers`), another run of spaces/tabs
/// (`spaces_after`), and the remainder (`content`).
///
/// Returns `None` when the first character after the indent is not `>`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |byte: &u8| *byte == b' ' || *byte == b'\t';

    // Every split point follows an ASCII byte, so it is always a char boundary.
    let indent_len = line.bytes().take_while(is_space_or_tab).count();
    let (indent, after_indent) = line.split_at(indent_len);

    let marker_len = after_indent.bytes().take_while(|&byte| byte == b'>').count();
    if marker_len == 0 {
        return None;
    }
    let (markers, after_markers) = after_indent.split_at(marker_len);

    let gap_len = after_markers.bytes().take_while(is_space_or_tab).count();
    let (spaces_after, content) = after_markers.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
562    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
563        #[cfg(not(target_arch = "wasm32"))]
564        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
565        #[cfg(target_arch = "wasm32")]
566        let profile = false;
567
568        let line_offsets = profile_section!("Line offsets", profile, {
569            let mut offsets = vec![0];
570            for (i, c) in content.char_indices() {
571                if c == '\n' {
572                    offsets.push(i + 1);
573                }
574            }
575            offsets
576        });
577
578        // Detect code blocks once and cache them
579        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
580
581        // Pre-compute HTML comment ranges ONCE for all operations
582        let html_comment_ranges = profile_section!(
583            "HTML comment ranges",
584            profile,
585            crate::utils::skip_context::compute_html_comment_ranges(content)
586        );
587
588        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
589        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
590            if flavor == MarkdownFlavor::MkDocs {
591                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
592            } else {
593                Vec::new()
594            }
595        });
596
597        // Pre-compute line information (without headings/blockquotes yet)
598        let mut lines = profile_section!(
599            "Basic line info",
600            profile,
601            Self::compute_basic_line_info(
602                content,
603                &line_offsets,
604                &code_blocks,
605                flavor,
606                &html_comment_ranges,
607                &autodoc_ranges,
608            )
609        );
610
611        // Detect HTML blocks BEFORE heading detection
612        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
613
614        // Detect ESM import/export blocks in MDX files BEFORE heading detection
615        profile_section!(
616            "ESM blocks",
617            profile,
618            Self::detect_esm_blocks(content, &mut lines, flavor)
619        );
620
621        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
622        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
623
624        // Now detect headings and blockquotes
625        profile_section!(
626            "Headings & blockquotes",
627            profile,
628            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
629        );
630
631        // Parse code spans early so we can exclude them from link/image parsing
632        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
633
634        // Mark lines that are continuations of multi-line code spans
635        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
636        for span in &code_spans {
637            if span.end_line > span.line {
638                // Mark lines after the first line as continuations
639                for line_num in (span.line + 1)..=span.end_line {
640                    if let Some(line_info) = lines.get_mut(line_num - 1) {
641                        line_info.in_code_span_continuation = true;
642                    }
643                }
644            }
645        }
646
647        // Parse links, images, references, and list blocks
648        let (links, broken_links, footnote_refs) = profile_section!(
649            "Links",
650            profile,
651            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
652        );
653
654        let images = profile_section!(
655            "Images",
656            profile,
657            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
658        );
659
660        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
661
662        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
663
664        // Compute character frequency for fast content analysis
665        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
666
667        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
668        let table_blocks = profile_section!(
669            "Table blocks",
670            profile,
671            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
672                content,
673                &code_blocks,
674                &code_spans,
675                &html_comment_ranges,
676            )
677        );
678
679        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
680        let line_index = profile_section!(
681            "Line index",
682            profile,
683            crate::utils::range_utils::LineIndex::new(content)
684        );
685
686        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
687        let jinja_ranges = profile_section!(
688            "Jinja ranges",
689            profile,
690            crate::utils::jinja_utils::find_jinja_ranges(content)
691        );
692
693        Self {
694            content,
695            line_offsets,
696            code_blocks,
697            lines,
698            links,
699            images,
700            broken_links,
701            footnote_refs,
702            reference_defs,
703            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
704            list_blocks,
705            char_frequency,
706            html_tags_cache: OnceLock::new(),
707            emphasis_spans_cache: OnceLock::new(),
708            table_rows_cache: OnceLock::new(),
709            bare_urls_cache: OnceLock::new(),
710            has_mixed_list_nesting_cache: OnceLock::new(),
711            html_comment_ranges,
712            table_blocks,
713            line_index,
714            jinja_ranges,
715            flavor,
716            source_file,
717        }
718    }
719
720    /// Get code spans - computed lazily on first access
721    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722        Arc::clone(
723            self.code_spans_cache
724                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725        )
726    }
727
728    /// Get HTML comment ranges - pre-computed during LintContext construction
729    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730        &self.html_comment_ranges
731    }
732
733    /// Get HTML tags - computed lazily on first access
734    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735        Arc::clone(self.html_tags_cache.get_or_init(|| {
736            Arc::new(Self::parse_html_tags(
737                self.content,
738                &self.lines,
739                &self.code_blocks,
740                self.flavor,
741            ))
742        }))
743    }
744
745    /// Get emphasis spans - computed lazily on first access
746    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747        Arc::clone(
748            self.emphasis_spans_cache
749                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750        )
751    }
752
753    /// Get table rows - computed lazily on first access
754    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755        Arc::clone(
756            self.table_rows_cache
757                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758        )
759    }
760
761    /// Get bare URLs - computed lazily on first access
762    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763        Arc::clone(
764            self.bare_urls_cache
765                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766        )
767    }
768
769    /// Check if document has mixed ordered/unordered list nesting.
770    /// Result is cached after first computation (document-level invariant).
771    /// This is used by MD007 for smart style auto-detection.
772    pub fn has_mixed_list_nesting(&self) -> bool {
773        *self
774            .has_mixed_list_nesting_cache
775            .get_or_init(|| self.compute_mixed_list_nesting())
776    }
777
778    /// Internal computation for mixed list nesting (only called once per LintContext).
779    fn compute_mixed_list_nesting(&self) -> bool {
780        // Track parent list items by their marker position and type
781        // Using marker_column instead of indent because it works correctly
782        // for blockquoted content where indent doesn't account for the prefix
783        // Stack stores: (marker_column, is_ordered)
784        let mut stack: Vec<(usize, bool)> = Vec::new();
785        let mut last_was_blank = false;
786
787        for line_info in &self.lines {
788            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
789            if line_info.in_code_block
790                || line_info.in_front_matter
791                || line_info.in_mkdocstrings
792                || line_info.in_html_comment
793                || line_info.in_esm_block
794            {
795                continue;
796            }
797
798            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
799            if line_info.is_blank {
800                last_was_blank = true;
801                continue;
802            }
803
804            if let Some(list_item) = &line_info.list_item {
805                // Normalize column 1 to column 0 (consistent with MD007 check function)
806                let current_pos = if list_item.marker_column == 1 {
807                    0
808                } else {
809                    list_item.marker_column
810                };
811
812                // If there was a blank line and this item is at root level, reset stack
813                if last_was_blank && current_pos == 0 {
814                    stack.clear();
815                }
816                last_was_blank = false;
817
818                // Pop items at same or greater position (they're siblings or deeper, not parents)
819                while let Some(&(pos, _)) = stack.last() {
820                    if pos >= current_pos {
821                        stack.pop();
822                    } else {
823                        break;
824                    }
825                }
826
827                // Check if immediate parent has different type - this is mixed nesting
828                if let Some(&(_, parent_is_ordered)) = stack.last()
829                    && parent_is_ordered != list_item.is_ordered
830                {
831                    return true; // Found mixed nesting - early exit
832                }
833
834                stack.push((current_pos, list_item.is_ordered));
835            } else {
836                // Non-list line (but not blank) - could be paragraph or other content
837                last_was_blank = false;
838            }
839        }
840
841        false
842    }
843
844    /// Map a byte offset to (line, column)
845    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846        match self.line_offsets.binary_search(&offset) {
847            Ok(line) => (line + 1, 1),
848            Err(line) => {
849                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850                (line, offset - line_start + 1)
851            }
852        }
853    }
854
855    /// Check if a position is within a code block or code span
856    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857        // Check code blocks first
858        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859            return true;
860        }
861
862        // Check inline code spans (lazy load if needed)
863        self.code_spans()
864            .iter()
865            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866    }
867
868    /// Get line information by line number (1-indexed)
869    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870        if line_num > 0 {
871            self.lines.get(line_num - 1)
872        } else {
873            None
874        }
875    }
876
877    /// Get byte offset for a line number (1-indexed)
878    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879        self.line_info(line_num).map(|info| info.byte_offset)
880    }
881
882    /// Get URL for a reference link/image by its ID
883    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884        let normalized_id = ref_id.to_lowercase();
885        self.reference_defs
886            .iter()
887            .find(|def| def.id == normalized_id)
888            .map(|def| def.url.as_str())
889    }
890
891    /// Check if a line is part of a list block
892    pub fn is_in_list_block(&self, line_num: usize) -> bool {
893        self.list_blocks
894            .iter()
895            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896    }
897
898    /// Get the list block containing a specific line
899    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900        self.list_blocks
901            .iter()
902            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903    }
904
905    // Compatibility methods for DocumentStructure migration
906
907    /// Check if a line is within a code block
908    pub fn is_in_code_block(&self, line_num: usize) -> bool {
909        if line_num == 0 || line_num > self.lines.len() {
910            return false;
911        }
912        self.lines[line_num - 1].in_code_block
913    }
914
915    /// Check if a line is within front matter
916    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917        if line_num == 0 || line_num > self.lines.len() {
918            return false;
919        }
920        self.lines[line_num - 1].in_front_matter
921    }
922
923    /// Check if a line is within an HTML block
924    pub fn is_in_html_block(&self, line_num: usize) -> bool {
925        if line_num == 0 || line_num > self.lines.len() {
926            return false;
927        }
928        self.lines[line_num - 1].in_html_block
929    }
930
931    /// Check if a line and column is within a code span
932    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933        if line_num == 0 || line_num > self.lines.len() {
934            return false;
935        }
936
937        // Use the code spans cache to check
938        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
939        // Convert col to 0-indexed for comparison
940        let col_0indexed = if col > 0 { col - 1 } else { 0 };
941        let code_spans = self.code_spans();
942        code_spans.iter().any(|span| {
943            // Check if line is within the span's line range
944            if line_num < span.line || line_num > span.end_line {
945                return false;
946            }
947
948            if span.line == span.end_line {
949                // Single-line span: check column bounds
950                col_0indexed >= span.start_col && col_0indexed < span.end_col
951            } else if line_num == span.line {
952                // First line of multi-line span: anything after start_col is in span
953                col_0indexed >= span.start_col
954            } else if line_num == span.end_line {
955                // Last line of multi-line span: anything before end_col is in span
956                col_0indexed < span.end_col
957            } else {
958                // Middle line of multi-line span: entire line is in span
959                true
960            }
961        })
962    }
963
964    /// Check if a byte offset is within a code span
965    #[inline]
966    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967        let code_spans = self.code_spans();
968        code_spans
969            .iter()
970            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971    }
972
973    /// Check if a byte position is within a reference definition
974    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
975    #[inline]
976    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977        self.reference_defs
978            .iter()
979            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980    }
981
982    /// Check if a byte position is within an HTML comment
983    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
984    /// where k is the number of HTML comments (typically very small)
985    #[inline]
986    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987        self.html_comment_ranges
988            .iter()
989            .any(|range| byte_pos >= range.start && byte_pos < range.end)
990    }
991
992    /// Check if a byte position is within an HTML tag (including multiline tags)
993    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
994    #[inline]
995    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996        self.html_tags()
997            .iter()
998            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999    }
1000
1001    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1002    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003        self.jinja_ranges
1004            .iter()
1005            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006    }
1007
1008    /// Check if a byte position is within a link reference definition title
1009    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010        self.reference_defs.iter().any(|def| {
1011            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012                byte_pos >= start && byte_pos < end
1013            } else {
1014                false
1015            }
1016        })
1017    }
1018
1019    /// Check if content has any instances of a specific character (fast)
1020    pub fn has_char(&self, ch: char) -> bool {
1021        match ch {
1022            '#' => self.char_frequency.hash_count > 0,
1023            '*' => self.char_frequency.asterisk_count > 0,
1024            '_' => self.char_frequency.underscore_count > 0,
1025            '-' => self.char_frequency.hyphen_count > 0,
1026            '+' => self.char_frequency.plus_count > 0,
1027            '>' => self.char_frequency.gt_count > 0,
1028            '|' => self.char_frequency.pipe_count > 0,
1029            '[' => self.char_frequency.bracket_count > 0,
1030            '`' => self.char_frequency.backtick_count > 0,
1031            '<' => self.char_frequency.lt_count > 0,
1032            '!' => self.char_frequency.exclamation_count > 0,
1033            '\n' => self.char_frequency.newline_count > 0,
1034            _ => self.content.contains(ch), // Fallback for other characters
1035        }
1036    }
1037
1038    /// Get count of a specific character (fast)
1039    pub fn char_count(&self, ch: char) -> usize {
1040        match ch {
1041            '#' => self.char_frequency.hash_count,
1042            '*' => self.char_frequency.asterisk_count,
1043            '_' => self.char_frequency.underscore_count,
1044            '-' => self.char_frequency.hyphen_count,
1045            '+' => self.char_frequency.plus_count,
1046            '>' => self.char_frequency.gt_count,
1047            '|' => self.char_frequency.pipe_count,
1048            '[' => self.char_frequency.bracket_count,
1049            '`' => self.char_frequency.backtick_count,
1050            '<' => self.char_frequency.lt_count,
1051            '!' => self.char_frequency.exclamation_count,
1052            '\n' => self.char_frequency.newline_count,
1053            _ => self.content.matches(ch).count(), // Fallback for other characters
1054        }
1055    }
1056
1057    /// Check if content likely contains headings (fast)
1058    pub fn likely_has_headings(&self) -> bool {
1059        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1060    }
1061
1062    /// Check if content likely contains lists (fast)
1063    pub fn likely_has_lists(&self) -> bool {
1064        self.char_frequency.asterisk_count > 0
1065            || self.char_frequency.hyphen_count > 0
1066            || self.char_frequency.plus_count > 0
1067    }
1068
1069    /// Check if content likely contains emphasis (fast)
1070    pub fn likely_has_emphasis(&self) -> bool {
1071        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072    }
1073
1074    /// Check if content likely contains tables (fast)
1075    pub fn likely_has_tables(&self) -> bool {
1076        self.char_frequency.pipe_count > 2
1077    }
1078
1079    /// Check if content likely contains blockquotes (fast)
1080    pub fn likely_has_blockquotes(&self) -> bool {
1081        self.char_frequency.gt_count > 0
1082    }
1083
1084    /// Check if content likely contains code (fast)
1085    pub fn likely_has_code(&self) -> bool {
1086        self.char_frequency.backtick_count > 0
1087    }
1088
1089    /// Check if content likely contains links or images (fast)
1090    pub fn likely_has_links_or_images(&self) -> bool {
1091        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092    }
1093
1094    /// Check if content likely contains HTML (fast)
1095    pub fn likely_has_html(&self) -> bool {
1096        self.char_frequency.lt_count > 0
1097    }
1098
1099    /// Get HTML tags on a specific line
1100    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101        self.html_tags()
1102            .iter()
1103            .filter(|tag| tag.line == line_num)
1104            .cloned()
1105            .collect()
1106    }
1107
1108    /// Get emphasis spans on a specific line
1109    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110        self.emphasis_spans()
1111            .iter()
1112            .filter(|span| span.line == line_num)
1113            .cloned()
1114            .collect()
1115    }
1116
1117    /// Get table rows on a specific line
1118    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119        self.table_rows()
1120            .iter()
1121            .filter(|row| row.line == line_num)
1122            .cloned()
1123            .collect()
1124    }
1125
1126    /// Get bare URLs on a specific line
1127    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128        self.bare_urls()
1129            .iter()
1130            .filter(|url| url.line == line_num)
1131            .cloned()
1132            .collect()
1133    }
1134
1135    /// Find the line index for a given byte offset using binary search.
1136    /// Returns (line_index, line_number, column) where:
1137    /// - line_index is the 0-based index in the lines array
1138    /// - line_number is the 1-based line number
1139    /// - column is the byte offset within that line
1140    #[inline]
1141    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142        // Binary search to find the line containing this byte offset
1143        let idx = match lines.binary_search_by(|line| {
1144            if byte_offset < line.byte_offset {
1145                std::cmp::Ordering::Greater
1146            } else if byte_offset > line.byte_offset + line.byte_len {
1147                std::cmp::Ordering::Less
1148            } else {
1149                std::cmp::Ordering::Equal
1150            }
1151        }) {
1152            Ok(idx) => idx,
1153            Err(idx) => idx.saturating_sub(1),
1154        };
1155
1156        let line = &lines[idx];
1157        let line_num = idx + 1;
1158        let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160        (idx, line_num, col)
1161    }
1162
1163    /// Check if a byte offset is within a code span using binary search
1164    #[inline]
1165    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166        // Since spans are sorted by byte_offset, use partition_point for binary search
1167        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169        // Check the span that starts at or before our offset
1170        if idx > 0 {
1171            let span = &code_spans[idx - 1];
1172            if offset >= span.byte_offset && offset < span.byte_end {
1173                return true;
1174            }
1175        }
1176
1177        false
1178    }
1179
1180    /// Collect byte ranges of all links using pulldown-cmark
1181    /// This is used to skip heading detection for lines that fall within link syntax
1182    /// (e.g., multiline links like `[text](url\n#fragment)`)
1183    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186        let mut link_ranges = Vec::new();
1187        let mut options = Options::empty();
1188        options.insert(Options::ENABLE_WIKILINKS);
1189        options.insert(Options::ENABLE_FOOTNOTES);
1190
1191        let parser = Parser::new_ext(content, options).into_offset_iter();
1192        let mut link_stack: Vec<usize> = Vec::new();
1193
1194        for (event, range) in parser {
1195            match event {
1196                Event::Start(Tag::Link { .. }) => {
1197                    link_stack.push(range.start);
1198                }
1199                Event::End(TagEnd::Link) => {
1200                    if let Some(start_pos) = link_stack.pop() {
1201                        link_ranges.push((start_pos, range.end));
1202                    }
1203                }
1204                _ => {}
1205            }
1206        }
1207
1208        link_ranges
1209    }
1210
    /// Parse all links in the content.
    ///
    /// Works in two passes:
    /// 1. A pulldown-cmark pass (wikilinks and footnotes enabled) that records every
    ///    `Tag::Link`, every footnote reference, and every broken link reported through
    ///    the broken-link callback.
    /// 2. A `LINK_PATTERN` regex pass that adds reference-style matches (`[text][ref]`)
    ///    whose start offset was not already recorded by the first pass; these are stored
    ///    with an empty URL.
    ///
    /// Links that start inside an HTML comment range, or that sit on a MkDocs snippet line,
    /// are skipped in both passes. The regex pass additionally skips matches preceded by
    /// `\` or `!`, and matches starting inside a code block or code span.
    ///
    /// Returns `(links, broken_links, footnote_refs)`.
    fn parse_links(
        content: &'a str,
        lines: &[LineInfo],
        code_blocks: &[(usize, usize)],
        code_spans: &[CodeSpan],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
        use std::collections::HashSet;

        // Capacity is a heuristic guess of one link per 500 bytes of content
        let mut links = Vec::with_capacity(content.len() / 500);
        let mut broken_links = Vec::new();
        let mut footnote_refs = Vec::new();

        // Track byte positions of links found by pulldown-cmark
        // (consulted by the regex pass below to avoid duplicates)
        let mut found_positions = HashSet::new();

        // Use pulldown-cmark's streaming parser with BrokenLink callback
        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
        // This automatically handles:
        // - Escaped links (won't generate events)
        // - Links in code blocks/spans (won't generate Link events)
        // - Images (generates Tag::Image instead)
        // - Reference resolution (dest_url is already resolved!)
        // - Broken references (callback is invoked)
        // - Wiki-links (enabled via ENABLE_WIKILINKS)
        let mut options = Options::empty();
        options.insert(Options::ENABLE_WIKILINKS);
        options.insert(Options::ENABLE_FOOTNOTES);

        let parser = Parser::new_with_broken_link_callback(
            content,
            options,
            Some(|link: BrokenLink<'_>| {
                broken_links.push(BrokenLinkInfo {
                    reference: link.reference.to_string(),
                    span: link.span.clone(),
                });
                // Returning None leaves the broken reference unresolved
                None
            }),
        )
        .into_offset_iter();

        // Stack of currently open links:
        // (start offset, end offset of the start event's range, dest_url, link_type, id)
        let mut link_stack: Vec<(
            usize,
            usize,
            pulldown_cmark::CowStr<'a>,
            LinkType,
            pulldown_cmark::CowStr<'a>,
        )> = Vec::new();
        // NOTE(review): text_chunks is only pushed to and cleared in this function; it is
        // never read, because the link text is taken from the source bytes instead.
        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)

        for (event, range) in parser {
            match event {
                Event::Start(Tag::Link {
                    link_type,
                    dest_url,
                    id,
                    ..
                }) => {
                    // Link start - record position, URL, and reference ID
                    link_stack.push((range.start, range.end, dest_url, link_type, id));
                    text_chunks.clear();
                }
                Event::Text(text) if !link_stack.is_empty() => {
                    // Track text content with its byte range
                    text_chunks.push((text.to_string(), range.start, range.end));
                }
                Event::Code(code) if !link_stack.is_empty() => {
                    // Include inline code in link text (with backticks)
                    let code_text = format!("`{code}`");
                    text_chunks.push((code_text, range.start, range.end));
                }
                Event::End(TagEnd::Link) => {
                    // `_link_start_end` (the start event's range end) is carried on the stack but unused here
                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
                        // Skip if in HTML comment
                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
                            text_chunks.clear();
                            continue;
                        }

                        // Find line and column information
                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);

                        // Skip if this link is on a MkDocs snippet line
                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                            text_chunks.clear();
                            continue;
                        }

                        // The end column is measured on whichever line contains the link's end offset
                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);

                        let is_reference = matches!(
                            link_type,
                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
                        );

                        // Extract link text directly from source bytes to preserve escaping
                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
                        let link_text = if start_pos < content.len() {
                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];

                            // Find MATCHING ] by tracking bracket depth for nested brackets
                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
                            // Brackets inside code spans (between backticks) should be ignored
                            let mut close_pos = None;
                            let mut depth = 0;
                            let mut in_code_span = false;

                            // skip(1) steps over the first byte of the link source (the opening bracket)
                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
                                // Count preceding backslashes
                                let mut backslash_count = 0;
                                let mut j = i;
                                while j > 0 && link_bytes[j - 1] == b'\\' {
                                    backslash_count += 1;
                                    j -= 1;
                                }
                                let is_escaped = backslash_count % 2 != 0;

                                // Track code spans - backticks toggle in/out of code
                                if byte == b'`' && !is_escaped {
                                    in_code_span = !in_code_span;
                                }

                                // Only count brackets when NOT in a code span
                                if !is_escaped && !in_code_span {
                                    if byte == b'[' {
                                        depth += 1;
                                    } else if byte == b']' {
                                        if depth == 0 {
                                            // Found the matching closing bracket
                                            close_pos = Some(i);
                                            break;
                                        } else {
                                            depth -= 1;
                                        }
                                    }
                                }
                            }

                            // Text is everything between the first byte and the matching bracket;
                            // falls back to "" when no match is found or the slice is not valid UTF-8
                            if let Some(pos) = close_pos {
                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
                            } else {
                                Cow::Borrowed("")
                            }
                        } else {
                            Cow::Borrowed("")
                        };

                        // For reference links, use the actual reference ID from pulldown-cmark
                        let reference_id = if is_reference && !ref_id.is_empty() {
                            Some(Cow::Owned(ref_id.to_lowercase()))
                        } else if is_reference {
                            // For collapsed/shortcut references without explicit ID, use the link text
                            Some(Cow::Owned(link_text.to_lowercase()))
                        } else {
                            None
                        };

                        // Track this position as found
                        found_positions.insert(start_pos);

                        links.push(ParsedLink {
                            line: line_num,
                            start_col: col_start,
                            end_col: col_end,
                            byte_offset: start_pos,
                            byte_end: range.end,
                            text: link_text,
                            url: Cow::Owned(url.to_string()),
                            is_reference,
                            reference_id,
                            link_type,
                        });

                        text_chunks.clear();
                    }
                }
                Event::FootnoteReference(footnote_id) => {
                    // Capture footnote references like [^1], [^note]
                    // Skip if in HTML comment
                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
                        continue;
                    }

                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
                    footnote_refs.push(FootnoteRef {
                        id: footnote_id.to_string(),
                        line: line_num,
                        byte_offset: range.start,
                        byte_end: range.end,
                    });
                }
                _ => {}
            }
        }

        // Also find undefined references using regex
        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
        // because the reference is undefined
        for cap in LINK_PATTERN.captures_iter(content) {
            // Group 0 is the whole match and is always present
            let full_match = cap.get(0).unwrap();
            let match_start = full_match.start();
            let match_end = full_match.end();

            // Skip if this was already found by pulldown-cmark (it's a valid link)
            if found_positions.contains(&match_start) {
                continue;
            }

            // Skip if escaped
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
                continue;
            }

            // Skip if it's an image
            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
                continue;
            }

            // Skip if in code block
            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
                continue;
            }

            // Skip if in code span
            if Self::is_offset_in_code_span(code_spans, match_start) {
                continue;
            }

            // Skip if in HTML comment
            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
                continue;
            }

            // Find line and column information
            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);

            // Skip if this link is on a MkDocs snippet line
            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
                continue;
            }

            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);

            // Group 1 is the link text; a missing group is treated as empty text
            let text = cap.get(1).map_or("", |m| m.as_str());

            // Only process reference links (group 6)
            // Inline-style matches (no group 6) that pulldown-cmark did not report are ignored
            if let Some(ref_id) = cap.get(6) {
                let ref_id_str = ref_id.as_str();
                let normalized_ref = if ref_id_str.is_empty() {
                    Cow::Owned(text.to_lowercase()) // Implicit reference
                } else {
                    Cow::Owned(ref_id_str.to_lowercase())
                };

                // This is an undefined reference (pulldown-cmark didn't parse it)
                links.push(ParsedLink {
                    line: line_num,
                    start_col: col_start,
                    end_col: col_end,
                    byte_offset: match_start,
                    byte_end: match_end,
                    text: Cow::Borrowed(text),
                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
                    is_reference: true,
                    reference_id: Some(normalized_ref),
                    link_type: LinkType::Reference, // Undefined references are reference-style
                });
            }
        }

        (links, broken_links, footnote_refs)
    }
1487
1488    /// Parse all images in the content
1489    fn parse_images(
1490        content: &'a str,
1491        lines: &[LineInfo],
1492        code_blocks: &[(usize, usize)],
1493        code_spans: &[CodeSpan],
1494        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495    ) -> Vec<ParsedImage<'a>> {
1496        use crate::utils::skip_context::is_in_html_comment_ranges;
1497        use std::collections::HashSet;
1498
1499        // Pre-size based on a heuristic: images are less common than links
1500        let mut images = Vec::with_capacity(content.len() / 1000);
1501        let mut found_positions = HashSet::new();
1502
1503        // Use pulldown-cmark for parsing - more accurate and faster
1504        let parser = Parser::new(content).into_offset_iter();
1505        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506            Vec::new();
1507        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1508
1509        for (event, range) in parser {
1510            match event {
1511                Event::Start(Tag::Image {
1512                    link_type,
1513                    dest_url,
1514                    id,
1515                    ..
1516                }) => {
1517                    image_stack.push((range.start, dest_url, link_type, id));
1518                    text_chunks.clear();
1519                }
1520                Event::Text(text) if !image_stack.is_empty() => {
1521                    text_chunks.push((text.to_string(), range.start, range.end));
1522                }
1523                Event::Code(code) if !image_stack.is_empty() => {
1524                    let code_text = format!("`{code}`");
1525                    text_chunks.push((code_text, range.start, range.end));
1526                }
1527                Event::End(TagEnd::Image) => {
1528                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529                        // Skip if in code block
1530                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531                            continue;
1532                        }
1533
1534                        // Skip if in code span
1535                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1536                            continue;
1537                        }
1538
1539                        // Skip if in HTML comment
1540                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541                            continue;
1542                        }
1543
1544                        // Find line and column using binary search
1545                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548                        let is_reference = matches!(
1549                            link_type,
1550                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551                        );
1552
1553                        // Extract alt text directly from source bytes to preserve escaping
1554                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1555                        let alt_text = if start_pos < content.len() {
1556                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558                            // Find MATCHING ] by tracking bracket depth for nested brackets
1559                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1560                            let mut close_pos = None;
1561                            let mut depth = 0;
1562
1563                            if image_bytes.len() > 2 {
1564                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565                                    // Count preceding backslashes
1566                                    let mut backslash_count = 0;
1567                                    let mut j = i;
1568                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1569                                        backslash_count += 1;
1570                                        j -= 1;
1571                                    }
1572                                    let is_escaped = backslash_count % 2 != 0;
1573
1574                                    if !is_escaped {
1575                                        if byte == b'[' {
1576                                            depth += 1;
1577                                        } else if byte == b']' {
1578                                            if depth == 0 {
1579                                                // Found the matching closing bracket
1580                                                close_pos = Some(i);
1581                                                break;
1582                                            } else {
1583                                                depth -= 1;
1584                                            }
1585                                        }
1586                                    }
1587                                }
1588                            }
1589
1590                            if let Some(pos) = close_pos {
1591                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592                            } else {
1593                                Cow::Borrowed("")
1594                            }
1595                        } else {
1596                            Cow::Borrowed("")
1597                        };
1598
1599                        let reference_id = if is_reference && !ref_id.is_empty() {
1600                            Some(Cow::Owned(ref_id.to_lowercase()))
1601                        } else if is_reference {
1602                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1603                        } else {
1604                            None
1605                        };
1606
1607                        found_positions.insert(start_pos);
1608                        images.push(ParsedImage {
1609                            line: line_num,
1610                            start_col: col_start,
1611                            end_col: col_end,
1612                            byte_offset: start_pos,
1613                            byte_end: range.end,
1614                            alt_text,
1615                            url: Cow::Owned(url.to_string()),
1616                            is_reference,
1617                            reference_id,
1618                            link_type,
1619                        });
1620                    }
1621                }
1622                _ => {}
1623            }
1624        }
1625
1626        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1627        for cap in IMAGE_PATTERN.captures_iter(content) {
1628            let full_match = cap.get(0).unwrap();
1629            let match_start = full_match.start();
1630            let match_end = full_match.end();
1631
1632            // Skip if already found by pulldown-cmark
1633            if found_positions.contains(&match_start) {
1634                continue;
1635            }
1636
1637            // Skip if the ! is escaped
1638            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639                continue;
1640            }
1641
1642            // Skip if in code block, code span, or HTML comment
1643            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644                || Self::is_offset_in_code_span(code_spans, match_start)
1645                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646            {
1647                continue;
1648            }
1649
1650            // Only process reference images (undefined references not found by pulldown-cmark)
1651            if let Some(ref_id) = cap.get(6) {
1652                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655                let ref_id_str = ref_id.as_str();
1656                let normalized_ref = if ref_id_str.is_empty() {
1657                    Cow::Owned(alt_text.to_lowercase())
1658                } else {
1659                    Cow::Owned(ref_id_str.to_lowercase())
1660                };
1661
1662                images.push(ParsedImage {
1663                    line: line_num,
1664                    start_col: col_start,
1665                    end_col: col_end,
1666                    byte_offset: match_start,
1667                    byte_end: match_end,
1668                    alt_text: Cow::Borrowed(alt_text),
1669                    url: Cow::Borrowed(""),
1670                    is_reference: true,
1671                    reference_id: Some(normalized_ref),
1672                    link_type: LinkType::Reference, // Undefined references are reference-style
1673                });
1674            }
1675        }
1676
1677        images
1678    }
1679
1680    /// Parse reference definitions
1681    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682        // Pre-size based on lines count as reference definitions are line-based
1683        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1684
1685        for (line_idx, line_info) in lines.iter().enumerate() {
1686            // Skip lines in code blocks
1687            if line_info.in_code_block {
1688                continue;
1689            }
1690
1691            let line = line_info.content(content);
1692            let line_num = line_idx + 1;
1693
1694            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695                let id = cap.get(1).unwrap().as_str().to_lowercase();
1696                let url = cap.get(2).unwrap().as_str().to_string();
1697                let title_match = cap.get(3).or_else(|| cap.get(4));
1698                let title = title_match.map(|m| m.as_str().to_string());
1699
1700                // Calculate byte positions
1701                // The match starts at the beginning of the line (0) and extends to the end
1702                let match_obj = cap.get(0).unwrap();
1703                let byte_offset = line_info.byte_offset + match_obj.start();
1704                let byte_end = line_info.byte_offset + match_obj.end();
1705
1706                // Calculate title byte positions (includes the quote character before content)
1707                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708                    // The match is the content inside quotes, so we include the quote before
1709                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1710                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1711                    (Some(start), Some(end))
1712                } else {
1713                    (None, None)
1714                };
1715
1716                refs.push(ReferenceDef {
1717                    line: line_num,
1718                    id,
1719                    url,
1720                    title,
1721                    byte_offset,
1722                    byte_end,
1723                    title_byte_start,
1724                    title_byte_end,
1725                });
1726            }
1727        }
1728
1729        refs
1730    }
1731
1732    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1733    /// Handles nested blockquotes like `> > > content`
1734    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1735    #[inline]
1736    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1737        let trimmed_start = line.trim_start();
1738        if !trimmed_start.starts_with('>') {
1739            return None;
1740        }
1741
1742        // Track total prefix length to handle nested blockquotes
1743        let mut remaining = line;
1744        let mut total_prefix_len = 0;
1745
1746        loop {
1747            let trimmed = remaining.trim_start();
1748            if !trimmed.starts_with('>') {
1749                break;
1750            }
1751
1752            // Add leading whitespace + '>' to prefix
1753            let leading_ws_len = remaining.len() - trimmed.len();
1754            total_prefix_len += leading_ws_len + 1;
1755
1756            let after_gt = &trimmed[1..];
1757
1758            // Handle optional whitespace after '>' (space or tab)
1759            if let Some(stripped) = after_gt.strip_prefix(' ') {
1760                total_prefix_len += 1;
1761                remaining = stripped;
1762            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1763                total_prefix_len += 1;
1764                remaining = stripped;
1765            } else {
1766                remaining = after_gt;
1767            }
1768        }
1769
1770        Some((&line[..total_prefix_len], remaining))
1771    }
1772
1773    /// Fast unordered list parser - replaces regex for 5-10x speedup
1774    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1775    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1776    #[inline]
1777    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1778        let bytes = line.as_bytes();
1779        let mut i = 0;
1780
1781        // Skip leading whitespace
1782        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1783            i += 1;
1784        }
1785
1786        // Check for marker
1787        if i >= bytes.len() {
1788            return None;
1789        }
1790        let marker = bytes[i] as char;
1791        if marker != '-' && marker != '*' && marker != '+' {
1792            return None;
1793        }
1794        let marker_pos = i;
1795        i += 1;
1796
1797        // Collect spacing after marker (space or tab only)
1798        let spacing_start = i;
1799        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1800            i += 1;
1801        }
1802
1803        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1804    }
1805
1806    /// Fast ordered list parser - replaces regex for 5-10x speedup
1807    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1808    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1809    #[inline]
1810    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1811        let bytes = line.as_bytes();
1812        let mut i = 0;
1813
1814        // Skip leading whitespace
1815        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1816            i += 1;
1817        }
1818
1819        // Collect digits
1820        let number_start = i;
1821        while i < bytes.len() && bytes[i].is_ascii_digit() {
1822            i += 1;
1823        }
1824        if i == number_start {
1825            return None; // No digits found
1826        }
1827
1828        // Check for delimiter
1829        if i >= bytes.len() {
1830            return None;
1831        }
1832        let delimiter = bytes[i] as char;
1833        if delimiter != '.' && delimiter != ')' {
1834            return None;
1835        }
1836        let delimiter_pos = i;
1837        i += 1;
1838
1839        // Collect spacing after delimiter (space or tab only)
1840        let spacing_start = i;
1841        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1842            i += 1;
1843        }
1844
1845        Some((
1846            &line[..number_start],
1847            &line[number_start..delimiter_pos],
1848            delimiter,
1849            &line[spacing_start..i],
1850            &line[i..],
1851        ))
1852    }
1853
1854    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1855    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1856    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1857        let num_lines = line_offsets.len();
1858        let mut in_code_block = vec![false; num_lines];
1859
1860        // For each code block, mark all lines within it
1861        for &(start, end) in code_blocks {
1862            // Ensure we're at valid UTF-8 boundaries
1863            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1864                let mut boundary = start;
1865                while boundary > 0 && !content.is_char_boundary(boundary) {
1866                    boundary -= 1;
1867                }
1868                boundary
1869            } else {
1870                start
1871            };
1872
1873            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1874                let mut boundary = end;
1875                while boundary < content.len() && !content.is_char_boundary(boundary) {
1876                    boundary += 1;
1877                }
1878                boundary
1879            } else {
1880                end.min(content.len())
1881            };
1882
1883            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
1884            // That function now has proper list context awareness (see code_block_utils.rs)
1885            // and correctly distinguishes between:
1886            // - Fenced code blocks (``` or ~~~)
1887            // - Indented code blocks at document level (4 spaces + blank line before)
1888            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
1889            //
1890            // We no longer need to re-validate here. The original validation logic
1891            // was causing false positives by marking list continuation paragraphs as
1892            // code blocks when they have 4 spaces of indentation.
1893
1894            // Use binary search to find the first and last line indices
1895            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1896            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1897            //
1898            // Find the line that CONTAINS safe_start: the line with the largest
1899            // start offset that is <= safe_start. partition_point gives us the
1900            // first line that starts AFTER safe_start, so we subtract 1.
1901            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1902            let first_line = first_line_after.saturating_sub(1);
1903            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1904
1905            // Mark all lines in the range at once
1906            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1907                *flag = true;
1908            }
1909        }
1910
1911        in_code_block
1912    }
1913
1914    /// Pre-compute basic line information (without headings/blockquotes)
1915    fn compute_basic_line_info(
1916        content: &str,
1917        line_offsets: &[usize],
1918        code_blocks: &[(usize, usize)],
1919        flavor: MarkdownFlavor,
1920        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922    ) -> Vec<LineInfo> {
1923        let content_lines: Vec<&str> = content.lines().collect();
1924        let mut lines = Vec::with_capacity(content_lines.len());
1925
1926        // Pre-compute which lines are in code blocks
1927        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929        // Detect front matter boundaries FIRST, before any other parsing
1930        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1931        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933        for (i, line) in content_lines.iter().enumerate() {
1934            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935            let indent = line.len() - line.trim_start().len();
1936            // Compute visual indent with proper CommonMark tab expansion
1937            let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1940            let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942            // For blank detection, consider blockquote context
1943            let is_blank = if let Some((_, content)) = blockquote_parse {
1944                // In blockquote context, check if content after prefix is blank
1945                content.trim().is_empty()
1946            } else {
1947                line.trim().is_empty()
1948            };
1949
1950            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1951            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1954            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956            // Check if the ENTIRE line is within an HTML comment (not just the line start)
1957            // This ensures content after `-->` on the same line is not incorrectly skipped
1958            let line_end_offset = byte_offset + line.len();
1959            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960                html_comment_ranges,
1961                byte_offset,
1962                line_end_offset,
1963            );
1964            let list_item = if !(in_code_block
1965                || is_blank
1966                || in_mkdocstrings
1967                || in_html_comment
1968                || (front_matter_end > 0 && i < front_matter_end))
1969            {
1970                // Strip blockquote prefix if present for list detection (reuse cached result)
1971                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972                    (content, prefix.len())
1973                } else {
1974                    (&**line, 0)
1975                };
1976
1977                if let Some((leading_spaces, marker, spacing, _content)) =
1978                    Self::parse_unordered_list(line_for_list_check)
1979                {
1980                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1981                    let content_column = marker_column + 1 + spacing.len();
1982
1983                    // According to CommonMark spec, unordered list items MUST have at least one space
1984                    // after the marker (-, *, or +). Without a space, it's not a list item.
1985                    // This also naturally handles cases like:
1986                    // - *emphasis* (not a list)
1987                    // - **bold** (not a list)
1988                    // - --- (horizontal rule, not a list)
1989                    if spacing.is_empty() {
1990                        None
1991                    } else {
1992                        Some(ListItemInfo {
1993                            marker: marker.to_string(),
1994                            is_ordered: false,
1995                            number: None,
1996                            marker_column,
1997                            content_column,
1998                        })
1999                    }
2000                } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2001                    Self::parse_ordered_list(line_for_list_check)
2002                {
2003                    let marker = format!("{number_str}{delimiter}");
2004                    let marker_column = blockquote_prefix_len + leading_spaces.len();
2005                    let content_column = marker_column + marker.len() + spacing.len();
2006
2007                    // According to CommonMark spec, ordered list items MUST have at least one space
2008                    // after the marker (period or parenthesis). Without a space, it's not a list item.
2009                    if spacing.is_empty() {
2010                        None
2011                    } else {
2012                        Some(ListItemInfo {
2013                            marker,
2014                            is_ordered: true,
2015                            number: number_str.parse().ok(),
2016                            marker_column,
2017                            content_column,
2018                        })
2019                    }
2020                } else {
2021                    None
2022                }
2023            } else {
2024                None
2025            };
2026
2027            // Detect horizontal rules (only outside code blocks and frontmatter)
2028            // Uses CommonMark-compliant check including leading indentation validation
2029            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2030            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2031
2032            lines.push(LineInfo {
2033                byte_offset,
2034                byte_len: line.len(),
2035                indent,
2036                visual_indent,
2037                is_blank,
2038                in_code_block,
2039                in_front_matter,
2040                in_html_block: false, // Will be populated after line creation
2041                in_html_comment,
2042                list_item,
2043                heading: None,    // Will be populated in second pass for Setext headings
2044                blockquote: None, // Will be populated after line creation
2045                in_mkdocstrings,
2046                in_esm_block: false, // Will be populated after line creation for MDX files
2047                in_code_span_continuation: false, // Will be populated after code spans are parsed
2048                is_horizontal_rule: is_hr,
2049            });
2050        }
2051
2052        lines
2053    }
2054
2055    /// Detect headings and blockquotes (called after HTML block detection)
2056    fn detect_headings_and_blockquotes(
2057        content: &str,
2058        lines: &mut [LineInfo],
2059        flavor: MarkdownFlavor,
2060        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2061        link_byte_ranges: &[(usize, usize)],
2062    ) {
2063        // Regex for heading detection
2064        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2065            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2066        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2067            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2068
2069        let content_lines: Vec<&str> = content.lines().collect();
2070
2071        // Detect front matter boundaries to skip those lines
2072        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2073
2074        // Detect headings (including Setext which needs look-ahead) and blockquotes
2075        for i in 0..lines.len() {
2076            if lines[i].in_code_block {
2077                continue;
2078            }
2079
2080            // Skip lines in front matter
2081            if front_matter_end > 0 && i < front_matter_end {
2082                continue;
2083            }
2084
2085            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2086            if lines[i].in_html_block {
2087                continue;
2088            }
2089
2090            let line = content_lines[i];
2091
2092            // Check for blockquotes (even on blank lines within blockquotes)
2093            if let Some(bq) = parse_blockquote_detailed(line) {
2094                let nesting_level = bq.markers.len(); // Each '>' is one level
2095                let marker_column = bq.indent.len();
2096
2097                // Build the prefix (indentation + markers + space)
2098                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2099
2100                // Check for various blockquote issues
2101                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2102                // Only flag multiple literal spaces, not tabs
2103                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2104                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2105
2106                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2107                // MD028 flags empty blockquote lines that don't have a single space after the marker
2108                // Lines like "> " or ">> " are already correct and don't need fixing
2109                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2110
2111                lines[i].blockquote = Some(BlockquoteInfo {
2112                    nesting_level,
2113                    indent: bq.indent.to_string(),
2114                    marker_column,
2115                    prefix,
2116                    content: bq.content.to_string(),
2117                    has_no_space_after_marker: has_no_space,
2118                    has_multiple_spaces_after_marker: has_multiple_spaces,
2119                    needs_md028_fix,
2120                });
2121            }
2122
2123            // Skip heading detection for blank lines
2124            if lines[i].is_blank {
2125                continue;
2126            }
2127
2128            // Check for ATX headings (but skip MkDocs snippet lines)
2129            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2130            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2131                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2132                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2133            } else {
2134                false
2135            };
2136
2137            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2138                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2139                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2140                    continue;
2141                }
2142                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2143                // This prevents false positives where `#fragment` is detected as a heading
2144                let line_offset = lines[i].byte_offset;
2145                if link_byte_ranges
2146                    .iter()
2147                    .any(|&(start, end)| line_offset > start && line_offset < end)
2148                {
2149                    continue;
2150                }
2151                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2152                let hashes = caps.get(2).map_or("", |m| m.as_str());
2153                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2154                let rest = caps.get(4).map_or("", |m| m.as_str());
2155
2156                let level = hashes.len() as u8;
2157                let marker_column = leading_spaces.len();
2158
2159                // Check for closing sequence, but handle custom IDs that might come after
2160                let (text, has_closing, closing_seq) = {
2161                    // First check if there's a custom ID at the end
2162                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2163                        // Check if this looks like a valid custom ID (ends with })
2164                        if rest[id_start..].trim_end().ends_with('}') {
2165                            // Split off the custom ID
2166                            (&rest[..id_start], &rest[id_start..])
2167                        } else {
2168                            (rest, "")
2169                        }
2170                    } else {
2171                        (rest, "")
2172                    };
2173
2174                    // Now look for closing hashes in the part before the custom ID
2175                    let trimmed_rest = rest_without_id.trim_end();
2176                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2177                        // Find the start of the hash sequence by walking backwards
2178                        // Use char_indices to get byte positions at char boundaries
2179                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2180
2181                        // Find which char index corresponds to last_hash_byte_pos
2182                        let last_hash_char_idx = char_positions
2183                            .iter()
2184                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2185
2186                        if let Some(mut char_idx) = last_hash_char_idx {
2187                            // Walk backwards to find start of hash sequence
2188                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2189                                char_idx -= 1;
2190                            }
2191
2192                            // Get the byte position of the start of hashes
2193                            let start_of_hashes = char_positions[char_idx].0;
2194
2195                            // Check if there's at least one space before the closing hashes
2196                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2197
2198                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2199                            let potential_closing = &trimmed_rest[start_of_hashes..];
2200                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2201
2202                            if is_all_hashes && has_space_before {
2203                                // This is a closing sequence
2204                                let closing_hashes = potential_closing.to_string();
2205                                // The text is everything before the closing hashes
2206                                // Don't include the custom ID here - it will be extracted later
2207                                let text_part = if !custom_id_part.is_empty() {
2208                                    // If we have a custom ID, append it back to get the full rest
2209                                    // This allows the extract_header_id function to handle it properly
2210                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2211                                } else {
2212                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2213                                };
2214                                (text_part, true, closing_hashes)
2215                            } else {
2216                                // Not a valid closing sequence, return the full content
2217                                (rest.to_string(), false, String::new())
2218                            }
2219                        } else {
2220                            // Couldn't find char boundary, return the full content
2221                            (rest.to_string(), false, String::new())
2222                        }
2223                    } else {
2224                        // No hashes found, return the full content
2225                        (rest.to_string(), false, String::new())
2226                    }
2227                };
2228
2229                let content_column = marker_column + hashes.len() + spaces_after.len();
2230
2231                // Extract custom header ID if present
2232                let raw_text = text.trim().to_string();
2233                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2234
2235                // If no custom ID was found on the header line, check the next line for standalone attr-list
2236                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2237                    let next_line = content_lines[i + 1];
2238                    if !lines[i + 1].in_code_block
2239                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2240                        && let Some(next_line_id) =
2241                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2242                    {
2243                        custom_id = Some(next_line_id);
2244                    }
2245                }
2246
2247                // ATX heading is "valid" for processing by heading rules if:
2248                // 1. Has space after # (CommonMark compliant): `# Heading`
2249                // 2. Is empty (just hashes): `#`
2250                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2251                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2252                //
2253                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2254                // - `#tag` - single # with lowercase (social hashtag)
2255                // - `#123` - single # with number (GitHub issue ref)
2256                let is_valid = !spaces_after.is_empty()
2257                    || rest.is_empty()
2258                    || level > 1
2259                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2260
2261                lines[i].heading = Some(HeadingInfo {
2262                    level,
2263                    style: HeadingStyle::ATX,
2264                    marker: hashes.to_string(),
2265                    marker_column,
2266                    content_column,
2267                    text: clean_text,
2268                    custom_id,
2269                    raw_text,
2270                    has_closing_sequence: has_closing,
2271                    closing_sequence: closing_seq,
2272                    is_valid,
2273                });
2274            }
2275            // Check for Setext headings (need to look at next line)
2276            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2277                let next_line = content_lines[i + 1];
2278                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2279                    // Skip if next line is front matter delimiter
2280                    if front_matter_end > 0 && i < front_matter_end {
2281                        continue;
2282                    }
2283
2284                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2285                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2286                    {
2287                        continue;
2288                    }
2289
2290                    let underline = next_line.trim();
2291
2292                    let level = if underline.starts_with('=') { 1 } else { 2 };
2293                    let style = if level == 1 {
2294                        HeadingStyle::Setext1
2295                    } else {
2296                        HeadingStyle::Setext2
2297                    };
2298
2299                    // Extract custom header ID if present
2300                    let raw_text = line.trim().to_string();
2301                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2302
2303                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2304                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2305                        let attr_line = content_lines[i + 2];
2306                        if !lines[i + 2].in_code_block
2307                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2308                            && let Some(attr_line_id) =
2309                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2310                        {
2311                            custom_id = Some(attr_line_id);
2312                        }
2313                    }
2314
2315                    lines[i].heading = Some(HeadingInfo {
2316                        level,
2317                        style,
2318                        marker: underline.to_string(),
2319                        marker_column: next_line.len() - next_line.trim_start().len(),
2320                        content_column: lines[i].indent,
2321                        text: clean_text,
2322                        custom_id,
2323                        raw_text,
2324                        has_closing_sequence: false,
2325                        closing_sequence: String::new(),
2326                        is_valid: true, // Setext headings are always valid
2327                    });
2328                }
2329            }
2330        }
2331    }
2332
2333    /// Detect HTML blocks in the content
2334    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2335        // HTML block elements that trigger block context
2336        // Includes HTML5 media, embedded content, and interactive elements
2337        const BLOCK_ELEMENTS: &[&str] = &[
2338            "address",
2339            "article",
2340            "aside",
2341            "audio",
2342            "blockquote",
2343            "canvas",
2344            "details",
2345            "dialog",
2346            "dd",
2347            "div",
2348            "dl",
2349            "dt",
2350            "embed",
2351            "fieldset",
2352            "figcaption",
2353            "figure",
2354            "footer",
2355            "form",
2356            "h1",
2357            "h2",
2358            "h3",
2359            "h4",
2360            "h5",
2361            "h6",
2362            "header",
2363            "hr",
2364            "iframe",
2365            "li",
2366            "main",
2367            "menu",
2368            "nav",
2369            "noscript",
2370            "object",
2371            "ol",
2372            "p",
2373            "picture",
2374            "pre",
2375            "script",
2376            "search",
2377            "section",
2378            "source",
2379            "style",
2380            "summary",
2381            "svg",
2382            "table",
2383            "tbody",
2384            "td",
2385            "template",
2386            "textarea",
2387            "tfoot",
2388            "th",
2389            "thead",
2390            "tr",
2391            "track",
2392            "ul",
2393            "video",
2394        ];
2395
2396        let mut i = 0;
2397        while i < lines.len() {
2398            // Skip if already in code block or front matter
2399            if lines[i].in_code_block || lines[i].in_front_matter {
2400                i += 1;
2401                continue;
2402            }
2403
2404            let trimmed = lines[i].content(content).trim_start();
2405
2406            // Check if line starts with an HTML tag
2407            if trimmed.starts_with('<') && trimmed.len() > 1 {
2408                // Extract tag name safely
2409                let after_bracket = &trimmed[1..];
2410                let is_closing = after_bracket.starts_with('/');
2411                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2412
2413                // Extract tag name (stop at space, >, /, or end of string)
2414                let tag_name = tag_start
2415                    .chars()
2416                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2417                    .collect::<String>()
2418                    .to_lowercase();
2419
2420                // Check if it's a block element
2421                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2422                    // Mark this line as in HTML block
2423                    lines[i].in_html_block = true;
2424
2425                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2426                    // This avoids complex nesting logic that might cause infinite loops
2427                    if !is_closing {
2428                        let closing_tag = format!("</{tag_name}>");
2429                        // style and script tags can contain blank lines (CSS/JS formatting)
2430                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2431                        let mut j = i + 1;
2432                        while j < lines.len() && j < i + 100 {
2433                            // Limit search to 100 lines
2434                            // Stop at blank lines (except for style/script tags)
2435                            if !allow_blank_lines && lines[j].is_blank {
2436                                break;
2437                            }
2438
2439                            lines[j].in_html_block = true;
2440
2441                            // Check if this line contains the closing tag
2442                            if lines[j].content(content).contains(&closing_tag) {
2443                                break;
2444                            }
2445                            j += 1;
2446                        }
2447                    }
2448                }
2449            }
2450
2451            i += 1;
2452        }
2453    }
2454
2455    /// Detect ESM import/export blocks in MDX files
2456    /// ESM blocks consist of contiguous import/export statements at the top of the file
2457    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2458        // Only process MDX files
2459        if !flavor.supports_esm_blocks() {
2460            return;
2461        }
2462
2463        let mut in_multiline_comment = false;
2464
2465        for line in lines.iter_mut() {
2466            // Skip blank lines and HTML comments
2467            if line.is_blank || line.in_html_comment {
2468                continue;
2469            }
2470
2471            let trimmed = line.content(content).trim_start();
2472
2473            // Handle continuation of multi-line JS comments
2474            if in_multiline_comment {
2475                if trimmed.contains("*/") {
2476                    in_multiline_comment = false;
2477                }
2478                continue;
2479            }
2480
2481            // Skip single-line JS comments (// and ///)
2482            if trimmed.starts_with("//") {
2483                continue;
2484            }
2485
2486            // Handle start of multi-line JS comment
2487            if trimmed.starts_with("/*") {
2488                if !trimmed.contains("*/") {
2489                    in_multiline_comment = true;
2490                }
2491                continue;
2492            }
2493
2494            // Check if line starts with import or export
2495            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2496                line.in_esm_block = true;
2497            } else {
2498                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2499                break;
2500            }
2501        }
2502    }
2503
2504    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2505    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2506        let mut code_spans = Vec::new();
2507
2508        // Quick check - if no backticks, no code spans
2509        if !content.contains('`') {
2510            return code_spans;
2511        }
2512
2513        // Use pulldown-cmark's streaming parser with byte offsets
2514        let parser = Parser::new(content).into_offset_iter();
2515
2516        for (event, range) in parser {
2517            if let Event::Code(_) = event {
2518                let start_pos = range.start;
2519                let end_pos = range.end;
2520
2521                // The range includes the backticks, extract the actual content
2522                let full_span = &content[start_pos..end_pos];
2523                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2524
2525                // Extract content between backticks, preserving spaces
2526                let content_start = start_pos + backtick_count;
2527                let content_end = end_pos - backtick_count;
2528                let span_content = if content_start < content_end {
2529                    content[content_start..content_end].to_string()
2530                } else {
2531                    String::new()
2532                };
2533
2534                // Use binary search to find line number - O(log n) instead of O(n)
2535                // Find the rightmost line whose byte_offset <= start_pos
2536                let line_idx = lines
2537                    .partition_point(|line| line.byte_offset <= start_pos)
2538                    .saturating_sub(1);
2539                let line_num = line_idx + 1;
2540                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2541
2542                // Find end column using binary search
2543                let end_line_idx = lines
2544                    .partition_point(|line| line.byte_offset <= end_pos)
2545                    .saturating_sub(1);
2546                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2547
2548                // Convert byte offsets to character positions for correct Unicode handling
2549                // This ensures consistency with warning.column which uses character positions
2550                let line_content = lines[line_idx].content(content);
2551                let col_start = if byte_col_start <= line_content.len() {
2552                    line_content[..byte_col_start].chars().count()
2553                } else {
2554                    line_content.chars().count()
2555                };
2556
2557                let end_line_content = lines[end_line_idx].content(content);
2558                let col_end = if byte_col_end <= end_line_content.len() {
2559                    end_line_content[..byte_col_end].chars().count()
2560                } else {
2561                    end_line_content.chars().count()
2562                };
2563
2564                code_spans.push(CodeSpan {
2565                    line: line_num,
2566                    end_line: end_line_idx + 1,
2567                    start_col: col_start,
2568                    end_col: col_end,
2569                    byte_offset: start_pos,
2570                    byte_end: end_pos,
2571                    backtick_count,
2572                    content: span_content,
2573                });
2574            }
2575        }
2576
2577        // Sort by position to ensure consistent ordering
2578        code_spans.sort_by_key(|span| span.byte_offset);
2579
2580        code_spans
2581    }
2582
2583    /// Parse all list blocks in the content (legacy line-by-line approach)
2584    ///
2585    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2586    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2587    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2588    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2589    ///   treated as list continuation (based on the list marker width)
2590    ///
2591    /// When a new list item is encountered, we check if list-breaking content was seen
2592    /// since the last item. If so, we start a new list block.
2593    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2594        // Minimum indentation for unordered list continuation per CommonMark spec
2595        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2596
2597        /// Initialize or reset the forward-scanning tracking state.
2598        /// This helper eliminates code duplication across three initialization sites.
2599        #[inline]
2600        fn reset_tracking_state(
2601            list_item: &ListItemInfo,
2602            has_list_breaking_content: &mut bool,
2603            min_continuation: &mut usize,
2604        ) {
2605            *has_list_breaking_content = false;
2606            let marker_width = if list_item.is_ordered {
2607                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2608            } else {
2609                list_item.marker.len()
2610            };
2611            *min_continuation = if list_item.is_ordered {
2612                marker_width
2613            } else {
2614                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2615            };
2616        }
2617
2618        // Pre-size based on lines that could be list items
2619        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2620        let mut current_block: Option<ListBlock> = None;
2621        let mut last_list_item_line = 0;
2622        let mut current_indent_level = 0;
2623        let mut last_marker_width = 0;
2624
2625        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2626        let mut has_list_breaking_content_since_last_item = false;
2627        let mut min_continuation_for_tracking = 0;
2628
2629        for (line_idx, line_info) in lines.iter().enumerate() {
2630            let line_num = line_idx + 1;
2631
2632            // Enhanced code block handling using Design #3's context analysis
2633            if line_info.in_code_block {
2634                if let Some(ref mut block) = current_block {
2635                    // Calculate minimum indentation for list continuation
2636                    let min_continuation_indent =
2637                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2638
2639                    // Analyze code block context using the three-tier classification
2640                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2641
2642                    match context {
2643                        CodeBlockContext::Indented => {
2644                            // Code block is properly indented - continues the list
2645                            block.end_line = line_num;
2646                            continue;
2647                        }
2648                        CodeBlockContext::Standalone => {
2649                            // Code block separates lists - end current block
2650                            let completed_block = current_block.take().unwrap();
2651                            list_blocks.push(completed_block);
2652                            continue;
2653                        }
2654                        CodeBlockContext::Adjacent => {
2655                            // Edge case - use conservative behavior (continue list)
2656                            block.end_line = line_num;
2657                            continue;
2658                        }
2659                    }
2660                } else {
2661                    // No current list block - skip code block lines
2662                    continue;
2663                }
2664            }
2665
2666            // Extract blockquote prefix if any
2667            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2668                caps.get(0).unwrap().as_str().to_string()
2669            } else {
2670                String::new()
2671            };
2672
2673            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2674            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2675            if current_block.is_some()
2676                && line_info.list_item.is_none()
2677                && !line_info.is_blank
2678                && !line_info.in_code_span_continuation
2679            {
2680                let line_content = line_info.content(content).trim();
2681
2682                // Check for structural separators that break lists
2683                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2684                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2685                // as they indicate improper indentation rather than lazy continuation.
2686                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2687                let breaks_list = line_info.heading.is_some()
2688                    || line_content.starts_with("---")
2689                    || line_content.starts_with("***")
2690                    || line_content.starts_with("___")
2691                    || crate::utils::skip_context::is_table_line(line_content)
2692                    || line_content.starts_with(">")
2693                    || (line_info.indent > 0
2694                        && line_info.indent < min_continuation_for_tracking
2695                        && !is_lazy_continuation);
2696
2697                if breaks_list {
2698                    has_list_breaking_content_since_last_item = true;
2699                }
2700            }
2701
2702            // If this line is a code span continuation within an active list block,
2703            // extend the block's end_line to include this line (maintains list continuity)
2704            if line_info.in_code_span_continuation
2705                && line_info.list_item.is_none()
2706                && let Some(ref mut block) = current_block
2707            {
2708                block.end_line = line_num;
2709            }
2710
2711            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2712            // properly indented lines within the list). This ensures the workaround at line 2448
2713            // works correctly when there are multiple continuation lines before a nested list item.
2714            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2715            let is_valid_continuation =
2716                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2717            if !line_info.in_code_span_continuation
2718                && line_info.list_item.is_none()
2719                && !line_info.is_blank
2720                && !line_info.in_code_block
2721                && is_valid_continuation
2722                && let Some(ref mut block) = current_block
2723            {
2724                block.end_line = line_num;
2725            }
2726
2727            // Check if this line is a list item
2728            if let Some(list_item) = &line_info.list_item {
2729                // Calculate nesting level based on indentation
2730                let item_indent = list_item.marker_column;
2731                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2732
2733                if let Some(ref mut block) = current_block {
2734                    // Check if this continues the current block
2735                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2736                    // or a continuation at the same or lower level
2737                    let is_nested = nesting > block.nesting_level;
2738                    let same_type =
2739                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2740                    let same_context = block.blockquote_prefix == blockquote_prefix;
2741                    // Allow one blank line after last item, or lines immediately after block content
2742                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2743
2744                    // For unordered lists, also check marker consistency
2745                    let marker_compatible =
2746                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2747
2748                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2749                    // This eliminates the quadratic bottleneck from issue #148
2750                    let has_non_list_content = has_list_breaking_content_since_last_item;
2751
2752                    // A list continues if:
2753                    // 1. It's a nested item (indented more than the parent), OR
2754                    // 2. It's the same type at the same level with reasonable distance
2755                    let mut continues_list = if is_nested {
2756                        // Nested items always continue the list if they're in the same context
2757                        same_context && reasonable_distance && !has_non_list_content
2758                    } else {
2759                        // Same-level items need to match type and markers
2760                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2761                    };
2762
2763                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2764                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2765                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2766                        // Check if the previous line was a list item or a continuation of a list item
2767                        // (including lazy continuation lines)
2768                        if block.item_lines.contains(&(line_num - 1)) {
2769                            // They're consecutive list items - force them to be in the same list
2770                            continues_list = true;
2771                        } else {
2772                            // Previous line is a continuation line within this block
2773                            // (e.g., lazy continuation with indent=0)
2774                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2775                            continues_list = true;
2776                        }
2777                    }
2778
2779                    if continues_list {
2780                        // Extend current block
2781                        block.end_line = line_num;
2782                        block.item_lines.push(line_num);
2783
2784                        // Update max marker width
2785                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2786                            list_item.marker.len() + 1
2787                        } else {
2788                            list_item.marker.len()
2789                        });
2790
2791                        // Update marker consistency for unordered lists
2792                        if !block.is_ordered
2793                            && block.marker.is_some()
2794                            && block.marker.as_ref() != Some(&list_item.marker)
2795                        {
2796                            // Mixed markers, clear the marker field
2797                            block.marker = None;
2798                        }
2799
2800                        // Reset tracked state for issue #148 optimization
2801                        reset_tracking_state(
2802                            list_item,
2803                            &mut has_list_breaking_content_since_last_item,
2804                            &mut min_continuation_for_tracking,
2805                        );
2806                    } else {
2807                        // End current block and start a new one
2808
2809                        list_blocks.push(block.clone());
2810
2811                        *block = ListBlock {
2812                            start_line: line_num,
2813                            end_line: line_num,
2814                            is_ordered: list_item.is_ordered,
2815                            marker: if list_item.is_ordered {
2816                                None
2817                            } else {
2818                                Some(list_item.marker.clone())
2819                            },
2820                            blockquote_prefix: blockquote_prefix.clone(),
2821                            item_lines: vec![line_num],
2822                            nesting_level: nesting,
2823                            max_marker_width: if list_item.is_ordered {
2824                                list_item.marker.len() + 1
2825                            } else {
2826                                list_item.marker.len()
2827                            },
2828                        };
2829
2830                        // Initialize tracked state for new block (issue #148 optimization)
2831                        reset_tracking_state(
2832                            list_item,
2833                            &mut has_list_breaking_content_since_last_item,
2834                            &mut min_continuation_for_tracking,
2835                        );
2836                    }
2837                } else {
2838                    // Start a new block
2839                    current_block = Some(ListBlock {
2840                        start_line: line_num,
2841                        end_line: line_num,
2842                        is_ordered: list_item.is_ordered,
2843                        marker: if list_item.is_ordered {
2844                            None
2845                        } else {
2846                            Some(list_item.marker.clone())
2847                        },
2848                        blockquote_prefix,
2849                        item_lines: vec![line_num],
2850                        nesting_level: nesting,
2851                        max_marker_width: list_item.marker.len(),
2852                    });
2853
2854                    // Initialize tracked state for new block (issue #148 optimization)
2855                    reset_tracking_state(
2856                        list_item,
2857                        &mut has_list_breaking_content_since_last_item,
2858                        &mut min_continuation_for_tracking,
2859                    );
2860                }
2861
2862                last_list_item_line = line_num;
2863                current_indent_level = item_indent;
2864                last_marker_width = if list_item.is_ordered {
2865                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2866                } else {
2867                    list_item.marker.len()
2868                };
2869            } else if let Some(ref mut block) = current_block {
2870                // Not a list item - check if it continues the current block
2871
2872                // For MD032 compatibility, we use a simple approach:
2873                // - Indented lines continue the list
2874                // - Blank lines followed by indented content continue the list
2875                // - Everything else ends the list
2876
2877                // Check if the last line in the list block ended with a backslash (hard line break)
2878                // This handles cases where list items use backslash for hard line breaks
2879                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2880                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2881                } else {
2882                    false
2883                };
2884
2885                // Calculate minimum indentation for list continuation
2886                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2887                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2888                let min_continuation_indent = if block.is_ordered {
2889                    current_indent_level + last_marker_width
2890                } else {
2891                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2892                };
2893
2894                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2895                    // Indented line or backslash continuation continues the list
2896                    block.end_line = line_num;
2897                } else if line_info.is_blank {
2898                    // Blank line - check if it's internal to the list or ending it
2899                    // We only include blank lines that are followed by more list content
2900                    let mut check_idx = line_idx + 1;
2901                    let mut found_continuation = false;
2902
2903                    // Skip additional blank lines
2904                    while check_idx < lines.len() && lines[check_idx].is_blank {
2905                        check_idx += 1;
2906                    }
2907
2908                    if check_idx < lines.len() {
2909                        let next_line = &lines[check_idx];
2910                        // Check if followed by indented content (list continuation)
2911                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2912                            found_continuation = true;
2913                        }
2914                        // Check if followed by another list item at the same level
2915                        else if !next_line.in_code_block
2916                            && next_line.list_item.is_some()
2917                            && let Some(item) = &next_line.list_item
2918                        {
2919                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2920                                .find(next_line.content(content))
2921                                .map_or(String::new(), |m| m.as_str().to_string());
2922                            if item.marker_column == current_indent_level
2923                                && item.is_ordered == block.is_ordered
2924                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2925                            {
2926                                // Check if there was meaningful content between the list items (unused now)
2927                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2928                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2929                                    if let Some(between_line) = lines.get(idx) {
2930                                        let between_content = between_line.content(content);
2931                                        let trimmed = between_content.trim();
2932                                        // Skip empty lines
2933                                        if trimmed.is_empty() {
2934                                            return false;
2935                                        }
2936                                        // Check for meaningful content
2937                                        let line_indent = between_content.len() - between_content.trim_start().len();
2938
2939                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2940                                        if trimmed.starts_with("```")
2941                                            || trimmed.starts_with("~~~")
2942                                            || trimmed.starts_with("---")
2943                                            || trimmed.starts_with("***")
2944                                            || trimmed.starts_with("___")
2945                                            || trimmed.starts_with(">")
2946                                            || crate::utils::skip_context::is_table_line(trimmed)
2947                                            || between_line.heading.is_some()
2948                                        {
2949                                            return true; // These are structural separators - meaningful content that breaks lists
2950                                        }
2951
2952                                        // Only properly indented content continues the list
2953                                        line_indent >= min_continuation_indent
2954                                    } else {
2955                                        false
2956                                    }
2957                                });
2958
2959                                if block.is_ordered {
2960                                    // For ordered lists: don't continue if there are structural separators
2961                                    // Check if there are structural separators between the list items
2962                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2963                                        if let Some(between_line) = lines.get(idx) {
2964                                            let trimmed = between_line.content(content).trim();
2965                                            if trimmed.is_empty() {
2966                                                return false;
2967                                            }
2968                                            // Check for structural separators that break lists
2969                                            trimmed.starts_with("```")
2970                                                || trimmed.starts_with("~~~")
2971                                                || trimmed.starts_with("---")
2972                                                || trimmed.starts_with("***")
2973                                                || trimmed.starts_with("___")
2974                                                || trimmed.starts_with(">")
2975                                                || crate::utils::skip_context::is_table_line(trimmed)
2976                                                || between_line.heading.is_some()
2977                                        } else {
2978                                            false
2979                                        }
2980                                    });
2981                                    found_continuation = !has_structural_separators;
2982                                } else {
2983                                    // For unordered lists: also check for structural separators
2984                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2985                                        if let Some(between_line) = lines.get(idx) {
2986                                            let trimmed = between_line.content(content).trim();
2987                                            if trimmed.is_empty() {
2988                                                return false;
2989                                            }
2990                                            // Check for structural separators that break lists
2991                                            trimmed.starts_with("```")
2992                                                || trimmed.starts_with("~~~")
2993                                                || trimmed.starts_with("---")
2994                                                || trimmed.starts_with("***")
2995                                                || trimmed.starts_with("___")
2996                                                || trimmed.starts_with(">")
2997                                                || crate::utils::skip_context::is_table_line(trimmed)
2998                                                || between_line.heading.is_some()
2999                                        } else {
3000                                            false
3001                                        }
3002                                    });
3003                                    found_continuation = !has_structural_separators;
3004                                }
3005                            }
3006                        }
3007                    }
3008
3009                    if found_continuation {
3010                        // Include the blank line in the block
3011                        block.end_line = line_num;
3012                    } else {
3013                        // Blank line ends the list - don't include it
3014                        list_blocks.push(block.clone());
3015                        current_block = None;
3016                    }
3017                } else {
3018                    // Check for lazy continuation - non-indented line immediately after a list item
3019                    // But only if the line has sufficient indentation for the list type
3020                    let min_required_indent = if block.is_ordered {
3021                        current_indent_level + last_marker_width
3022                    } else {
3023                        current_indent_level + 2
3024                    };
3025
3026                    // For lazy continuation to apply, the line must either:
3027                    // 1. Have no indentation (true lazy continuation)
3028                    // 2. Have sufficient indentation for the list type
3029                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3030                    let line_content = line_info.content(content).trim();
3031
3032                    // Check for table-like patterns
3033                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3034
3035                    let is_structural_separator = line_info.heading.is_some()
3036                        || line_content.starts_with("```")
3037                        || line_content.starts_with("~~~")
3038                        || line_content.starts_with("---")
3039                        || line_content.starts_with("***")
3040                        || line_content.starts_with("___")
3041                        || line_content.starts_with(">")
3042                        || looks_like_table;
3043
3044                    // Allow lazy continuation if we're still within the same list block
3045                    // (not just immediately after a list item)
3046                    let is_lazy_continuation = !is_structural_separator
3047                        && !line_info.is_blank
3048                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3049
3050                    if is_lazy_continuation {
3051                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3052                        // it's probably not a continuation
3053                        let content_to_check = if !blockquote_prefix.is_empty() {
3054                            // Strip blockquote prefix to check the actual content
3055                            line_info
3056                                .content(content)
3057                                .strip_prefix(&blockquote_prefix)
3058                                .unwrap_or(line_info.content(content))
3059                                .trim()
3060                        } else {
3061                            line_info.content(content).trim()
3062                        };
3063
3064                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3065
3066                        // If it starts with uppercase and the previous line ended with punctuation,
3067                        // it's likely a new paragraph, not a continuation
3068                        if starts_with_uppercase && last_list_item_line > 0 {
3069                            // This looks like a new paragraph
3070                            list_blocks.push(block.clone());
3071                            current_block = None;
3072                        } else {
3073                            // This is a lazy continuation line
3074                            block.end_line = line_num;
3075                        }
3076                    } else {
3077                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3078                        list_blocks.push(block.clone());
3079                        current_block = None;
3080                    }
3081                }
3082            }
3083        }
3084
3085        // Don't forget the last block
3086        if let Some(block) = current_block {
3087            list_blocks.push(block);
3088        }
3089
3090        // Merge adjacent blocks that should be one
3091        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3092
3093        list_blocks
3094    }
3095
3096    /// Compute character frequency for fast content analysis
3097    fn compute_char_frequency(content: &str) -> CharFrequency {
3098        let mut frequency = CharFrequency::default();
3099
3100        for ch in content.chars() {
3101            match ch {
3102                '#' => frequency.hash_count += 1,
3103                '*' => frequency.asterisk_count += 1,
3104                '_' => frequency.underscore_count += 1,
3105                '-' => frequency.hyphen_count += 1,
3106                '+' => frequency.plus_count += 1,
3107                '>' => frequency.gt_count += 1,
3108                '|' => frequency.pipe_count += 1,
3109                '[' => frequency.bracket_count += 1,
3110                '`' => frequency.backtick_count += 1,
3111                '<' => frequency.lt_count += 1,
3112                '!' => frequency.exclamation_count += 1,
3113                '\n' => frequency.newline_count += 1,
3114                _ => {}
3115            }
3116        }
3117
3118        frequency
3119    }
3120
3121    /// Parse HTML tags in the content
3122    fn parse_html_tags(
3123        content: &str,
3124        lines: &[LineInfo],
3125        code_blocks: &[(usize, usize)],
3126        flavor: MarkdownFlavor,
3127    ) -> Vec<HtmlTag> {
3128        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3129            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3130
3131        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3132
3133        for cap in HTML_TAG_REGEX.captures_iter(content) {
3134            let full_match = cap.get(0).unwrap();
3135            let match_start = full_match.start();
3136            let match_end = full_match.end();
3137
3138            // Skip if in code block
3139            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3140                continue;
3141            }
3142
3143            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3144            let tag_name_original = cap.get(2).unwrap().as_str();
3145            let tag_name = tag_name_original.to_lowercase();
3146            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3147
3148            // Skip JSX components in MDX files (tags starting with uppercase letter)
3149            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3150            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3151                continue;
3152            }
3153
3154            // Find which line this tag is on
3155            let mut line_num = 1;
3156            let mut col_start = match_start;
3157            let mut col_end = match_end;
3158            for (idx, line_info) in lines.iter().enumerate() {
3159                if match_start >= line_info.byte_offset {
3160                    line_num = idx + 1;
3161                    col_start = match_start - line_info.byte_offset;
3162                    col_end = match_end - line_info.byte_offset;
3163                } else {
3164                    break;
3165                }
3166            }
3167
3168            html_tags.push(HtmlTag {
3169                line: line_num,
3170                start_col: col_start,
3171                end_col: col_end,
3172                byte_offset: match_start,
3173                byte_end: match_end,
3174                tag_name,
3175                is_closing,
3176                is_self_closing,
3177                raw_content: full_match.as_str().to_string(),
3178            });
3179        }
3180
3181        html_tags
3182    }
3183
3184    /// Parse emphasis spans in the content
3185    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3186        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3187            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3188
3189        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3190
3191        for cap in EMPHASIS_REGEX.captures_iter(content) {
3192            let full_match = cap.get(0).unwrap();
3193            let match_start = full_match.start();
3194            let match_end = full_match.end();
3195
3196            // Skip if in code block
3197            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3198                continue;
3199            }
3200
3201            let opening_markers = cap.get(1).unwrap().as_str();
3202            let content_part = cap.get(2).unwrap().as_str();
3203            let closing_markers = cap.get(3).unwrap().as_str();
3204
3205            // Validate matching markers
3206            if opening_markers.chars().next() != closing_markers.chars().next()
3207                || opening_markers.len() != closing_markers.len()
3208            {
3209                continue;
3210            }
3211
3212            let marker = opening_markers.chars().next().unwrap();
3213            let marker_count = opening_markers.len();
3214
3215            // Find which line this emphasis is on
3216            let mut line_num = 1;
3217            let mut col_start = match_start;
3218            let mut col_end = match_end;
3219            for (idx, line_info) in lines.iter().enumerate() {
3220                if match_start >= line_info.byte_offset {
3221                    line_num = idx + 1;
3222                    col_start = match_start - line_info.byte_offset;
3223                    col_end = match_end - line_info.byte_offset;
3224                } else {
3225                    break;
3226                }
3227            }
3228
3229            emphasis_spans.push(EmphasisSpan {
3230                line: line_num,
3231                start_col: col_start,
3232                end_col: col_end,
3233                byte_offset: match_start,
3234                byte_end: match_end,
3235                marker,
3236                marker_count,
3237                content: content_part.to_string(),
3238            });
3239        }
3240
3241        emphasis_spans
3242    }
3243
3244    /// Parse table rows in the content
3245    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3246        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3247
3248        for (line_idx, line_info) in lines.iter().enumerate() {
3249            // Skip lines in code blocks or blank lines
3250            if line_info.in_code_block || line_info.is_blank {
3251                continue;
3252            }
3253
3254            let line = line_info.content(content);
3255            let line_num = line_idx + 1;
3256
3257            // Check if this line contains pipes (potential table row)
3258            if !line.contains('|') {
3259                continue;
3260            }
3261
3262            // Count columns by splitting on pipes
3263            let parts: Vec<&str> = line.split('|').collect();
3264            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3265
3266            // Check if this is a separator row
3267            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3268            let mut column_alignments = Vec::new();
3269
3270            if is_separator {
3271                for part in &parts[1..parts.len() - 1] {
3272                    // Skip first and last empty parts
3273                    let trimmed = part.trim();
3274                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3275                        "center".to_string()
3276                    } else if trimmed.ends_with(':') {
3277                        "right".to_string()
3278                    } else if trimmed.starts_with(':') {
3279                        "left".to_string()
3280                    } else {
3281                        "none".to_string()
3282                    };
3283                    column_alignments.push(alignment);
3284                }
3285            }
3286
3287            table_rows.push(TableRow {
3288                line: line_num,
3289                is_separator,
3290                column_count,
3291                column_alignments,
3292            });
3293        }
3294
3295        table_rows
3296    }
3297
3298    /// Parse bare URLs and emails in the content
3299    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3300        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3301
3302        // Check for bare URLs (not in angle brackets or markdown links)
3303        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3304            let full_match = cap.get(0).unwrap();
3305            let match_start = full_match.start();
3306            let match_end = full_match.end();
3307
3308            // Skip if in code block
3309            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3310                continue;
3311            }
3312
3313            // Skip if already in angle brackets or markdown links
3314            let preceding_char = if match_start > 0 {
3315                content.chars().nth(match_start - 1)
3316            } else {
3317                None
3318            };
3319            let following_char = content.chars().nth(match_end);
3320
3321            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3322                continue;
3323            }
3324            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3325                continue;
3326            }
3327
3328            let url = full_match.as_str();
3329            let url_type = if url.starts_with("https://") {
3330                "https"
3331            } else if url.starts_with("http://") {
3332                "http"
3333            } else if url.starts_with("ftp://") {
3334                "ftp"
3335            } else {
3336                "other"
3337            };
3338
3339            // Find which line this URL is on
3340            let mut line_num = 1;
3341            let mut col_start = match_start;
3342            let mut col_end = match_end;
3343            for (idx, line_info) in lines.iter().enumerate() {
3344                if match_start >= line_info.byte_offset {
3345                    line_num = idx + 1;
3346                    col_start = match_start - line_info.byte_offset;
3347                    col_end = match_end - line_info.byte_offset;
3348                } else {
3349                    break;
3350                }
3351            }
3352
3353            bare_urls.push(BareUrl {
3354                line: line_num,
3355                start_col: col_start,
3356                end_col: col_end,
3357                byte_offset: match_start,
3358                byte_end: match_end,
3359                url: url.to_string(),
3360                url_type: url_type.to_string(),
3361            });
3362        }
3363
3364        // Check for bare email addresses
3365        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3366            let full_match = cap.get(0).unwrap();
3367            let match_start = full_match.start();
3368            let match_end = full_match.end();
3369
3370            // Skip if in code block
3371            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3372                continue;
3373            }
3374
3375            // Skip if already in angle brackets or markdown links
3376            let preceding_char = if match_start > 0 {
3377                content.chars().nth(match_start - 1)
3378            } else {
3379                None
3380            };
3381            let following_char = content.chars().nth(match_end);
3382
3383            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3384                continue;
3385            }
3386            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3387                continue;
3388            }
3389
3390            let email = full_match.as_str();
3391
3392            // Find which line this email is on
3393            let mut line_num = 1;
3394            let mut col_start = match_start;
3395            let mut col_end = match_end;
3396            for (idx, line_info) in lines.iter().enumerate() {
3397                if match_start >= line_info.byte_offset {
3398                    line_num = idx + 1;
3399                    col_start = match_start - line_info.byte_offset;
3400                    col_end = match_end - line_info.byte_offset;
3401                } else {
3402                    break;
3403                }
3404            }
3405
3406            bare_urls.push(BareUrl {
3407                line: line_num,
3408                start_col: col_start,
3409                end_col: col_end,
3410                byte_offset: match_start,
3411                byte_end: match_end,
3412                url: email.to_string(),
3413                url_type: "email".to_string(),
3414            });
3415        }
3416
3417        bare_urls
3418    }
3419
3420    /// Get an iterator over valid CommonMark headings
3421    ///
3422    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
3423    /// that should be flagged by MD018 but should not be processed by other heading rules.
3424    ///
3425    /// # Examples
3426    ///
3427    /// ```rust
3428    /// use rumdl_lib::lint_context::LintContext;
3429    /// use rumdl_lib::config::MarkdownFlavor;
3430    ///
3431    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
3432    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3433    ///
3434    /// for heading in ctx.valid_headings() {
3435    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
3436    /// }
3437    /// // Only prints valid headings, skips `#NoSpace`
3438    /// ```
3439    #[must_use]
3440    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
3441        ValidHeadingsIter::new(&self.lines)
3442    }
3443
3444    /// Check if the document contains any valid CommonMark headings
3445    ///
3446    /// Returns `true` if there is at least one heading with proper space after `#`.
3447    #[must_use]
3448    pub fn has_valid_headings(&self) -> bool {
3449        self.lines
3450            .iter()
3451            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3452    }
3453}
3454
3455/// Merge adjacent list blocks that should be treated as one
3456fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3457    if list_blocks.len() < 2 {
3458        return;
3459    }
3460
3461    let mut merger = ListBlockMerger::new(content, lines);
3462    *list_blocks = merger.merge(list_blocks);
3463}
3464
3465/// Helper struct to manage the complex logic of merging list blocks
3466struct ListBlockMerger<'a> {
3467    content: &'a str,
3468    lines: &'a [LineInfo],
3469}
3470
3471impl<'a> ListBlockMerger<'a> {
3472    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3473        Self { content, lines }
3474    }
3475
3476    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3477        let mut merged = Vec::with_capacity(list_blocks.len());
3478        let mut current = list_blocks[0].clone();
3479
3480        for next in list_blocks.iter().skip(1) {
3481            if self.should_merge_blocks(&current, next) {
3482                current = self.merge_two_blocks(current, next);
3483            } else {
3484                merged.push(current);
3485                current = next.clone();
3486            }
3487        }
3488
3489        merged.push(current);
3490        merged
3491    }
3492
3493    /// Determine if two adjacent list blocks should be merged
3494    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3495        // Basic compatibility checks
3496        if !self.blocks_are_compatible(current, next) {
3497            return false;
3498        }
3499
3500        // Check spacing and content between blocks
3501        let spacing = self.analyze_spacing_between(current, next);
3502        match spacing {
3503            BlockSpacing::Consecutive => true,
3504            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3505            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3506                self.can_merge_with_content_between(current, next)
3507            }
3508        }
3509    }
3510
3511    /// Check if blocks have compatible structure for merging
3512    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3513        current.is_ordered == next.is_ordered
3514            && current.blockquote_prefix == next.blockquote_prefix
3515            && current.nesting_level == next.nesting_level
3516    }
3517
3518    /// Analyze the spacing between two list blocks
3519    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3520        let gap = next.start_line - current.end_line;
3521
3522        match gap {
3523            1 => BlockSpacing::Consecutive,
3524            2 => BlockSpacing::SingleBlank,
3525            _ if gap > 2 => {
3526                if self.has_only_blank_lines_between(current, next) {
3527                    BlockSpacing::MultipleBlanks
3528                } else {
3529                    BlockSpacing::ContentBetween
3530                }
3531            }
3532            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3533        }
3534    }
3535
3536    /// Check if unordered lists can be merged with a single blank line between
3537    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3538        // Check if there are structural separators between the blocks
3539        // If has_meaningful_content_between returns true, it means there are structural separators
3540        if has_meaningful_content_between(self.content, current, next, self.lines) {
3541            return false; // Structural separators prevent merging
3542        }
3543
3544        // Only merge unordered lists with same marker across single blank
3545        !current.is_ordered && current.marker == next.marker
3546    }
3547
3548    /// Check if ordered lists can be merged when there's content between them
3549    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3550        // Do not merge lists if there are structural separators between them
3551        if has_meaningful_content_between(self.content, current, next, self.lines) {
3552            return false; // Structural separators prevent merging
3553        }
3554
3555        // Only consider merging ordered lists if there's no structural content between
3556        current.is_ordered && next.is_ordered
3557    }
3558
3559    /// Check if there are only blank lines between blocks
3560    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3561        for line_num in (current.end_line + 1)..next.start_line {
3562            if let Some(line_info) = self.lines.get(line_num - 1)
3563                && !line_info.content(self.content).trim().is_empty()
3564            {
3565                return false;
3566            }
3567        }
3568        true
3569    }
3570
3571    /// Merge two compatible list blocks into one
3572    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3573        current.end_line = next.end_line;
3574        current.item_lines.extend_from_slice(&next.item_lines);
3575
3576        // Update max marker width
3577        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3578
3579        // Handle marker consistency for unordered lists
3580        if !current.is_ordered && self.markers_differ(&current, next) {
3581            current.marker = None; // Mixed markers
3582        }
3583
3584        current
3585    }
3586
3587    /// Check if two blocks have different markers
3588    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3589        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3590    }
3591}
3592
/// Types of spacing between two list blocks.
///
/// This is a plain fieldless enum, so it derives the cheap common traits
/// (`Clone`, `Copy`, `Eq`) alongside `Debug` and `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockSpacing {
    /// No gap between blocks.
    Consecutive,
    /// One blank line between blocks.
    SingleBlank,
    /// Multiple blank lines but no content.
    MultipleBlanks,
    /// Content exists between blocks.
    ContentBetween,
}
3601
3602/// Check if there's meaningful content (not just blank lines) between two list blocks
3603fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3604    // Check lines between current.end_line and next.start_line
3605    for line_num in (current.end_line + 1)..next.start_line {
3606        if let Some(line_info) = lines.get(line_num - 1) {
3607            // Convert to 0-indexed
3608            let trimmed = line_info.content(content).trim();
3609
3610            // Skip empty lines
3611            if trimmed.is_empty() {
3612                continue;
3613            }
3614
3615            // Check for structural separators that should separate lists (CommonMark compliant)
3616
3617            // Headings separate lists
3618            if line_info.heading.is_some() {
3619                return true; // Has meaningful content - headings separate lists
3620            }
3621
3622            // Horizontal rules separate lists (---, ***, ___)
3623            if is_horizontal_rule(trimmed) {
3624                return true; // Has meaningful content - horizontal rules separate lists
3625            }
3626
3627            // Tables separate lists
3628            if crate::utils::skip_context::is_table_line(trimmed) {
3629                return true; // Has meaningful content - tables separate lists
3630            }
3631
3632            // Blockquotes separate lists
3633            if trimmed.starts_with('>') {
3634                return true; // Has meaningful content - blockquotes separate lists
3635            }
3636
3637            // Code block fences separate lists (unless properly indented as list content)
3638            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3639                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3640
3641                // Check if this code block is properly indented as list continuation
3642                let min_continuation_indent = if current.is_ordered {
3643                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3644                } else {
3645                    current.nesting_level + 2
3646                };
3647
3648                if line_indent < min_continuation_indent {
3649                    // This is a standalone code block that separates lists
3650                    return true; // Has meaningful content - standalone code blocks separate lists
3651                }
3652            }
3653
3654            // Check if this line has proper indentation for list continuation
3655            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3656
3657            // Calculate minimum indentation needed to be list continuation
3658            let min_indent = if current.is_ordered {
3659                current.nesting_level + current.max_marker_width
3660            } else {
3661                current.nesting_level + 2
3662            };
3663
3664            // If the line is not indented enough to be list continuation, it's meaningful content
3665            if line_indent < min_indent {
3666                return true; // Has meaningful content - content not indented as list continuation
3667            }
3668
3669            // If we reach here, the line is properly indented as list continuation
3670            // Continue checking other lines
3671        }
3672    }
3673
3674    // Only blank lines or properly indented list continuation content between blocks
3675    false
3676}
3677
3678/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
3679/// CommonMark rules for thematic breaks (horizontal rules):
3680/// - May have 0-3 spaces of leading indentation (but NOT tabs)
3681/// - Must have 3+ of the same character (-, *, or _)
3682/// - May have spaces between characters
3683/// - No other characters allowed
3684pub fn is_horizontal_rule_line(line: &str) -> bool {
3685    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
3686    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3687    if leading_spaces > 3 || line.starts_with('\t') {
3688        return false;
3689    }
3690
3691    is_horizontal_rule_content(line.trim())
3692}
3693
/// Check if trimmed content matches horizontal rule pattern.
///
/// The content must start with `-`, `*` or `_`, contain at least three occurrences of that
/// same character, and contain nothing else except spaces and tabs. The input is expected to
/// be trimmed already: leading whitespace makes the check fail.
///
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Walk the characters directly instead of collecting them into a temporary vector.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false, // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}

/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// Like its target, it expects already-trimmed input and performs no indentation check.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3723
/// Unit tests for `LintContext` construction: line offsets, offset-to-position mapping,
/// per-line metadata, list item detection and MDX ESM block detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let source = "";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline is one line starting at offset 0.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        for (offset, position) in [(0, (1, 1)), (3, (1, 4))] {
            assert_eq!(ctx.offset_to_line_col(offset), position);
        }
    }

    /// Line offsets and offset-to-position mapping across several lines.
    #[test]
    fn test_multi_line() {
        let ctx = LintContext::new(
            "# Title\n\nSecond line\nThird line",
            MarkdownFlavor::Standard,
            None,
        );
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);

        // Byte offset -> expected (line, column)
        let cases = [
            (0, (1, 1)),  // start
            (8, (2, 1)),  // start of blank line
            (9, (3, 1)),  // start of 'Second line'
            (15, (3, 7)), // middle of 'Second line'
            (21, (4, 1)), // start of 'Third line'
        ];
        for (offset, position) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), position);
        }
    }

    /// Per-line metadata (content, offset, indent, blank flag) and the lookup helpers.
    #[test]
    fn test_line_info() {
        let source = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let title = &ctx.lines[0];
        assert_eq!(title.content(ctx.content), "# Title");
        assert_eq!(title.byte_offset, 0);
        assert_eq!(title.indent, 0);
        assert!(!title.is_blank);
        assert!(!title.in_code_block);
        assert!(title.list_item.is_none());

        // Line 2: "    indented"
        let indented = &ctx.lines[1];
        assert_eq!(indented.content(ctx.content), "    indented");
        assert_eq!(indented.byte_offset, 8);
        assert_eq!(indented.indent, 4);
        assert!(!indented.is_blank);

        // Line 3: "" (blank)
        let blank = &ctx.lines[2];
        assert_eq!(blank.content(ctx.content), "");
        assert!(blank.is_blank);

        // Lookup helpers take 1-indexed line numbers
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|info| info.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|info| info.indent), Some(4));
    }

    /// Unordered, nested and ordered list items are detected; plain text is not.
    #[test]
    fn test_list_item_detection() {
        let source = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let dash_line = &ctx.lines[0];
        assert!(dash_line.list_item.is_some());
        let dash_item = dash_line.list_item.as_ref().unwrap();
        assert_eq!(dash_item.marker, "-");
        assert!(!dash_item.is_ordered);
        assert_eq!(dash_item.marker_column, 0);
        assert_eq!(dash_item.content_column, 2);

        // Line 2: "  * Nested item"
        let star_line = &ctx.lines[1];
        assert!(star_line.list_item.is_some());
        let star_item = star_line.list_item.as_ref().unwrap();
        assert_eq!(star_item.marker, "*");
        assert_eq!(star_item.marker_column, 2);

        // Line 3: "1. Ordered item"
        let ordered_line = &ctx.lines[2];
        assert!(ordered_line.list_item.is_some());
        let ordered_item = ordered_line.list_item.as_ref().unwrap();
        assert_eq!(ordered_item.marker, "1.");
        assert!(ordered_item.is_ordered);
        assert_eq!(ordered_item.number, Some(1));

        // Line 6: "Not a list"
        let plain_line = &ctx.lines[5];
        assert!(plain_line.list_item.is_none());
    }

    /// Offsets at line starts, line ends and the end of the document.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let ctx = LintContext::new("a\nb\nc", MarkdownFlavor::Standard, None);

        // line_offsets: [0, 2, 4]
        let cases = [
            (0, (1, 1)), // 'a'
            (1, (1, 2)), // after 'a'
            (2, (2, 1)), // 'b'
            (3, (2, 2)), // after 'b'
            (4, (3, 1)), // 'c'
            (5, (3, 2)), // after 'c'
        ];
        for (offset, position) in cases {
            assert_eq!(ctx.offset_to_line_col(offset), position);
        }
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
        assert_eq!(ctx.lines.len(), 10);

        // (line index, expected `in_esm_block`, failure message)
        let expectations = [
            (0, true, "Line 1 (import) should be in_esm_block"),
            (1, true, "Line 2 (export) should be in_esm_block"),
            (2, false, "Line 3 (blank) should NOT be in_esm_block"),
            (3, false, "Line 4 (heading) should NOT be in_esm_block"),
            (4, false, "Line 5 (blank) should NOT be in_esm_block"),
            (5, false, "Line 6 (text) should NOT be in_esm_block"),
        ];
        for (index, in_esm, message) in expectations {
            assert!(ctx.lines[index].in_esm_block == in_esm, "{}", message);
        }
    }

    /// The Standard flavor never flags lines as ESM blocks.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        let expectations = [
            (0, "Line 1 should NOT be in_esm_block in Standard flavor"),
            (1, "Line 2 should NOT be in_esm_block in Standard flavor"),
        ];
        for (index, message) in expectations {
            assert!(!ctx.lines[index].in_esm_block, "{}", message);
        }
    }
}