rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Run `$code` and hand back its value, optionally reporting how long it took.
///
/// Non-WASM variant: a timer is started before `$code` is evaluated and, when
/// `$profile` is true, a `[PROFILE] <name>: <elapsed>` line is written to stderr.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// WASM variant: evaluates `$code` only; `$name` and `$profile` are ignored
/// and no timing is performed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
27
// Comprehensive link pattern that captures both inline links `[text](url "title")`
// and reference links `[text][ref]`.
//
// Flags: `(?s)` makes `.` match newlines; `(?x)` enables verbose mode, which is why
// the pattern can carry whitespace and `#` comments inside the literal.
//
// Capture groups:
//   1 - link text (no unescaped `[` or `]` allowed inside)
//   2 - URL written in angle brackets `<...>`
//   3 - bare URL
//   4 - title in double quotes, 5 - title in single quotes
//   6 - reference ID
//
// Compiled on first use; `unwrap()` can only fail if this constant pattern is invalid.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
41
// Image pattern: same shape as the link pattern but requires a leading `!`,
// matching inline images `![alt](url "title")` and reference images `![alt][ref]`.
//
// Flags: `(?s)` makes `.` match newlines; `(?x)` enables verbose mode, which is why
// the pattern can carry whitespace and `#` comments inside the literal.
//
// Capture groups:
//   1 - alt text (no unescaped `[` or `]` allowed inside)
//   2 - URL written in angle brackets `<...>`
//   3 - bare URL
//   4 - title in double quotes, 5 - title in single quotes
//   6 - reference ID
//
// Compiled on first use; `unwrap()` can only fail if this constant pattern is invalid.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
55
// Reference definition pattern: `[id]: url "optional title"`.
//
// `(?m)` makes `^`/`$` match at line boundaries, so the definition must occupy a
// whole line, preceded by at most three spaces.
//
// Capture groups:
//   1 - reference ID (non-empty, no `]`)
//   2 - URL (a run of non-whitespace characters)
//   3 - title in double quotes, 4 - title in single quotes
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
// Pattern for bare URLs.
//
// Group 1 captures the scheme (`http`, `https` or `ftp`). After `://` the pattern
// accepts a host made of dot-separated runs, then optional `:port`, `/path`,
// `?query` and `#fragment` parts. Every part stops at whitespace and at the
// characters `< > [ ] ( ) \ ' " `` ` ``, which keeps surrounding Markdown
// punctuation out of the match.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
// Pattern for email addresses: `local@domain.tld`.
//
// The local part allows ASCII letters, digits and `. _ % + -`; the domain allows
// ASCII letters, digits, `.` and `-`; the final label must be at least two ASCII
// letters. The pattern is unanchored and has no capture groups.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
// Pattern for blockquote prefix in parse_list_blocks.
//
// Anchored at the start of the haystack; group 1 captures the whole prefix:
// optional leading whitespace, one or more consecutive `>`, and any whitespace
// that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
74/// Pre-computed information about a line
75#[derive(Debug, Clone)]
76pub struct LineInfo {
77    /// Byte offset where this line starts in the document
78    pub byte_offset: usize,
79    /// Length of the line in bytes (without newline)
80    pub byte_len: usize,
81    /// Number of leading spaces/tabs
82    pub indent: usize,
83    /// Whether the line is blank (empty or only whitespace)
84    pub is_blank: bool,
85    /// Whether this line is inside a code block
86    pub in_code_block: bool,
87    /// Whether this line is inside front matter
88    pub in_front_matter: bool,
89    /// Whether this line is inside an HTML block
90    pub in_html_block: bool,
91    /// Whether this line is inside an HTML comment
92    pub in_html_comment: bool,
93    /// List item information if this line starts a list item
94    pub list_item: Option<ListItemInfo>,
95    /// Heading information if this line is a heading
96    pub heading: Option<HeadingInfo>,
97    /// Blockquote information if this line is a blockquote
98    pub blockquote: Option<BlockquoteInfo>,
99    /// Whether this line is inside a mkdocstrings autodoc block
100    pub in_mkdocstrings: bool,
101    /// Whether this line is part of an ESM import/export block (MDX only)
102    pub in_esm_block: bool,
103    /// Whether this line is a continuation of a multi-line code span from a previous line
104    pub in_code_span_continuation: bool,
105    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
106    /// Pre-computed for consistent detection across all rules
107    pub is_horizontal_rule: bool,
108}
109
110impl LineInfo {
111    /// Get the line content as a string slice from the source document
112    pub fn content<'a>(&self, source: &'a str) -> &'a str {
113        &source[self.byte_offset..self.byte_offset + self.byte_len]
114    }
115}
116
/// Information about a list item.
///
/// Stored in [`LineInfo::list_item`] for the line on which the item starts.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (`*`, `-`, `+`, or a number followed by `.` or `)`)
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    // NOTE(review): presumably `None` for unordered items - confirm against the parser.
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    // NOTE(review): presumably 0-based like `marker_column` - confirm.
    pub content_column: usize,
}
131
/// Heading style type, recorded per heading in [`HeadingInfo::style`].
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
142
/// Parsed link information.
///
/// Collected in `LintContext::links`. Text fields are `Cow<'a, str>`, so they can
/// either borrow for the lifetime `'a` or hold an owned string.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the link starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
167
/// Information about a broken link reported by pulldown-cmark.
///
/// Collected in `LintContext::broken_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (half-open `start..end` range)
    pub span: std::ops::Range<usize>,
}
176
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`).
///
/// Collected in `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the `^` prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
189
/// Parsed image information.
///
/// Collected in `LintContext::images`. Text fields are `Cow<'a, str>`, so they can
/// either borrow for the lifetime `'a` or hold an owned string.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the image starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
214
/// Reference definition: `[ref]: url "title"`.
///
/// Collected in `LintContext::reference_defs`. All strings are owned.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
235
/// Parsed code span information.
///
/// Exposed through `LintContext::code_spans()`. A span may cover several lines;
/// in that case `end_line` is greater than `line`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document (treated as exclusive by
    /// `LintContext::is_in_code_block_or_span`)
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
256
/// Information about a heading.
///
/// Stored in [`LineInfo::heading`]. Malformed headings are kept too, with
/// `is_valid` set to false, so that only the rules interested in them see them.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (`#` characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from `{#custom-id}` syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after `#`).
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
284
/// A valid heading from a filtered iteration.
///
/// This is the item type yielded by [`ValidHeadingsIter`]. Only headings that
/// are CommonMark-compliant (have space after `#`) are included; hashtag-like
/// patterns (`#tag`, `#123`) are excluded. Both references borrow from the
/// slice of [`LineInfo`] being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
298
299/// Iterator over valid CommonMark headings in a document
300///
301/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
302/// but should not be processed by other heading rules.
303pub struct ValidHeadingsIter<'a> {
304    lines: &'a [LineInfo],
305    current_index: usize,
306}
307
308impl<'a> ValidHeadingsIter<'a> {
309    fn new(lines: &'a [LineInfo]) -> Self {
310        Self {
311            lines,
312            current_index: 0,
313        }
314    }
315}
316
317impl<'a> Iterator for ValidHeadingsIter<'a> {
318    type Item = ValidHeading<'a>;
319
320    fn next(&mut self) -> Option<Self::Item> {
321        while self.current_index < self.lines.len() {
322            let idx = self.current_index;
323            self.current_index += 1;
324
325            let line_info = &self.lines[idx];
326            if let Some(heading) = &line_info.heading
327                && heading.is_valid
328            {
329                return Some(ValidHeading {
330                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
331                    heading,
332                    line_info,
333                });
334            }
335        }
336        None
337    }
338}
339
/// Information about a blockquote line.
///
/// Stored in [`LineInfo::blockquote`]. All text fields are owned copies.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first `>` starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
360
/// Information about a list block.
///
/// Collected in `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed line numbers like `start_line` - confirm.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
381
382use std::sync::{Arc, OnceLock};
383
/// Character frequency data for fast content analysis.
///
/// Stored in `LintContext::char_frequency`. Each field counts one character;
/// the parenthesised notes name the Markdown constructs that character can
/// introduce. `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of `#` characters (headings)
    pub hash_count: usize,
    /// Count of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of `+` characters (lists)
    pub plus_count: usize,
    /// Count of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Count of `|` characters (tables)
    pub pipe_count: usize,
    /// Count of `[` characters (links, images)
    pub bracket_count: usize,
    /// Count of `` ` `` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of `!` characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
412
/// Pre-parsed HTML tag information.
///
/// Exposed through `LintContext::html_tags()`.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the tag starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
435
/// Pre-parsed emphasis span information.
///
/// Exposed through `LintContext::emphasis_spans()`.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis (`'*'` or `'_'`)
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
456
/// Pre-parsed table row information.
///
/// Exposed through `LintContext::table_rows()`.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row
    pub column_alignments: Vec<String>, // one of "left", "center", "right", "none" per column
}
469
/// Pre-parsed bare URL information (not in links).
///
/// Exposed through `LintContext::bare_urls()`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the URL starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
488
/// Shared analysis results for one Markdown document.
///
/// `LintContext::new` parses the document once and stores the results here.
/// Public fields are filled eagerly by `new`; the private `*_cache` fields are
/// `OnceLock`s read through accessor methods of the same name.
pub struct LintContext<'a> {
    /// The document text being analysed
    pub content: &'a str,
    /// Byte offset at which each line starts (the first entry is always 0)
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    // NOTE(review): presumably (start, end) byte offsets - confirm against CodeBlockUtils.
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information, one entry per line
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// Inline code spans; `new` stores an already-computed value here, so
    /// `code_spans()` normally just clones the `Arc`
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges (`{{ }}`, `{% %}`)
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
}
514
/// The four consecutive pieces of a blockquote line, in source order.
///
/// Concatenating `indent`, `markers`, `spaces_after` and `content` gives back
/// the original line; every field borrows from it.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`
    indent: &'a str,
    /// The run of consecutive `>` characters
    markers: &'a str,
    /// Spaces/tabs directly after the markers
    spaces_after: &'a str,
    /// Everything that follows
    content: &'a str,
}

/// Split a line into its blockquote components by scanning bytes by hand.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines). Only a contiguous run
/// of `>` counts as markers: for `"> > x"` the markers are `">"` and the
/// content is `"> x"`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Number of leading bytes of `bytes` accepted by `accept`.
    fn run_length(bytes: &[u8], accept: fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| accept(b)).count()
    }
    fn is_gap(b: u8) -> bool {
        matches!(b, b' ' | b'\t')
    }
    fn is_marker(b: u8) -> bool {
        b == b'>'
    }

    let raw = line.as_bytes();

    let indent_len = run_length(raw, is_gap);
    let marker_len = run_length(&raw[indent_len..], is_marker);
    if marker_len == 0 {
        // No '>' right after the indentation: not a blockquote line.
        return None;
    }
    let gap_len = run_length(&raw[indent_len + marker_len..], is_gap);

    // Every split point follows an ASCII byte (or is 0), so it is a char boundary.
    let (indent, rest) = line.split_at(indent_len);
    let (markers, rest) = rest.split_at(marker_len);
    let (spaces_after, content) = rest.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
559
560impl<'a> LintContext<'a> {
    /// Build a lint context for `content` by running every eager analysis pass once.
    ///
    /// * `content` - the Markdown source; the context borrows it for `'a`.
    /// * `flavor` - forwarded to the flavor-sensitive passes (autodoc ranges are
    ///   only computed for `MarkdownFlavor::MkDocs`) and stored on the context.
    /// * `source_file` - stored unchanged in `source_file`.
    ///
    /// The passes run in a fixed order because later ones consume the results of
    /// earlier ones (for example, link parsing receives the code spans so it can
    /// exclude them). HTML tags, emphasis spans, table rows, bare URLs and the
    /// mixed-list-nesting flag are NOT computed here; their caches start empty.
    ///
    /// On non-WASM targets, if the `RUMDL_PROFILE_QUADRATIC` environment variable
    /// can be read, each pass prints its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is opt-in through the environment; its value is not inspected.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset of the start of every line: 0, then one past each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Detect code blocks once and cache them
        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        // Pre-compute HTML comment ranges ONCE for all operations
        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Pre-compute line information (without headings/blockquotes yet)
        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // Detect HTML blocks BEFORE heading detection
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        // Detect ESM import/export blocks in MDX files BEFORE heading detection
        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        // Now detect headings and blockquotes
        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        // Parse code spans early so we can exclude them from link/image parsing
        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark lines that are continuations of multi-line code spans
        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
        for span in &code_spans {
            if span.end_line > span.line {
                // Mark lines after the first line as continuations
                // (span line numbers are 1-indexed, `lines` is 0-indexed, hence `line_num - 1`)
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        // Parse links, images, references, and list blocks
        // (parse_links also yields the broken links and footnote references)
        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        // Compute character frequency for fast content analysis
        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were needed above, so the cache is seeded with them
            // instead of starting empty like the other caches.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
718
719    /// Get code spans - computed lazily on first access
720    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
721        Arc::clone(
722            self.code_spans_cache
723                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
724        )
725    }
726
727    /// Get HTML comment ranges - pre-computed during LintContext construction
728    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
729        &self.html_comment_ranges
730    }
731
732    /// Get HTML tags - computed lazily on first access
733    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
734        Arc::clone(self.html_tags_cache.get_or_init(|| {
735            Arc::new(Self::parse_html_tags(
736                self.content,
737                &self.lines,
738                &self.code_blocks,
739                self.flavor,
740            ))
741        }))
742    }
743
744    /// Get emphasis spans - computed lazily on first access
745    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
746        Arc::clone(
747            self.emphasis_spans_cache
748                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
749        )
750    }
751
752    /// Get table rows - computed lazily on first access
753    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
754        Arc::clone(
755            self.table_rows_cache
756                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
757        )
758    }
759
760    /// Get bare URLs - computed lazily on first access
761    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
762        Arc::clone(
763            self.bare_urls_cache
764                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
765        )
766    }
767
768    /// Check if document has mixed ordered/unordered list nesting.
769    /// Result is cached after first computation (document-level invariant).
770    /// This is used by MD007 for smart style auto-detection.
771    pub fn has_mixed_list_nesting(&self) -> bool {
772        *self
773            .has_mixed_list_nesting_cache
774            .get_or_init(|| self.compute_mixed_list_nesting())
775    }
776
777    /// Internal computation for mixed list nesting (only called once per LintContext).
778    fn compute_mixed_list_nesting(&self) -> bool {
779        // Track parent list items by their marker position and type
780        // Using marker_column instead of indent because it works correctly
781        // for blockquoted content where indent doesn't account for the prefix
782        // Stack stores: (marker_column, is_ordered)
783        let mut stack: Vec<(usize, bool)> = Vec::new();
784        let mut last_was_blank = false;
785
786        for line_info in &self.lines {
787            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
788            if line_info.in_code_block
789                || line_info.in_front_matter
790                || line_info.in_mkdocstrings
791                || line_info.in_html_comment
792                || line_info.in_esm_block
793            {
794                continue;
795            }
796
797            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
798            if line_info.is_blank {
799                last_was_blank = true;
800                continue;
801            }
802
803            if let Some(list_item) = &line_info.list_item {
804                // Normalize column 1 to column 0 (consistent with MD007 check function)
805                let current_pos = if list_item.marker_column == 1 {
806                    0
807                } else {
808                    list_item.marker_column
809                };
810
811                // If there was a blank line and this item is at root level, reset stack
812                if last_was_blank && current_pos == 0 {
813                    stack.clear();
814                }
815                last_was_blank = false;
816
817                // Pop items at same or greater position (they're siblings or deeper, not parents)
818                while let Some(&(pos, _)) = stack.last() {
819                    if pos >= current_pos {
820                        stack.pop();
821                    } else {
822                        break;
823                    }
824                }
825
826                // Check if immediate parent has different type - this is mixed nesting
827                if let Some(&(_, parent_is_ordered)) = stack.last()
828                    && parent_is_ordered != list_item.is_ordered
829                {
830                    return true; // Found mixed nesting - early exit
831                }
832
833                stack.push((current_pos, list_item.is_ordered));
834            } else {
835                // Non-list line (but not blank) - could be paragraph or other content
836                last_was_blank = false;
837            }
838        }
839
840        false
841    }
842
843    /// Map a byte offset to (line, column)
844    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
845        match self.line_offsets.binary_search(&offset) {
846            Ok(line) => (line + 1, 1),
847            Err(line) => {
848                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
849                (line, offset - line_start + 1)
850            }
851        }
852    }
853
854    /// Check if a position is within a code block or code span
855    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
856        // Check code blocks first
857        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
858            return true;
859        }
860
861        // Check inline code spans (lazy load if needed)
862        self.code_spans()
863            .iter()
864            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
865    }
866
867    /// Get line information by line number (1-indexed)
868    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
869        if line_num > 0 {
870            self.lines.get(line_num - 1)
871        } else {
872            None
873        }
874    }
875
876    /// Get byte offset for a line number (1-indexed)
877    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
878        self.line_info(line_num).map(|info| info.byte_offset)
879    }
880
881    /// Get URL for a reference link/image by its ID
882    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
883        let normalized_id = ref_id.to_lowercase();
884        self.reference_defs
885            .iter()
886            .find(|def| def.id == normalized_id)
887            .map(|def| def.url.as_str())
888    }
889
890    /// Check if a line is part of a list block
891    pub fn is_in_list_block(&self, line_num: usize) -> bool {
892        self.list_blocks
893            .iter()
894            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
895    }
896
897    /// Get the list block containing a specific line
898    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
899        self.list_blocks
900            .iter()
901            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
902    }
903
904    // Compatibility methods for DocumentStructure migration
905
906    /// Check if a line is within a code block
907    pub fn is_in_code_block(&self, line_num: usize) -> bool {
908        if line_num == 0 || line_num > self.lines.len() {
909            return false;
910        }
911        self.lines[line_num - 1].in_code_block
912    }
913
914    /// Check if a line is within front matter
915    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
916        if line_num == 0 || line_num > self.lines.len() {
917            return false;
918        }
919        self.lines[line_num - 1].in_front_matter
920    }
921
922    /// Check if a line is within an HTML block
923    pub fn is_in_html_block(&self, line_num: usize) -> bool {
924        if line_num == 0 || line_num > self.lines.len() {
925            return false;
926        }
927        self.lines[line_num - 1].in_html_block
928    }
929
930    /// Check if a line and column is within a code span
931    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
932        if line_num == 0 || line_num > self.lines.len() {
933            return false;
934        }
935
936        // Use the code spans cache to check
937        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
938        // Convert col to 0-indexed for comparison
939        let col_0indexed = if col > 0 { col - 1 } else { 0 };
940        let code_spans = self.code_spans();
941        code_spans.iter().any(|span| {
942            // Check if line is within the span's line range
943            if line_num < span.line || line_num > span.end_line {
944                return false;
945            }
946
947            if span.line == span.end_line {
948                // Single-line span: check column bounds
949                col_0indexed >= span.start_col && col_0indexed < span.end_col
950            } else if line_num == span.line {
951                // First line of multi-line span: anything after start_col is in span
952                col_0indexed >= span.start_col
953            } else if line_num == span.end_line {
954                // Last line of multi-line span: anything before end_col is in span
955                col_0indexed < span.end_col
956            } else {
957                // Middle line of multi-line span: entire line is in span
958                true
959            }
960        })
961    }
962
963    /// Check if a byte offset is within a code span
964    #[inline]
965    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
966        let code_spans = self.code_spans();
967        code_spans
968            .iter()
969            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
970    }
971
972    /// Check if a byte position is within a reference definition
973    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
974    #[inline]
975    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
976        self.reference_defs
977            .iter()
978            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
979    }
980
981    /// Check if a byte position is within an HTML comment
982    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
983    /// where k is the number of HTML comments (typically very small)
984    #[inline]
985    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
986        self.html_comment_ranges
987            .iter()
988            .any(|range| byte_pos >= range.start && byte_pos < range.end)
989    }
990
991    /// Check if a byte position is within an HTML tag (including multiline tags)
992    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
993    #[inline]
994    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
995        self.html_tags()
996            .iter()
997            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
998    }
999
1000    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1001    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1002        self.jinja_ranges
1003            .iter()
1004            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1005    }
1006
1007    /// Check if a byte position is within a link reference definition title
1008    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1009        self.reference_defs.iter().any(|def| {
1010            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1011                byte_pos >= start && byte_pos < end
1012            } else {
1013                false
1014            }
1015        })
1016    }
1017
1018    /// Check if content has any instances of a specific character (fast)
1019    pub fn has_char(&self, ch: char) -> bool {
1020        match ch {
1021            '#' => self.char_frequency.hash_count > 0,
1022            '*' => self.char_frequency.asterisk_count > 0,
1023            '_' => self.char_frequency.underscore_count > 0,
1024            '-' => self.char_frequency.hyphen_count > 0,
1025            '+' => self.char_frequency.plus_count > 0,
1026            '>' => self.char_frequency.gt_count > 0,
1027            '|' => self.char_frequency.pipe_count > 0,
1028            '[' => self.char_frequency.bracket_count > 0,
1029            '`' => self.char_frequency.backtick_count > 0,
1030            '<' => self.char_frequency.lt_count > 0,
1031            '!' => self.char_frequency.exclamation_count > 0,
1032            '\n' => self.char_frequency.newline_count > 0,
1033            _ => self.content.contains(ch), // Fallback for other characters
1034        }
1035    }
1036
1037    /// Get count of a specific character (fast)
1038    pub fn char_count(&self, ch: char) -> usize {
1039        match ch {
1040            '#' => self.char_frequency.hash_count,
1041            '*' => self.char_frequency.asterisk_count,
1042            '_' => self.char_frequency.underscore_count,
1043            '-' => self.char_frequency.hyphen_count,
1044            '+' => self.char_frequency.plus_count,
1045            '>' => self.char_frequency.gt_count,
1046            '|' => self.char_frequency.pipe_count,
1047            '[' => self.char_frequency.bracket_count,
1048            '`' => self.char_frequency.backtick_count,
1049            '<' => self.char_frequency.lt_count,
1050            '!' => self.char_frequency.exclamation_count,
1051            '\n' => self.char_frequency.newline_count,
1052            _ => self.content.matches(ch).count(), // Fallback for other characters
1053        }
1054    }
1055
1056    /// Check if content likely contains headings (fast)
1057    pub fn likely_has_headings(&self) -> bool {
1058        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1059    }
1060
1061    /// Check if content likely contains lists (fast)
1062    pub fn likely_has_lists(&self) -> bool {
1063        self.char_frequency.asterisk_count > 0
1064            || self.char_frequency.hyphen_count > 0
1065            || self.char_frequency.plus_count > 0
1066    }
1067
1068    /// Check if content likely contains emphasis (fast)
1069    pub fn likely_has_emphasis(&self) -> bool {
1070        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1071    }
1072
1073    /// Check if content likely contains tables (fast)
1074    pub fn likely_has_tables(&self) -> bool {
1075        self.char_frequency.pipe_count > 2
1076    }
1077
1078    /// Check if content likely contains blockquotes (fast)
1079    pub fn likely_has_blockquotes(&self) -> bool {
1080        self.char_frequency.gt_count > 0
1081    }
1082
1083    /// Check if content likely contains code (fast)
1084    pub fn likely_has_code(&self) -> bool {
1085        self.char_frequency.backtick_count > 0
1086    }
1087
1088    /// Check if content likely contains links or images (fast)
1089    pub fn likely_has_links_or_images(&self) -> bool {
1090        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1091    }
1092
1093    /// Check if content likely contains HTML (fast)
1094    pub fn likely_has_html(&self) -> bool {
1095        self.char_frequency.lt_count > 0
1096    }
1097
1098    /// Get HTML tags on a specific line
1099    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1100        self.html_tags()
1101            .iter()
1102            .filter(|tag| tag.line == line_num)
1103            .cloned()
1104            .collect()
1105    }
1106
1107    /// Get emphasis spans on a specific line
1108    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1109        self.emphasis_spans()
1110            .iter()
1111            .filter(|span| span.line == line_num)
1112            .cloned()
1113            .collect()
1114    }
1115
1116    /// Get table rows on a specific line
1117    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1118        self.table_rows()
1119            .iter()
1120            .filter(|row| row.line == line_num)
1121            .cloned()
1122            .collect()
1123    }
1124
1125    /// Get bare URLs on a specific line
1126    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1127        self.bare_urls()
1128            .iter()
1129            .filter(|url| url.line == line_num)
1130            .cloned()
1131            .collect()
1132    }
1133
1134    /// Find the line index for a given byte offset using binary search.
1135    /// Returns (line_index, line_number, column) where:
1136    /// - line_index is the 0-based index in the lines array
1137    /// - line_number is the 1-based line number
1138    /// - column is the byte offset within that line
1139    #[inline]
1140    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1141        // Binary search to find the line containing this byte offset
1142        let idx = match lines.binary_search_by(|line| {
1143            if byte_offset < line.byte_offset {
1144                std::cmp::Ordering::Greater
1145            } else if byte_offset > line.byte_offset + line.byte_len {
1146                std::cmp::Ordering::Less
1147            } else {
1148                std::cmp::Ordering::Equal
1149            }
1150        }) {
1151            Ok(idx) => idx,
1152            Err(idx) => idx.saturating_sub(1),
1153        };
1154
1155        let line = &lines[idx];
1156        let line_num = idx + 1;
1157        let col = byte_offset.saturating_sub(line.byte_offset);
1158
1159        (idx, line_num, col)
1160    }
1161
1162    /// Check if a byte offset is within a code span using binary search
1163    #[inline]
1164    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1165        // Since spans are sorted by byte_offset, use partition_point for binary search
1166        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1167
1168        // Check the span that starts at or before our offset
1169        if idx > 0 {
1170            let span = &code_spans[idx - 1];
1171            if offset >= span.byte_offset && offset < span.byte_end {
1172                return true;
1173            }
1174        }
1175
1176        false
1177    }
1178
1179    /// Collect byte ranges of all links using pulldown-cmark
1180    /// This is used to skip heading detection for lines that fall within link syntax
1181    /// (e.g., multiline links like `[text](url\n#fragment)`)
1182    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1183        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1184
1185        let mut link_ranges = Vec::new();
1186        let mut options = Options::empty();
1187        options.insert(Options::ENABLE_WIKILINKS);
1188        options.insert(Options::ENABLE_FOOTNOTES);
1189
1190        let parser = Parser::new_ext(content, options).into_offset_iter();
1191        let mut link_stack: Vec<usize> = Vec::new();
1192
1193        for (event, range) in parser {
1194            match event {
1195                Event::Start(Tag::Link { .. }) => {
1196                    link_stack.push(range.start);
1197                }
1198                Event::End(TagEnd::Link) => {
1199                    if let Some(start_pos) = link_stack.pop() {
1200                        link_ranges.push((start_pos, range.end));
1201                    }
1202                }
1203                _ => {}
1204            }
1205        }
1206
1207        link_ranges
1208    }
1209
1210    /// Parse all links in the content
1211    fn parse_links(
1212        content: &'a str,
1213        lines: &[LineInfo],
1214        code_blocks: &[(usize, usize)],
1215        code_spans: &[CodeSpan],
1216        flavor: MarkdownFlavor,
1217        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1218    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1219        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1220        use std::collections::HashSet;
1221
1222        let mut links = Vec::with_capacity(content.len() / 500);
1223        let mut broken_links = Vec::new();
1224        let mut footnote_refs = Vec::new();
1225
1226        // Track byte positions of links found by pulldown-cmark
1227        let mut found_positions = HashSet::new();
1228
1229        // Use pulldown-cmark's streaming parser with BrokenLink callback
1230        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1231        // This automatically handles:
1232        // - Escaped links (won't generate events)
1233        // - Links in code blocks/spans (won't generate Link events)
1234        // - Images (generates Tag::Image instead)
1235        // - Reference resolution (dest_url is already resolved!)
1236        // - Broken references (callback is invoked)
1237        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1238        let mut options = Options::empty();
1239        options.insert(Options::ENABLE_WIKILINKS);
1240        options.insert(Options::ENABLE_FOOTNOTES);
1241
1242        let parser = Parser::new_with_broken_link_callback(
1243            content,
1244            options,
1245            Some(|link: BrokenLink<'_>| {
1246                broken_links.push(BrokenLinkInfo {
1247                    reference: link.reference.to_string(),
1248                    span: link.span.clone(),
1249                });
1250                None
1251            }),
1252        )
1253        .into_offset_iter();
1254
1255        let mut link_stack: Vec<(
1256            usize,
1257            usize,
1258            pulldown_cmark::CowStr<'a>,
1259            LinkType,
1260            pulldown_cmark::CowStr<'a>,
1261        )> = Vec::new();
1262        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1263
1264        for (event, range) in parser {
1265            match event {
1266                Event::Start(Tag::Link {
1267                    link_type,
1268                    dest_url,
1269                    id,
1270                    ..
1271                }) => {
1272                    // Link start - record position, URL, and reference ID
1273                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1274                    text_chunks.clear();
1275                }
1276                Event::Text(text) if !link_stack.is_empty() => {
1277                    // Track text content with its byte range
1278                    text_chunks.push((text.to_string(), range.start, range.end));
1279                }
1280                Event::Code(code) if !link_stack.is_empty() => {
1281                    // Include inline code in link text (with backticks)
1282                    let code_text = format!("`{code}`");
1283                    text_chunks.push((code_text, range.start, range.end));
1284                }
1285                Event::End(TagEnd::Link) => {
1286                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1287                        // Skip if in HTML comment
1288                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1289                            text_chunks.clear();
1290                            continue;
1291                        }
1292
1293                        // Find line and column information
1294                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1295
1296                        // Skip if this link is on a MkDocs snippet line
1297                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1298                            text_chunks.clear();
1299                            continue;
1300                        }
1301
1302                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1303
1304                        let is_reference = matches!(
1305                            link_type,
1306                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1307                        );
1308
1309                        // Extract link text directly from source bytes to preserve escaping
1310                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1311                        let link_text = if start_pos < content.len() {
1312                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1313
1314                            // Find MATCHING ] by tracking bracket depth for nested brackets
1315                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1316                            // Brackets inside code spans (between backticks) should be ignored
1317                            let mut close_pos = None;
1318                            let mut depth = 0;
1319                            let mut in_code_span = false;
1320
1321                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1322                                // Count preceding backslashes
1323                                let mut backslash_count = 0;
1324                                let mut j = i;
1325                                while j > 0 && link_bytes[j - 1] == b'\\' {
1326                                    backslash_count += 1;
1327                                    j -= 1;
1328                                }
1329                                let is_escaped = backslash_count % 2 != 0;
1330
1331                                // Track code spans - backticks toggle in/out of code
1332                                if byte == b'`' && !is_escaped {
1333                                    in_code_span = !in_code_span;
1334                                }
1335
1336                                // Only count brackets when NOT in a code span
1337                                if !is_escaped && !in_code_span {
1338                                    if byte == b'[' {
1339                                        depth += 1;
1340                                    } else if byte == b']' {
1341                                        if depth == 0 {
1342                                            // Found the matching closing bracket
1343                                            close_pos = Some(i);
1344                                            break;
1345                                        } else {
1346                                            depth -= 1;
1347                                        }
1348                                    }
1349                                }
1350                            }
1351
1352                            if let Some(pos) = close_pos {
1353                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1354                            } else {
1355                                Cow::Borrowed("")
1356                            }
1357                        } else {
1358                            Cow::Borrowed("")
1359                        };
1360
1361                        // For reference links, use the actual reference ID from pulldown-cmark
1362                        let reference_id = if is_reference && !ref_id.is_empty() {
1363                            Some(Cow::Owned(ref_id.to_lowercase()))
1364                        } else if is_reference {
1365                            // For collapsed/shortcut references without explicit ID, use the link text
1366                            Some(Cow::Owned(link_text.to_lowercase()))
1367                        } else {
1368                            None
1369                        };
1370
1371                        // Track this position as found
1372                        found_positions.insert(start_pos);
1373
1374                        links.push(ParsedLink {
1375                            line: line_num,
1376                            start_col: col_start,
1377                            end_col: col_end,
1378                            byte_offset: start_pos,
1379                            byte_end: range.end,
1380                            text: link_text,
1381                            url: Cow::Owned(url.to_string()),
1382                            is_reference,
1383                            reference_id,
1384                            link_type,
1385                        });
1386
1387                        text_chunks.clear();
1388                    }
1389                }
1390                Event::FootnoteReference(footnote_id) => {
1391                    // Capture footnote references like [^1], [^note]
1392                    // Skip if in HTML comment
1393                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1394                        continue;
1395                    }
1396
1397                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1398                    footnote_refs.push(FootnoteRef {
1399                        id: footnote_id.to_string(),
1400                        line: line_num,
1401                        byte_offset: range.start,
1402                        byte_end: range.end,
1403                    });
1404                }
1405                _ => {}
1406            }
1407        }
1408
1409        // Also find undefined references using regex
1410        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1411        // because the reference is undefined
1412        for cap in LINK_PATTERN.captures_iter(content) {
1413            let full_match = cap.get(0).unwrap();
1414            let match_start = full_match.start();
1415            let match_end = full_match.end();
1416
1417            // Skip if this was already found by pulldown-cmark (it's a valid link)
1418            if found_positions.contains(&match_start) {
1419                continue;
1420            }
1421
1422            // Skip if escaped
1423            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1424                continue;
1425            }
1426
1427            // Skip if it's an image
1428            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1429                continue;
1430            }
1431
1432            // Skip if in code block
1433            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1434                continue;
1435            }
1436
1437            // Skip if in code span
1438            if Self::is_offset_in_code_span(code_spans, match_start) {
1439                continue;
1440            }
1441
1442            // Skip if in HTML comment
1443            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1444                continue;
1445            }
1446
1447            // Find line and column information
1448            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1449
1450            // Skip if this link is on a MkDocs snippet line
1451            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1452                continue;
1453            }
1454
1455            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1456
1457            let text = cap.get(1).map_or("", |m| m.as_str());
1458
1459            // Only process reference links (group 6)
1460            if let Some(ref_id) = cap.get(6) {
1461                let ref_id_str = ref_id.as_str();
1462                let normalized_ref = if ref_id_str.is_empty() {
1463                    Cow::Owned(text.to_lowercase()) // Implicit reference
1464                } else {
1465                    Cow::Owned(ref_id_str.to_lowercase())
1466                };
1467
1468                // This is an undefined reference (pulldown-cmark didn't parse it)
1469                links.push(ParsedLink {
1470                    line: line_num,
1471                    start_col: col_start,
1472                    end_col: col_end,
1473                    byte_offset: match_start,
1474                    byte_end: match_end,
1475                    text: Cow::Borrowed(text),
1476                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1477                    is_reference: true,
1478                    reference_id: Some(normalized_ref),
1479                    link_type: LinkType::Reference, // Undefined references are reference-style
1480                });
1481            }
1482        }
1483
1484        (links, broken_links, footnote_refs)
1485    }
1486
1487    /// Parse all images in the content
1488    fn parse_images(
1489        content: &'a str,
1490        lines: &[LineInfo],
1491        code_blocks: &[(usize, usize)],
1492        code_spans: &[CodeSpan],
1493        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1494    ) -> Vec<ParsedImage<'a>> {
1495        use crate::utils::skip_context::is_in_html_comment_ranges;
1496        use std::collections::HashSet;
1497
1498        // Pre-size based on a heuristic: images are less common than links
1499        let mut images = Vec::with_capacity(content.len() / 1000);
1500        let mut found_positions = HashSet::new();
1501
1502        // Use pulldown-cmark for parsing - more accurate and faster
1503        let parser = Parser::new(content).into_offset_iter();
1504        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1505            Vec::new();
1506        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1507
1508        for (event, range) in parser {
1509            match event {
1510                Event::Start(Tag::Image {
1511                    link_type,
1512                    dest_url,
1513                    id,
1514                    ..
1515                }) => {
1516                    image_stack.push((range.start, dest_url, link_type, id));
1517                    text_chunks.clear();
1518                }
1519                Event::Text(text) if !image_stack.is_empty() => {
1520                    text_chunks.push((text.to_string(), range.start, range.end));
1521                }
1522                Event::Code(code) if !image_stack.is_empty() => {
1523                    let code_text = format!("`{code}`");
1524                    text_chunks.push((code_text, range.start, range.end));
1525                }
1526                Event::End(TagEnd::Image) => {
1527                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1528                        // Skip if in code block
1529                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1530                            continue;
1531                        }
1532
1533                        // Skip if in code span
1534                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1535                            continue;
1536                        }
1537
1538                        // Skip if in HTML comment
1539                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1540                            continue;
1541                        }
1542
1543                        // Find line and column using binary search
1544                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1545                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1546
1547                        let is_reference = matches!(
1548                            link_type,
1549                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1550                        );
1551
1552                        // Extract alt text directly from source bytes to preserve escaping
1553                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1554                        let alt_text = if start_pos < content.len() {
1555                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1556
1557                            // Find MATCHING ] by tracking bracket depth for nested brackets
1558                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1559                            let mut close_pos = None;
1560                            let mut depth = 0;
1561
1562                            if image_bytes.len() > 2 {
1563                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1564                                    // Count preceding backslashes
1565                                    let mut backslash_count = 0;
1566                                    let mut j = i;
1567                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1568                                        backslash_count += 1;
1569                                        j -= 1;
1570                                    }
1571                                    let is_escaped = backslash_count % 2 != 0;
1572
1573                                    if !is_escaped {
1574                                        if byte == b'[' {
1575                                            depth += 1;
1576                                        } else if byte == b']' {
1577                                            if depth == 0 {
1578                                                // Found the matching closing bracket
1579                                                close_pos = Some(i);
1580                                                break;
1581                                            } else {
1582                                                depth -= 1;
1583                                            }
1584                                        }
1585                                    }
1586                                }
1587                            }
1588
1589                            if let Some(pos) = close_pos {
1590                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1591                            } else {
1592                                Cow::Borrowed("")
1593                            }
1594                        } else {
1595                            Cow::Borrowed("")
1596                        };
1597
1598                        let reference_id = if is_reference && !ref_id.is_empty() {
1599                            Some(Cow::Owned(ref_id.to_lowercase()))
1600                        } else if is_reference {
1601                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1602                        } else {
1603                            None
1604                        };
1605
1606                        found_positions.insert(start_pos);
1607                        images.push(ParsedImage {
1608                            line: line_num,
1609                            start_col: col_start,
1610                            end_col: col_end,
1611                            byte_offset: start_pos,
1612                            byte_end: range.end,
1613                            alt_text,
1614                            url: Cow::Owned(url.to_string()),
1615                            is_reference,
1616                            reference_id,
1617                            link_type,
1618                        });
1619                    }
1620                }
1621                _ => {}
1622            }
1623        }
1624
1625        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1626        for cap in IMAGE_PATTERN.captures_iter(content) {
1627            let full_match = cap.get(0).unwrap();
1628            let match_start = full_match.start();
1629            let match_end = full_match.end();
1630
1631            // Skip if already found by pulldown-cmark
1632            if found_positions.contains(&match_start) {
1633                continue;
1634            }
1635
1636            // Skip if the ! is escaped
1637            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1638                continue;
1639            }
1640
1641            // Skip if in code block, code span, or HTML comment
1642            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1643                || Self::is_offset_in_code_span(code_spans, match_start)
1644                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1645            {
1646                continue;
1647            }
1648
1649            // Only process reference images (undefined references not found by pulldown-cmark)
1650            if let Some(ref_id) = cap.get(6) {
1651                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1652                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1653                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1654                let ref_id_str = ref_id.as_str();
1655                let normalized_ref = if ref_id_str.is_empty() {
1656                    Cow::Owned(alt_text.to_lowercase())
1657                } else {
1658                    Cow::Owned(ref_id_str.to_lowercase())
1659                };
1660
1661                images.push(ParsedImage {
1662                    line: line_num,
1663                    start_col: col_start,
1664                    end_col: col_end,
1665                    byte_offset: match_start,
1666                    byte_end: match_end,
1667                    alt_text: Cow::Borrowed(alt_text),
1668                    url: Cow::Borrowed(""),
1669                    is_reference: true,
1670                    reference_id: Some(normalized_ref),
1671                    link_type: LinkType::Reference, // Undefined references are reference-style
1672                });
1673            }
1674        }
1675
1676        images
1677    }
1678
1679    /// Parse reference definitions
1680    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1681        // Pre-size based on lines count as reference definitions are line-based
1682        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1683
1684        for (line_idx, line_info) in lines.iter().enumerate() {
1685            // Skip lines in code blocks
1686            if line_info.in_code_block {
1687                continue;
1688            }
1689
1690            let line = line_info.content(content);
1691            let line_num = line_idx + 1;
1692
1693            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1694                let id = cap.get(1).unwrap().as_str().to_lowercase();
1695                let url = cap.get(2).unwrap().as_str().to_string();
1696                let title_match = cap.get(3).or_else(|| cap.get(4));
1697                let title = title_match.map(|m| m.as_str().to_string());
1698
1699                // Calculate byte positions
1700                // The match starts at the beginning of the line (0) and extends to the end
1701                let match_obj = cap.get(0).unwrap();
1702                let byte_offset = line_info.byte_offset + match_obj.start();
1703                let byte_end = line_info.byte_offset + match_obj.end();
1704
1705                // Calculate title byte positions (includes the quote character before content)
1706                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1707                    // The match is the content inside quotes, so we include the quote before
1708                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1709                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1710                    (Some(start), Some(end))
1711                } else {
1712                    (None, None)
1713                };
1714
1715                refs.push(ReferenceDef {
1716                    line: line_num,
1717                    id,
1718                    url,
1719                    title,
1720                    byte_offset,
1721                    byte_end,
1722                    title_byte_start,
1723                    title_byte_end,
1724                });
1725            }
1726        }
1727
1728        refs
1729    }
1730
1731    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1732    /// Handles nested blockquotes like `> > > content`
1733    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1734    #[inline]
1735    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1736        let trimmed_start = line.trim_start();
1737        if !trimmed_start.starts_with('>') {
1738            return None;
1739        }
1740
1741        // Track total prefix length to handle nested blockquotes
1742        let mut remaining = line;
1743        let mut total_prefix_len = 0;
1744
1745        loop {
1746            let trimmed = remaining.trim_start();
1747            if !trimmed.starts_with('>') {
1748                break;
1749            }
1750
1751            // Add leading whitespace + '>' to prefix
1752            let leading_ws_len = remaining.len() - trimmed.len();
1753            total_prefix_len += leading_ws_len + 1;
1754
1755            let after_gt = &trimmed[1..];
1756
1757            // Handle optional whitespace after '>' (space or tab)
1758            if let Some(stripped) = after_gt.strip_prefix(' ') {
1759                total_prefix_len += 1;
1760                remaining = stripped;
1761            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1762                total_prefix_len += 1;
1763                remaining = stripped;
1764            } else {
1765                remaining = after_gt;
1766            }
1767        }
1768
1769        Some((&line[..total_prefix_len], remaining))
1770    }
1771
1772    /// Fast unordered list parser - replaces regex for 5-10x speedup
1773    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1774    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1775    #[inline]
1776    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1777        let bytes = line.as_bytes();
1778        let mut i = 0;
1779
1780        // Skip leading whitespace
1781        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1782            i += 1;
1783        }
1784
1785        // Check for marker
1786        if i >= bytes.len() {
1787            return None;
1788        }
1789        let marker = bytes[i] as char;
1790        if marker != '-' && marker != '*' && marker != '+' {
1791            return None;
1792        }
1793        let marker_pos = i;
1794        i += 1;
1795
1796        // Collect spacing after marker (space or tab only)
1797        let spacing_start = i;
1798        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1799            i += 1;
1800        }
1801
1802        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1803    }
1804
1805    /// Fast ordered list parser - replaces regex for 5-10x speedup
1806    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1807    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1808    #[inline]
1809    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1810        let bytes = line.as_bytes();
1811        let mut i = 0;
1812
1813        // Skip leading whitespace
1814        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1815            i += 1;
1816        }
1817
1818        // Collect digits
1819        let number_start = i;
1820        while i < bytes.len() && bytes[i].is_ascii_digit() {
1821            i += 1;
1822        }
1823        if i == number_start {
1824            return None; // No digits found
1825        }
1826
1827        // Check for delimiter
1828        if i >= bytes.len() {
1829            return None;
1830        }
1831        let delimiter = bytes[i] as char;
1832        if delimiter != '.' && delimiter != ')' {
1833            return None;
1834        }
1835        let delimiter_pos = i;
1836        i += 1;
1837
1838        // Collect spacing after delimiter (space or tab only)
1839        let spacing_start = i;
1840        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1841            i += 1;
1842        }
1843
1844        Some((
1845            &line[..number_start],
1846            &line[number_start..delimiter_pos],
1847            delimiter,
1848            &line[spacing_start..i],
1849            &line[i..],
1850        ))
1851    }
1852
1853    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1854    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1855    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1856        let num_lines = line_offsets.len();
1857        let mut in_code_block = vec![false; num_lines];
1858
1859        // For each code block, mark all lines within it
1860        for &(start, end) in code_blocks {
1861            // Ensure we're at valid UTF-8 boundaries
1862            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1863                let mut boundary = start;
1864                while boundary > 0 && !content.is_char_boundary(boundary) {
1865                    boundary -= 1;
1866                }
1867                boundary
1868            } else {
1869                start
1870            };
1871
1872            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1873                let mut boundary = end;
1874                while boundary < content.len() && !content.is_char_boundary(boundary) {
1875                    boundary += 1;
1876                }
1877                boundary
1878            } else {
1879                end.min(content.len())
1880            };
1881
1882            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
1883            // That function now has proper list context awareness (see code_block_utils.rs)
1884            // and correctly distinguishes between:
1885            // - Fenced code blocks (``` or ~~~)
1886            // - Indented code blocks at document level (4 spaces + blank line before)
1887            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
1888            //
1889            // We no longer need to re-validate here. The original validation logic
1890            // was causing false positives by marking list continuation paragraphs as
1891            // code blocks when they have 4 spaces of indentation.
1892
1893            // Use binary search to find the first and last line indices
1894            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1895            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1896            //
1897            // Find the line that CONTAINS safe_start: the line with the largest
1898            // start offset that is <= safe_start. partition_point gives us the
1899            // first line that starts AFTER safe_start, so we subtract 1.
1900            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1901            let first_line = first_line_after.saturating_sub(1);
1902            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1903
1904            // Mark all lines in the range at once
1905            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1906                *flag = true;
1907            }
1908        }
1909
1910        in_code_block
1911    }
1912
1913    /// Pre-compute basic line information (without headings/blockquotes)
1914    fn compute_basic_line_info(
1915        content: &str,
1916        line_offsets: &[usize],
1917        code_blocks: &[(usize, usize)],
1918        flavor: MarkdownFlavor,
1919        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1920        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1921    ) -> Vec<LineInfo> {
1922        let content_lines: Vec<&str> = content.lines().collect();
1923        let mut lines = Vec::with_capacity(content_lines.len());
1924
1925        // Pre-compute which lines are in code blocks
1926        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1927
1928        // Detect front matter boundaries FIRST, before any other parsing
1929        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1930        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1931
1932        for (i, line) in content_lines.iter().enumerate() {
1933            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1934            let indent = line.len() - line.trim_start().len();
1935
1936            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1937            let blockquote_parse = Self::parse_blockquote_prefix(line);
1938
1939            // For blank detection, consider blockquote context
1940            let is_blank = if let Some((_, content)) = blockquote_parse {
1941                // In blockquote context, check if content after prefix is blank
1942                content.trim().is_empty()
1943            } else {
1944                line.trim().is_empty()
1945            };
1946
1947            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1948            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1949
1950            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1951            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1952                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1953            // Check if the ENTIRE line is within an HTML comment (not just the line start)
1954            // This ensures content after `-->` on the same line is not incorrectly skipped
1955            let line_end_offset = byte_offset + line.len();
1956            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1957                html_comment_ranges,
1958                byte_offset,
1959                line_end_offset,
1960            );
1961            let list_item = if !(in_code_block
1962                || is_blank
1963                || in_mkdocstrings
1964                || in_html_comment
1965                || (front_matter_end > 0 && i < front_matter_end))
1966            {
1967                // Strip blockquote prefix if present for list detection (reuse cached result)
1968                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1969                    (content, prefix.len())
1970                } else {
1971                    (&**line, 0)
1972                };
1973
1974                if let Some((leading_spaces, marker, spacing, _content)) =
1975                    Self::parse_unordered_list(line_for_list_check)
1976                {
1977                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1978                    let content_column = marker_column + 1 + spacing.len();
1979
1980                    // According to CommonMark spec, unordered list items MUST have at least one space
1981                    // after the marker (-, *, or +). Without a space, it's not a list item.
1982                    // This also naturally handles cases like:
1983                    // - *emphasis* (not a list)
1984                    // - **bold** (not a list)
1985                    // - --- (horizontal rule, not a list)
1986                    if spacing.is_empty() {
1987                        None
1988                    } else {
1989                        Some(ListItemInfo {
1990                            marker: marker.to_string(),
1991                            is_ordered: false,
1992                            number: None,
1993                            marker_column,
1994                            content_column,
1995                        })
1996                    }
1997                } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1998                    Self::parse_ordered_list(line_for_list_check)
1999                {
2000                    let marker = format!("{number_str}{delimiter}");
2001                    let marker_column = blockquote_prefix_len + leading_spaces.len();
2002                    let content_column = marker_column + marker.len() + spacing.len();
2003
2004                    // According to CommonMark spec, ordered list items MUST have at least one space
2005                    // after the marker (period or parenthesis). Without a space, it's not a list item.
2006                    if spacing.is_empty() {
2007                        None
2008                    } else {
2009                        Some(ListItemInfo {
2010                            marker,
2011                            is_ordered: true,
2012                            number: number_str.parse().ok(),
2013                            marker_column,
2014                            content_column,
2015                        })
2016                    }
2017                } else {
2018                    None
2019                }
2020            } else {
2021                None
2022            };
2023
2024            // Detect horizontal rules (only outside code blocks and frontmatter)
2025            // Uses CommonMark-compliant check including leading indentation validation
2026            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2027            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2028
2029            lines.push(LineInfo {
2030                byte_offset,
2031                byte_len: line.len(),
2032                indent,
2033                is_blank,
2034                in_code_block,
2035                in_front_matter,
2036                in_html_block: false, // Will be populated after line creation
2037                in_html_comment,
2038                list_item,
2039                heading: None,    // Will be populated in second pass for Setext headings
2040                blockquote: None, // Will be populated after line creation
2041                in_mkdocstrings,
2042                in_esm_block: false, // Will be populated after line creation for MDX files
2043                in_code_span_continuation: false, // Will be populated after code spans are parsed
2044                is_horizontal_rule: is_hr,
2045            });
2046        }
2047
2048        lines
2049    }
2050
2051    /// Detect headings and blockquotes (called after HTML block detection)
2052    fn detect_headings_and_blockquotes(
2053        content: &str,
2054        lines: &mut [LineInfo],
2055        flavor: MarkdownFlavor,
2056        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2057        link_byte_ranges: &[(usize, usize)],
2058    ) {
2059        // Regex for heading detection
2060        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2061            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2062        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2063            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2064
2065        let content_lines: Vec<&str> = content.lines().collect();
2066
2067        // Detect front matter boundaries to skip those lines
2068        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2069
2070        // Detect headings (including Setext which needs look-ahead) and blockquotes
2071        for i in 0..lines.len() {
2072            if lines[i].in_code_block {
2073                continue;
2074            }
2075
2076            // Skip lines in front matter
2077            if front_matter_end > 0 && i < front_matter_end {
2078                continue;
2079            }
2080
2081            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2082            if lines[i].in_html_block {
2083                continue;
2084            }
2085
2086            let line = content_lines[i];
2087
2088            // Check for blockquotes (even on blank lines within blockquotes)
2089            if let Some(bq) = parse_blockquote_detailed(line) {
2090                let nesting_level = bq.markers.len(); // Each '>' is one level
2091                let marker_column = bq.indent.len();
2092
2093                // Build the prefix (indentation + markers + space)
2094                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2095
2096                // Check for various blockquote issues
2097                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2098                // Only flag multiple literal spaces, not tabs
2099                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2100                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2101
2102                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2103                // MD028 flags empty blockquote lines that don't have a single space after the marker
2104                // Lines like "> " or ">> " are already correct and don't need fixing
2105                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2106
2107                lines[i].blockquote = Some(BlockquoteInfo {
2108                    nesting_level,
2109                    indent: bq.indent.to_string(),
2110                    marker_column,
2111                    prefix,
2112                    content: bq.content.to_string(),
2113                    has_no_space_after_marker: has_no_space,
2114                    has_multiple_spaces_after_marker: has_multiple_spaces,
2115                    needs_md028_fix,
2116                });
2117            }
2118
2119            // Skip heading detection for blank lines
2120            if lines[i].is_blank {
2121                continue;
2122            }
2123
2124            // Check for ATX headings (but skip MkDocs snippet lines)
2125            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2126            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2127                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2128                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2129            } else {
2130                false
2131            };
2132
2133            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2134                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2135                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2136                    continue;
2137                }
2138                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2139                // This prevents false positives where `#fragment` is detected as a heading
2140                let line_offset = lines[i].byte_offset;
2141                if link_byte_ranges
2142                    .iter()
2143                    .any(|&(start, end)| line_offset > start && line_offset < end)
2144                {
2145                    continue;
2146                }
2147                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2148                let hashes = caps.get(2).map_or("", |m| m.as_str());
2149                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2150                let rest = caps.get(4).map_or("", |m| m.as_str());
2151
2152                let level = hashes.len() as u8;
2153                let marker_column = leading_spaces.len();
2154
2155                // Check for closing sequence, but handle custom IDs that might come after
2156                let (text, has_closing, closing_seq) = {
2157                    // First check if there's a custom ID at the end
2158                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2159                        // Check if this looks like a valid custom ID (ends with })
2160                        if rest[id_start..].trim_end().ends_with('}') {
2161                            // Split off the custom ID
2162                            (&rest[..id_start], &rest[id_start..])
2163                        } else {
2164                            (rest, "")
2165                        }
2166                    } else {
2167                        (rest, "")
2168                    };
2169
2170                    // Now look for closing hashes in the part before the custom ID
2171                    let trimmed_rest = rest_without_id.trim_end();
2172                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2173                        // Find the start of the hash sequence by walking backwards
2174                        // Use char_indices to get byte positions at char boundaries
2175                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2176
2177                        // Find which char index corresponds to last_hash_byte_pos
2178                        let last_hash_char_idx = char_positions
2179                            .iter()
2180                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2181
2182                        if let Some(mut char_idx) = last_hash_char_idx {
2183                            // Walk backwards to find start of hash sequence
2184                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2185                                char_idx -= 1;
2186                            }
2187
2188                            // Get the byte position of the start of hashes
2189                            let start_of_hashes = char_positions[char_idx].0;
2190
2191                            // Check if there's at least one space before the closing hashes
2192                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2193
2194                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2195                            let potential_closing = &trimmed_rest[start_of_hashes..];
2196                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2197
2198                            if is_all_hashes && has_space_before {
2199                                // This is a closing sequence
2200                                let closing_hashes = potential_closing.to_string();
2201                                // The text is everything before the closing hashes
2202                                // Don't include the custom ID here - it will be extracted later
2203                                let text_part = if !custom_id_part.is_empty() {
2204                                    // If we have a custom ID, append it back to get the full rest
2205                                    // This allows the extract_header_id function to handle it properly
2206                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2207                                } else {
2208                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2209                                };
2210                                (text_part, true, closing_hashes)
2211                            } else {
2212                                // Not a valid closing sequence, return the full content
2213                                (rest.to_string(), false, String::new())
2214                            }
2215                        } else {
2216                            // Couldn't find char boundary, return the full content
2217                            (rest.to_string(), false, String::new())
2218                        }
2219                    } else {
2220                        // No hashes found, return the full content
2221                        (rest.to_string(), false, String::new())
2222                    }
2223                };
2224
2225                let content_column = marker_column + hashes.len() + spaces_after.len();
2226
2227                // Extract custom header ID if present
2228                let raw_text = text.trim().to_string();
2229                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2230
2231                // If no custom ID was found on the header line, check the next line for standalone attr-list
2232                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2233                    let next_line = content_lines[i + 1];
2234                    if !lines[i + 1].in_code_block
2235                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2236                        && let Some(next_line_id) =
2237                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2238                    {
2239                        custom_id = Some(next_line_id);
2240                    }
2241                }
2242
2243                // ATX heading is "valid" for processing by heading rules if:
2244                // 1. Has space after # (CommonMark compliant): `# Heading`
2245                // 2. Is empty (just hashes): `#`
2246                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2247                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2248                //
2249                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2250                // - `#tag` - single # with lowercase (social hashtag)
2251                // - `#123` - single # with number (GitHub issue ref)
2252                let is_valid = !spaces_after.is_empty()
2253                    || rest.is_empty()
2254                    || level > 1
2255                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2256
2257                lines[i].heading = Some(HeadingInfo {
2258                    level,
2259                    style: HeadingStyle::ATX,
2260                    marker: hashes.to_string(),
2261                    marker_column,
2262                    content_column,
2263                    text: clean_text,
2264                    custom_id,
2265                    raw_text,
2266                    has_closing_sequence: has_closing,
2267                    closing_sequence: closing_seq,
2268                    is_valid,
2269                });
2270            }
2271            // Check for Setext headings (need to look at next line)
2272            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2273                let next_line = content_lines[i + 1];
2274                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2275                    // Skip if next line is front matter delimiter
2276                    if front_matter_end > 0 && i < front_matter_end {
2277                        continue;
2278                    }
2279
2280                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2281                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2282                    {
2283                        continue;
2284                    }
2285
2286                    let underline = next_line.trim();
2287
2288                    let level = if underline.starts_with('=') { 1 } else { 2 };
2289                    let style = if level == 1 {
2290                        HeadingStyle::Setext1
2291                    } else {
2292                        HeadingStyle::Setext2
2293                    };
2294
2295                    // Extract custom header ID if present
2296                    let raw_text = line.trim().to_string();
2297                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2298
2299                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2300                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2301                        let attr_line = content_lines[i + 2];
2302                        if !lines[i + 2].in_code_block
2303                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2304                            && let Some(attr_line_id) =
2305                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2306                        {
2307                            custom_id = Some(attr_line_id);
2308                        }
2309                    }
2310
2311                    lines[i].heading = Some(HeadingInfo {
2312                        level,
2313                        style,
2314                        marker: underline.to_string(),
2315                        marker_column: next_line.len() - next_line.trim_start().len(),
2316                        content_column: lines[i].indent,
2317                        text: clean_text,
2318                        custom_id,
2319                        raw_text,
2320                        has_closing_sequence: false,
2321                        closing_sequence: String::new(),
2322                        is_valid: true, // Setext headings are always valid
2323                    });
2324                }
2325            }
2326        }
2327    }
2328
2329    /// Detect HTML blocks in the content
2330    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2331        // HTML block elements that trigger block context
2332        // Includes HTML5 media, embedded content, and interactive elements
2333        const BLOCK_ELEMENTS: &[&str] = &[
2334            "address",
2335            "article",
2336            "aside",
2337            "audio",
2338            "blockquote",
2339            "canvas",
2340            "details",
2341            "dialog",
2342            "dd",
2343            "div",
2344            "dl",
2345            "dt",
2346            "embed",
2347            "fieldset",
2348            "figcaption",
2349            "figure",
2350            "footer",
2351            "form",
2352            "h1",
2353            "h2",
2354            "h3",
2355            "h4",
2356            "h5",
2357            "h6",
2358            "header",
2359            "hr",
2360            "iframe",
2361            "li",
2362            "main",
2363            "menu",
2364            "nav",
2365            "noscript",
2366            "object",
2367            "ol",
2368            "p",
2369            "picture",
2370            "pre",
2371            "script",
2372            "search",
2373            "section",
2374            "source",
2375            "style",
2376            "summary",
2377            "svg",
2378            "table",
2379            "tbody",
2380            "td",
2381            "template",
2382            "textarea",
2383            "tfoot",
2384            "th",
2385            "thead",
2386            "tr",
2387            "track",
2388            "ul",
2389            "video",
2390        ];
2391
2392        let mut i = 0;
2393        while i < lines.len() {
2394            // Skip if already in code block or front matter
2395            if lines[i].in_code_block || lines[i].in_front_matter {
2396                i += 1;
2397                continue;
2398            }
2399
2400            let trimmed = lines[i].content(content).trim_start();
2401
2402            // Check if line starts with an HTML tag
2403            if trimmed.starts_with('<') && trimmed.len() > 1 {
2404                // Extract tag name safely
2405                let after_bracket = &trimmed[1..];
2406                let is_closing = after_bracket.starts_with('/');
2407                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2408
2409                // Extract tag name (stop at space, >, /, or end of string)
2410                let tag_name = tag_start
2411                    .chars()
2412                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2413                    .collect::<String>()
2414                    .to_lowercase();
2415
2416                // Check if it's a block element
2417                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2418                    // Mark this line as in HTML block
2419                    lines[i].in_html_block = true;
2420
2421                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2422                    // This avoids complex nesting logic that might cause infinite loops
2423                    if !is_closing {
2424                        let closing_tag = format!("</{tag_name}>");
2425                        // style and script tags can contain blank lines (CSS/JS formatting)
2426                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2427                        let mut j = i + 1;
2428                        while j < lines.len() && j < i + 100 {
2429                            // Limit search to 100 lines
2430                            // Stop at blank lines (except for style/script tags)
2431                            if !allow_blank_lines && lines[j].is_blank {
2432                                break;
2433                            }
2434
2435                            lines[j].in_html_block = true;
2436
2437                            // Check if this line contains the closing tag
2438                            if lines[j].content(content).contains(&closing_tag) {
2439                                break;
2440                            }
2441                            j += 1;
2442                        }
2443                    }
2444                }
2445            }
2446
2447            i += 1;
2448        }
2449    }
2450
2451    /// Detect ESM import/export blocks in MDX files
2452    /// ESM blocks consist of contiguous import/export statements at the top of the file
2453    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2454        // Only process MDX files
2455        if !flavor.supports_esm_blocks() {
2456            return;
2457        }
2458
2459        let mut in_multiline_comment = false;
2460
2461        for line in lines.iter_mut() {
2462            // Skip blank lines and HTML comments
2463            if line.is_blank || line.in_html_comment {
2464                continue;
2465            }
2466
2467            let trimmed = line.content(content).trim_start();
2468
2469            // Handle continuation of multi-line JS comments
2470            if in_multiline_comment {
2471                if trimmed.contains("*/") {
2472                    in_multiline_comment = false;
2473                }
2474                continue;
2475            }
2476
2477            // Skip single-line JS comments (// and ///)
2478            if trimmed.starts_with("//") {
2479                continue;
2480            }
2481
2482            // Handle start of multi-line JS comment
2483            if trimmed.starts_with("/*") {
2484                if !trimmed.contains("*/") {
2485                    in_multiline_comment = true;
2486                }
2487                continue;
2488            }
2489
2490            // Check if line starts with import or export
2491            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2492                line.in_esm_block = true;
2493            } else {
2494                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2495                break;
2496            }
2497        }
2498    }
2499
2500    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2501    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2502        let mut code_spans = Vec::new();
2503
2504        // Quick check - if no backticks, no code spans
2505        if !content.contains('`') {
2506            return code_spans;
2507        }
2508
2509        // Use pulldown-cmark's streaming parser with byte offsets
2510        let parser = Parser::new(content).into_offset_iter();
2511
2512        for (event, range) in parser {
2513            if let Event::Code(_) = event {
2514                let start_pos = range.start;
2515                let end_pos = range.end;
2516
2517                // The range includes the backticks, extract the actual content
2518                let full_span = &content[start_pos..end_pos];
2519                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2520
2521                // Extract content between backticks, preserving spaces
2522                let content_start = start_pos + backtick_count;
2523                let content_end = end_pos - backtick_count;
2524                let span_content = if content_start < content_end {
2525                    content[content_start..content_end].to_string()
2526                } else {
2527                    String::new()
2528                };
2529
2530                // Use binary search to find line number - O(log n) instead of O(n)
2531                // Find the rightmost line whose byte_offset <= start_pos
2532                let line_idx = lines
2533                    .partition_point(|line| line.byte_offset <= start_pos)
2534                    .saturating_sub(1);
2535                let line_num = line_idx + 1;
2536                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2537
2538                // Find end column using binary search
2539                let end_line_idx = lines
2540                    .partition_point(|line| line.byte_offset <= end_pos)
2541                    .saturating_sub(1);
2542                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2543
2544                // Convert byte offsets to character positions for correct Unicode handling
2545                // This ensures consistency with warning.column which uses character positions
2546                let line_content = lines[line_idx].content(content);
2547                let col_start = if byte_col_start <= line_content.len() {
2548                    line_content[..byte_col_start].chars().count()
2549                } else {
2550                    line_content.chars().count()
2551                };
2552
2553                let end_line_content = lines[end_line_idx].content(content);
2554                let col_end = if byte_col_end <= end_line_content.len() {
2555                    end_line_content[..byte_col_end].chars().count()
2556                } else {
2557                    end_line_content.chars().count()
2558                };
2559
2560                code_spans.push(CodeSpan {
2561                    line: line_num,
2562                    end_line: end_line_idx + 1,
2563                    start_col: col_start,
2564                    end_col: col_end,
2565                    byte_offset: start_pos,
2566                    byte_end: end_pos,
2567                    backtick_count,
2568                    content: span_content,
2569                });
2570            }
2571        }
2572
2573        // Sort by position to ensure consistent ordering
2574        code_spans.sort_by_key(|span| span.byte_offset);
2575
2576        code_spans
2577    }
2578
2579    /// Parse all list blocks in the content (legacy line-by-line approach)
2580    ///
2581    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2582    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2583    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2584    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2585    ///   treated as list continuation (based on the list marker width)
2586    ///
2587    /// When a new list item is encountered, we check if list-breaking content was seen
2588    /// since the last item. If so, we start a new list block.
2589    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2590        // Minimum indentation for unordered list continuation per CommonMark spec
2591        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2592
2593        /// Initialize or reset the forward-scanning tracking state.
2594        /// This helper eliminates code duplication across three initialization sites.
2595        #[inline]
2596        fn reset_tracking_state(
2597            list_item: &ListItemInfo,
2598            has_list_breaking_content: &mut bool,
2599            min_continuation: &mut usize,
2600        ) {
2601            *has_list_breaking_content = false;
2602            let marker_width = if list_item.is_ordered {
2603                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2604            } else {
2605                list_item.marker.len()
2606            };
2607            *min_continuation = if list_item.is_ordered {
2608                marker_width
2609            } else {
2610                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2611            };
2612        }
2613
2614        // Pre-size based on lines that could be list items
2615        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2616        let mut current_block: Option<ListBlock> = None;
2617        let mut last_list_item_line = 0;
2618        let mut current_indent_level = 0;
2619        let mut last_marker_width = 0;
2620
2621        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2622        let mut has_list_breaking_content_since_last_item = false;
2623        let mut min_continuation_for_tracking = 0;
2624
2625        for (line_idx, line_info) in lines.iter().enumerate() {
2626            let line_num = line_idx + 1;
2627
2628            // Enhanced code block handling using Design #3's context analysis
2629            if line_info.in_code_block {
2630                if let Some(ref mut block) = current_block {
2631                    // Calculate minimum indentation for list continuation
2632                    let min_continuation_indent =
2633                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2634
2635                    // Analyze code block context using the three-tier classification
2636                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2637
2638                    match context {
2639                        CodeBlockContext::Indented => {
2640                            // Code block is properly indented - continues the list
2641                            block.end_line = line_num;
2642                            continue;
2643                        }
2644                        CodeBlockContext::Standalone => {
2645                            // Code block separates lists - end current block
2646                            let completed_block = current_block.take().unwrap();
2647                            list_blocks.push(completed_block);
2648                            continue;
2649                        }
2650                        CodeBlockContext::Adjacent => {
2651                            // Edge case - use conservative behavior (continue list)
2652                            block.end_line = line_num;
2653                            continue;
2654                        }
2655                    }
2656                } else {
2657                    // No current list block - skip code block lines
2658                    continue;
2659                }
2660            }
2661
2662            // Extract blockquote prefix if any
2663            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2664                caps.get(0).unwrap().as_str().to_string()
2665            } else {
2666                String::new()
2667            };
2668
2669            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2670            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2671            if current_block.is_some()
2672                && line_info.list_item.is_none()
2673                && !line_info.is_blank
2674                && !line_info.in_code_span_continuation
2675            {
2676                let line_content = line_info.content(content).trim();
2677
2678                // Check for structural separators that break lists
2679                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2680                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2681                // as they indicate improper indentation rather than lazy continuation.
2682                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2683                let breaks_list = line_info.heading.is_some()
2684                    || line_content.starts_with("---")
2685                    || line_content.starts_with("***")
2686                    || line_content.starts_with("___")
2687                    || crate::utils::skip_context::is_table_line(line_content)
2688                    || line_content.starts_with(">")
2689                    || (line_info.indent > 0
2690                        && line_info.indent < min_continuation_for_tracking
2691                        && !is_lazy_continuation);
2692
2693                if breaks_list {
2694                    has_list_breaking_content_since_last_item = true;
2695                }
2696            }
2697
2698            // If this line is a code span continuation within an active list block,
2699            // extend the block's end_line to include this line (maintains list continuity)
2700            if line_info.in_code_span_continuation
2701                && line_info.list_item.is_none()
2702                && let Some(ref mut block) = current_block
2703            {
2704                block.end_line = line_num;
2705            }
2706
2707            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2708            // properly indented lines within the list). This ensures the workaround at line 2448
2709            // works correctly when there are multiple continuation lines before a nested list item.
2710            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2711            let is_valid_continuation =
2712                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2713            if !line_info.in_code_span_continuation
2714                && line_info.list_item.is_none()
2715                && !line_info.is_blank
2716                && !line_info.in_code_block
2717                && is_valid_continuation
2718                && let Some(ref mut block) = current_block
2719            {
2720                block.end_line = line_num;
2721            }
2722
2723            // Check if this line is a list item
2724            if let Some(list_item) = &line_info.list_item {
2725                // Calculate nesting level based on indentation
2726                let item_indent = list_item.marker_column;
2727                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2728
2729                if let Some(ref mut block) = current_block {
2730                    // Check if this continues the current block
2731                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2732                    // or a continuation at the same or lower level
2733                    let is_nested = nesting > block.nesting_level;
2734                    let same_type =
2735                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2736                    let same_context = block.blockquote_prefix == blockquote_prefix;
2737                    // Allow one blank line after last item, or lines immediately after block content
2738                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2739
2740                    // For unordered lists, also check marker consistency
2741                    let marker_compatible =
2742                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2743
2744                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2745                    // This eliminates the quadratic bottleneck from issue #148
2746                    let has_non_list_content = has_list_breaking_content_since_last_item;
2747
2748                    // A list continues if:
2749                    // 1. It's a nested item (indented more than the parent), OR
2750                    // 2. It's the same type at the same level with reasonable distance
2751                    let mut continues_list = if is_nested {
2752                        // Nested items always continue the list if they're in the same context
2753                        same_context && reasonable_distance && !has_non_list_content
2754                    } else {
2755                        // Same-level items need to match type and markers
2756                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2757                    };
2758
2759                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2760                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2761                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2762                        // Check if the previous line was a list item or a continuation of a list item
2763                        // (including lazy continuation lines)
2764                        if block.item_lines.contains(&(line_num - 1)) {
2765                            // They're consecutive list items - force them to be in the same list
2766                            continues_list = true;
2767                        } else {
2768                            // Previous line is a continuation line within this block
2769                            // (e.g., lazy continuation with indent=0)
2770                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2771                            continues_list = true;
2772                        }
2773                    }
2774
2775                    if continues_list {
2776                        // Extend current block
2777                        block.end_line = line_num;
2778                        block.item_lines.push(line_num);
2779
2780                        // Update max marker width
2781                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2782                            list_item.marker.len() + 1
2783                        } else {
2784                            list_item.marker.len()
2785                        });
2786
2787                        // Update marker consistency for unordered lists
2788                        if !block.is_ordered
2789                            && block.marker.is_some()
2790                            && block.marker.as_ref() != Some(&list_item.marker)
2791                        {
2792                            // Mixed markers, clear the marker field
2793                            block.marker = None;
2794                        }
2795
2796                        // Reset tracked state for issue #148 optimization
2797                        reset_tracking_state(
2798                            list_item,
2799                            &mut has_list_breaking_content_since_last_item,
2800                            &mut min_continuation_for_tracking,
2801                        );
2802                    } else {
2803                        // End current block and start a new one
2804
2805                        list_blocks.push(block.clone());
2806
2807                        *block = ListBlock {
2808                            start_line: line_num,
2809                            end_line: line_num,
2810                            is_ordered: list_item.is_ordered,
2811                            marker: if list_item.is_ordered {
2812                                None
2813                            } else {
2814                                Some(list_item.marker.clone())
2815                            },
2816                            blockquote_prefix: blockquote_prefix.clone(),
2817                            item_lines: vec![line_num],
2818                            nesting_level: nesting,
2819                            max_marker_width: if list_item.is_ordered {
2820                                list_item.marker.len() + 1
2821                            } else {
2822                                list_item.marker.len()
2823                            },
2824                        };
2825
2826                        // Initialize tracked state for new block (issue #148 optimization)
2827                        reset_tracking_state(
2828                            list_item,
2829                            &mut has_list_breaking_content_since_last_item,
2830                            &mut min_continuation_for_tracking,
2831                        );
2832                    }
2833                } else {
2834                    // Start a new block
2835                    current_block = Some(ListBlock {
2836                        start_line: line_num,
2837                        end_line: line_num,
2838                        is_ordered: list_item.is_ordered,
2839                        marker: if list_item.is_ordered {
2840                            None
2841                        } else {
2842                            Some(list_item.marker.clone())
2843                        },
2844                        blockquote_prefix,
2845                        item_lines: vec![line_num],
2846                        nesting_level: nesting,
2847                        max_marker_width: list_item.marker.len(),
2848                    });
2849
2850                    // Initialize tracked state for new block (issue #148 optimization)
2851                    reset_tracking_state(
2852                        list_item,
2853                        &mut has_list_breaking_content_since_last_item,
2854                        &mut min_continuation_for_tracking,
2855                    );
2856                }
2857
2858                last_list_item_line = line_num;
2859                current_indent_level = item_indent;
2860                last_marker_width = if list_item.is_ordered {
2861                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2862                } else {
2863                    list_item.marker.len()
2864                };
2865            } else if let Some(ref mut block) = current_block {
2866                // Not a list item - check if it continues the current block
2867
2868                // For MD032 compatibility, we use a simple approach:
2869                // - Indented lines continue the list
2870                // - Blank lines followed by indented content continue the list
2871                // - Everything else ends the list
2872
2873                // Check if the last line in the list block ended with a backslash (hard line break)
2874                // This handles cases where list items use backslash for hard line breaks
2875                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2876                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2877                } else {
2878                    false
2879                };
2880
2881                // Calculate minimum indentation for list continuation
2882                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2883                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2884                let min_continuation_indent = if block.is_ordered {
2885                    current_indent_level + last_marker_width
2886                } else {
2887                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2888                };
2889
2890                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2891                    // Indented line or backslash continuation continues the list
2892                    block.end_line = line_num;
2893                } else if line_info.is_blank {
2894                    // Blank line - check if it's internal to the list or ending it
2895                    // We only include blank lines that are followed by more list content
2896                    let mut check_idx = line_idx + 1;
2897                    let mut found_continuation = false;
2898
2899                    // Skip additional blank lines
2900                    while check_idx < lines.len() && lines[check_idx].is_blank {
2901                        check_idx += 1;
2902                    }
2903
2904                    if check_idx < lines.len() {
2905                        let next_line = &lines[check_idx];
2906                        // Check if followed by indented content (list continuation)
2907                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2908                            found_continuation = true;
2909                        }
2910                        // Check if followed by another list item at the same level
2911                        else if !next_line.in_code_block
2912                            && next_line.list_item.is_some()
2913                            && let Some(item) = &next_line.list_item
2914                        {
2915                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2916                                .find(next_line.content(content))
2917                                .map_or(String::new(), |m| m.as_str().to_string());
2918                            if item.marker_column == current_indent_level
2919                                && item.is_ordered == block.is_ordered
2920                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2921                            {
2922                                // Check if there was meaningful content between the list items (unused now)
2923                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2924                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2925                                    if let Some(between_line) = lines.get(idx) {
2926                                        let between_content = between_line.content(content);
2927                                        let trimmed = between_content.trim();
2928                                        // Skip empty lines
2929                                        if trimmed.is_empty() {
2930                                            return false;
2931                                        }
2932                                        // Check for meaningful content
2933                                        let line_indent = between_content.len() - between_content.trim_start().len();
2934
2935                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2936                                        if trimmed.starts_with("```")
2937                                            || trimmed.starts_with("~~~")
2938                                            || trimmed.starts_with("---")
2939                                            || trimmed.starts_with("***")
2940                                            || trimmed.starts_with("___")
2941                                            || trimmed.starts_with(">")
2942                                            || crate::utils::skip_context::is_table_line(trimmed)
2943                                            || between_line.heading.is_some()
2944                                        {
2945                                            return true; // These are structural separators - meaningful content that breaks lists
2946                                        }
2947
2948                                        // Only properly indented content continues the list
2949                                        line_indent >= min_continuation_indent
2950                                    } else {
2951                                        false
2952                                    }
2953                                });
2954
2955                                if block.is_ordered {
2956                                    // For ordered lists: don't continue if there are structural separators
2957                                    // Check if there are structural separators between the list items
2958                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2959                                        if let Some(between_line) = lines.get(idx) {
2960                                            let trimmed = between_line.content(content).trim();
2961                                            if trimmed.is_empty() {
2962                                                return false;
2963                                            }
2964                                            // Check for structural separators that break lists
2965                                            trimmed.starts_with("```")
2966                                                || trimmed.starts_with("~~~")
2967                                                || trimmed.starts_with("---")
2968                                                || trimmed.starts_with("***")
2969                                                || trimmed.starts_with("___")
2970                                                || trimmed.starts_with(">")
2971                                                || crate::utils::skip_context::is_table_line(trimmed)
2972                                                || between_line.heading.is_some()
2973                                        } else {
2974                                            false
2975                                        }
2976                                    });
2977                                    found_continuation = !has_structural_separators;
2978                                } else {
2979                                    // For unordered lists: also check for structural separators
2980                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2981                                        if let Some(between_line) = lines.get(idx) {
2982                                            let trimmed = between_line.content(content).trim();
2983                                            if trimmed.is_empty() {
2984                                                return false;
2985                                            }
2986                                            // Check for structural separators that break lists
2987                                            trimmed.starts_with("```")
2988                                                || trimmed.starts_with("~~~")
2989                                                || trimmed.starts_with("---")
2990                                                || trimmed.starts_with("***")
2991                                                || trimmed.starts_with("___")
2992                                                || trimmed.starts_with(">")
2993                                                || crate::utils::skip_context::is_table_line(trimmed)
2994                                                || between_line.heading.is_some()
2995                                        } else {
2996                                            false
2997                                        }
2998                                    });
2999                                    found_continuation = !has_structural_separators;
3000                                }
3001                            }
3002                        }
3003                    }
3004
3005                    if found_continuation {
3006                        // Include the blank line in the block
3007                        block.end_line = line_num;
3008                    } else {
3009                        // Blank line ends the list - don't include it
3010                        list_blocks.push(block.clone());
3011                        current_block = None;
3012                    }
3013                } else {
3014                    // Check for lazy continuation - non-indented line immediately after a list item
3015                    // But only if the line has sufficient indentation for the list type
3016                    let min_required_indent = if block.is_ordered {
3017                        current_indent_level + last_marker_width
3018                    } else {
3019                        current_indent_level + 2
3020                    };
3021
3022                    // For lazy continuation to apply, the line must either:
3023                    // 1. Have no indentation (true lazy continuation)
3024                    // 2. Have sufficient indentation for the list type
3025                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3026                    let line_content = line_info.content(content).trim();
3027
3028                    // Check for table-like patterns
3029                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3030
3031                    let is_structural_separator = line_info.heading.is_some()
3032                        || line_content.starts_with("```")
3033                        || line_content.starts_with("~~~")
3034                        || line_content.starts_with("---")
3035                        || line_content.starts_with("***")
3036                        || line_content.starts_with("___")
3037                        || line_content.starts_with(">")
3038                        || looks_like_table;
3039
3040                    // Allow lazy continuation if we're still within the same list block
3041                    // (not just immediately after a list item)
3042                    let is_lazy_continuation = !is_structural_separator
3043                        && !line_info.is_blank
3044                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3045
3046                    if is_lazy_continuation {
3047                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3048                        // it's probably not a continuation
3049                        let content_to_check = if !blockquote_prefix.is_empty() {
3050                            // Strip blockquote prefix to check the actual content
3051                            line_info
3052                                .content(content)
3053                                .strip_prefix(&blockquote_prefix)
3054                                .unwrap_or(line_info.content(content))
3055                                .trim()
3056                        } else {
3057                            line_info.content(content).trim()
3058                        };
3059
3060                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3061
3062                        // If it starts with uppercase and the previous line ended with punctuation,
3063                        // it's likely a new paragraph, not a continuation
3064                        if starts_with_uppercase && last_list_item_line > 0 {
3065                            // This looks like a new paragraph
3066                            list_blocks.push(block.clone());
3067                            current_block = None;
3068                        } else {
3069                            // This is a lazy continuation line
3070                            block.end_line = line_num;
3071                        }
3072                    } else {
3073                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3074                        list_blocks.push(block.clone());
3075                        current_block = None;
3076                    }
3077                }
3078            }
3079        }
3080
3081        // Don't forget the last block
3082        if let Some(block) = current_block {
3083            list_blocks.push(block);
3084        }
3085
3086        // Merge adjacent blocks that should be one
3087        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3088
3089        list_blocks
3090    }
3091
3092    /// Compute character frequency for fast content analysis
3093    fn compute_char_frequency(content: &str) -> CharFrequency {
3094        let mut frequency = CharFrequency::default();
3095
3096        for ch in content.chars() {
3097            match ch {
3098                '#' => frequency.hash_count += 1,
3099                '*' => frequency.asterisk_count += 1,
3100                '_' => frequency.underscore_count += 1,
3101                '-' => frequency.hyphen_count += 1,
3102                '+' => frequency.plus_count += 1,
3103                '>' => frequency.gt_count += 1,
3104                '|' => frequency.pipe_count += 1,
3105                '[' => frequency.bracket_count += 1,
3106                '`' => frequency.backtick_count += 1,
3107                '<' => frequency.lt_count += 1,
3108                '!' => frequency.exclamation_count += 1,
3109                '\n' => frequency.newline_count += 1,
3110                _ => {}
3111            }
3112        }
3113
3114        frequency
3115    }
3116
3117    /// Parse HTML tags in the content
3118    fn parse_html_tags(
3119        content: &str,
3120        lines: &[LineInfo],
3121        code_blocks: &[(usize, usize)],
3122        flavor: MarkdownFlavor,
3123    ) -> Vec<HtmlTag> {
3124        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3125            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3126
3127        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3128
3129        for cap in HTML_TAG_REGEX.captures_iter(content) {
3130            let full_match = cap.get(0).unwrap();
3131            let match_start = full_match.start();
3132            let match_end = full_match.end();
3133
3134            // Skip if in code block
3135            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3136                continue;
3137            }
3138
3139            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3140            let tag_name_original = cap.get(2).unwrap().as_str();
3141            let tag_name = tag_name_original.to_lowercase();
3142            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3143
3144            // Skip JSX components in MDX files (tags starting with uppercase letter)
3145            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3146            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3147                continue;
3148            }
3149
3150            // Find which line this tag is on
3151            let mut line_num = 1;
3152            let mut col_start = match_start;
3153            let mut col_end = match_end;
3154            for (idx, line_info) in lines.iter().enumerate() {
3155                if match_start >= line_info.byte_offset {
3156                    line_num = idx + 1;
3157                    col_start = match_start - line_info.byte_offset;
3158                    col_end = match_end - line_info.byte_offset;
3159                } else {
3160                    break;
3161                }
3162            }
3163
3164            html_tags.push(HtmlTag {
3165                line: line_num,
3166                start_col: col_start,
3167                end_col: col_end,
3168                byte_offset: match_start,
3169                byte_end: match_end,
3170                tag_name,
3171                is_closing,
3172                is_self_closing,
3173                raw_content: full_match.as_str().to_string(),
3174            });
3175        }
3176
3177        html_tags
3178    }
3179
3180    /// Parse emphasis spans in the content
3181    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3182        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3183            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3184
3185        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3186
3187        for cap in EMPHASIS_REGEX.captures_iter(content) {
3188            let full_match = cap.get(0).unwrap();
3189            let match_start = full_match.start();
3190            let match_end = full_match.end();
3191
3192            // Skip if in code block
3193            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3194                continue;
3195            }
3196
3197            let opening_markers = cap.get(1).unwrap().as_str();
3198            let content_part = cap.get(2).unwrap().as_str();
3199            let closing_markers = cap.get(3).unwrap().as_str();
3200
3201            // Validate matching markers
3202            if opening_markers.chars().next() != closing_markers.chars().next()
3203                || opening_markers.len() != closing_markers.len()
3204            {
3205                continue;
3206            }
3207
3208            let marker = opening_markers.chars().next().unwrap();
3209            let marker_count = opening_markers.len();
3210
3211            // Find which line this emphasis is on
3212            let mut line_num = 1;
3213            let mut col_start = match_start;
3214            let mut col_end = match_end;
3215            for (idx, line_info) in lines.iter().enumerate() {
3216                if match_start >= line_info.byte_offset {
3217                    line_num = idx + 1;
3218                    col_start = match_start - line_info.byte_offset;
3219                    col_end = match_end - line_info.byte_offset;
3220                } else {
3221                    break;
3222                }
3223            }
3224
3225            emphasis_spans.push(EmphasisSpan {
3226                line: line_num,
3227                start_col: col_start,
3228                end_col: col_end,
3229                byte_offset: match_start,
3230                byte_end: match_end,
3231                marker,
3232                marker_count,
3233                content: content_part.to_string(),
3234            });
3235        }
3236
3237        emphasis_spans
3238    }
3239
3240    /// Parse table rows in the content
3241    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3242        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3243
3244        for (line_idx, line_info) in lines.iter().enumerate() {
3245            // Skip lines in code blocks or blank lines
3246            if line_info.in_code_block || line_info.is_blank {
3247                continue;
3248            }
3249
3250            let line = line_info.content(content);
3251            let line_num = line_idx + 1;
3252
3253            // Check if this line contains pipes (potential table row)
3254            if !line.contains('|') {
3255                continue;
3256            }
3257
3258            // Count columns by splitting on pipes
3259            let parts: Vec<&str> = line.split('|').collect();
3260            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3261
3262            // Check if this is a separator row
3263            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3264            let mut column_alignments = Vec::new();
3265
3266            if is_separator {
3267                for part in &parts[1..parts.len() - 1] {
3268                    // Skip first and last empty parts
3269                    let trimmed = part.trim();
3270                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3271                        "center".to_string()
3272                    } else if trimmed.ends_with(':') {
3273                        "right".to_string()
3274                    } else if trimmed.starts_with(':') {
3275                        "left".to_string()
3276                    } else {
3277                        "none".to_string()
3278                    };
3279                    column_alignments.push(alignment);
3280                }
3281            }
3282
3283            table_rows.push(TableRow {
3284                line: line_num,
3285                is_separator,
3286                column_count,
3287                column_alignments,
3288            });
3289        }
3290
3291        table_rows
3292    }
3293
3294    /// Parse bare URLs and emails in the content
3295    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3296        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3297
3298        // Check for bare URLs (not in angle brackets or markdown links)
3299        for cap in BARE_URL_PATTERN.captures_iter(content) {
3300            let full_match = cap.get(0).unwrap();
3301            let match_start = full_match.start();
3302            let match_end = full_match.end();
3303
3304            // Skip if in code block
3305            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3306                continue;
3307            }
3308
3309            // Skip if already in angle brackets or markdown links
3310            let preceding_char = if match_start > 0 {
3311                content.chars().nth(match_start - 1)
3312            } else {
3313                None
3314            };
3315            let following_char = content.chars().nth(match_end);
3316
3317            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3318                continue;
3319            }
3320            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3321                continue;
3322            }
3323
3324            let url = full_match.as_str();
3325            let url_type = if url.starts_with("https://") {
3326                "https"
3327            } else if url.starts_with("http://") {
3328                "http"
3329            } else if url.starts_with("ftp://") {
3330                "ftp"
3331            } else {
3332                "other"
3333            };
3334
3335            // Find which line this URL is on
3336            let mut line_num = 1;
3337            let mut col_start = match_start;
3338            let mut col_end = match_end;
3339            for (idx, line_info) in lines.iter().enumerate() {
3340                if match_start >= line_info.byte_offset {
3341                    line_num = idx + 1;
3342                    col_start = match_start - line_info.byte_offset;
3343                    col_end = match_end - line_info.byte_offset;
3344                } else {
3345                    break;
3346                }
3347            }
3348
3349            bare_urls.push(BareUrl {
3350                line: line_num,
3351                start_col: col_start,
3352                end_col: col_end,
3353                byte_offset: match_start,
3354                byte_end: match_end,
3355                url: url.to_string(),
3356                url_type: url_type.to_string(),
3357            });
3358        }
3359
3360        // Check for bare email addresses
3361        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3362            let full_match = cap.get(0).unwrap();
3363            let match_start = full_match.start();
3364            let match_end = full_match.end();
3365
3366            // Skip if in code block
3367            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3368                continue;
3369            }
3370
3371            // Skip if already in angle brackets or markdown links
3372            let preceding_char = if match_start > 0 {
3373                content.chars().nth(match_start - 1)
3374            } else {
3375                None
3376            };
3377            let following_char = content.chars().nth(match_end);
3378
3379            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3380                continue;
3381            }
3382            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3383                continue;
3384            }
3385
3386            let email = full_match.as_str();
3387
3388            // Find which line this email is on
3389            let mut line_num = 1;
3390            let mut col_start = match_start;
3391            let mut col_end = match_end;
3392            for (idx, line_info) in lines.iter().enumerate() {
3393                if match_start >= line_info.byte_offset {
3394                    line_num = idx + 1;
3395                    col_start = match_start - line_info.byte_offset;
3396                    col_end = match_end - line_info.byte_offset;
3397                } else {
3398                    break;
3399                }
3400            }
3401
3402            bare_urls.push(BareUrl {
3403                line: line_num,
3404                start_col: col_start,
3405                end_col: col_end,
3406                byte_offset: match_start,
3407                byte_end: match_end,
3408                url: email.to_string(),
3409                url_type: "email".to_string(),
3410            });
3411        }
3412
3413        bare_urls
3414    }
3415
3416    /// Get an iterator over valid CommonMark headings
3417    ///
3418    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
3419    /// that should be flagged by MD018 but should not be processed by other heading rules.
3420    ///
3421    /// # Examples
3422    ///
3423    /// ```rust
3424    /// use rumdl_lib::lint_context::LintContext;
3425    /// use rumdl_lib::config::MarkdownFlavor;
3426    ///
3427    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
3428    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3429    ///
3430    /// for heading in ctx.valid_headings() {
3431    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
3432    /// }
3433    /// // Only prints valid headings, skips `#NoSpace`
3434    /// ```
3435    #[must_use]
3436    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
3437        ValidHeadingsIter::new(&self.lines)
3438    }
3439
3440    /// Check if the document contains any valid CommonMark headings
3441    ///
3442    /// Returns `true` if there is at least one heading with proper space after `#`.
3443    #[must_use]
3444    pub fn has_valid_headings(&self) -> bool {
3445        self.lines
3446            .iter()
3447            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3448    }
3449}
3450
3451/// Merge adjacent list blocks that should be treated as one
3452fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3453    if list_blocks.len() < 2 {
3454        return;
3455    }
3456
3457    let mut merger = ListBlockMerger::new(content, lines);
3458    *list_blocks = merger.merge(list_blocks);
3459}
3460
/// Helper struct to manage the complex logic of merging list blocks
///
/// Borrows the document text and its per-line metadata for the duration of a merge;
/// it owns no data of its own.
struct ListBlockMerger<'a> {
    /// Document text, handed to the helpers that inspect content between blocks.
    content: &'a str,
    /// Per-line metadata of the document.
    lines: &'a [LineInfo],
}
3466
3467impl<'a> ListBlockMerger<'a> {
3468    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3469        Self { content, lines }
3470    }
3471
3472    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3473        let mut merged = Vec::with_capacity(list_blocks.len());
3474        let mut current = list_blocks[0].clone();
3475
3476        for next in list_blocks.iter().skip(1) {
3477            if self.should_merge_blocks(&current, next) {
3478                current = self.merge_two_blocks(current, next);
3479            } else {
3480                merged.push(current);
3481                current = next.clone();
3482            }
3483        }
3484
3485        merged.push(current);
3486        merged
3487    }
3488
3489    /// Determine if two adjacent list blocks should be merged
3490    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3491        // Basic compatibility checks
3492        if !self.blocks_are_compatible(current, next) {
3493            return false;
3494        }
3495
3496        // Check spacing and content between blocks
3497        let spacing = self.analyze_spacing_between(current, next);
3498        match spacing {
3499            BlockSpacing::Consecutive => true,
3500            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3501            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3502                self.can_merge_with_content_between(current, next)
3503            }
3504        }
3505    }
3506
3507    /// Check if blocks have compatible structure for merging
3508    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3509        current.is_ordered == next.is_ordered
3510            && current.blockquote_prefix == next.blockquote_prefix
3511            && current.nesting_level == next.nesting_level
3512    }
3513
3514    /// Analyze the spacing between two list blocks
3515    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3516        let gap = next.start_line - current.end_line;
3517
3518        match gap {
3519            1 => BlockSpacing::Consecutive,
3520            2 => BlockSpacing::SingleBlank,
3521            _ if gap > 2 => {
3522                if self.has_only_blank_lines_between(current, next) {
3523                    BlockSpacing::MultipleBlanks
3524                } else {
3525                    BlockSpacing::ContentBetween
3526                }
3527            }
3528            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3529        }
3530    }
3531
3532    /// Check if unordered lists can be merged with a single blank line between
3533    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3534        // Check if there are structural separators between the blocks
3535        // If has_meaningful_content_between returns true, it means there are structural separators
3536        if has_meaningful_content_between(self.content, current, next, self.lines) {
3537            return false; // Structural separators prevent merging
3538        }
3539
3540        // Only merge unordered lists with same marker across single blank
3541        !current.is_ordered && current.marker == next.marker
3542    }
3543
3544    /// Check if ordered lists can be merged when there's content between them
3545    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3546        // Do not merge lists if there are structural separators between them
3547        if has_meaningful_content_between(self.content, current, next, self.lines) {
3548            return false; // Structural separators prevent merging
3549        }
3550
3551        // Only consider merging ordered lists if there's no structural content between
3552        current.is_ordered && next.is_ordered
3553    }
3554
3555    /// Check if there are only blank lines between blocks
3556    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3557        for line_num in (current.end_line + 1)..next.start_line {
3558            if let Some(line_info) = self.lines.get(line_num - 1)
3559                && !line_info.content(self.content).trim().is_empty()
3560            {
3561                return false;
3562            }
3563        }
3564        true
3565    }
3566
3567    /// Merge two compatible list blocks into one
3568    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3569        current.end_line = next.end_line;
3570        current.item_lines.extend_from_slice(&next.item_lines);
3571
3572        // Update max marker width
3573        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3574
3575        // Handle marker consistency for unordered lists
3576        if !current.is_ordered && self.markers_differ(&current, next) {
3577            current.marker = None; // Mixed markers
3578        }
3579
3580        current
3581    }
3582
3583    /// Check if two blocks have different markers
3584    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3585        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3586    }
3587}
3588
/// Types of spacing between list blocks.
///
/// Assigned by `ListBlockMerger::analyze_spacing_between` from the line-number
/// gap `next.start_line - current.end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks (gap of 1); also the fallback for a gap of 0.
    Consecutive,
    /// Gap of 2: exactly one line between the blocks. The classifier does not
    /// inspect that line, so it is presumed (not verified) to be blank.
    SingleBlank,
    /// Gap larger than 2 where every line in between is blank.
    MultipleBlanks,
    /// Gap larger than 2 where at least one line in between is non-blank.
    ContentBetween,
}
3597
3598/// Check if there's meaningful content (not just blank lines) between two list blocks
3599fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3600    // Check lines between current.end_line and next.start_line
3601    for line_num in (current.end_line + 1)..next.start_line {
3602        if let Some(line_info) = lines.get(line_num - 1) {
3603            // Convert to 0-indexed
3604            let trimmed = line_info.content(content).trim();
3605
3606            // Skip empty lines
3607            if trimmed.is_empty() {
3608                continue;
3609            }
3610
3611            // Check for structural separators that should separate lists (CommonMark compliant)
3612
3613            // Headings separate lists
3614            if line_info.heading.is_some() {
3615                return true; // Has meaningful content - headings separate lists
3616            }
3617
3618            // Horizontal rules separate lists (---, ***, ___)
3619            if is_horizontal_rule(trimmed) {
3620                return true; // Has meaningful content - horizontal rules separate lists
3621            }
3622
3623            // Tables separate lists
3624            if crate::utils::skip_context::is_table_line(trimmed) {
3625                return true; // Has meaningful content - tables separate lists
3626            }
3627
3628            // Blockquotes separate lists
3629            if trimmed.starts_with('>') {
3630                return true; // Has meaningful content - blockquotes separate lists
3631            }
3632
3633            // Code block fences separate lists (unless properly indented as list content)
3634            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3635                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3636
3637                // Check if this code block is properly indented as list continuation
3638                let min_continuation_indent = if current.is_ordered {
3639                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3640                } else {
3641                    current.nesting_level + 2
3642                };
3643
3644                if line_indent < min_continuation_indent {
3645                    // This is a standalone code block that separates lists
3646                    return true; // Has meaningful content - standalone code blocks separate lists
3647                }
3648            }
3649
3650            // Check if this line has proper indentation for list continuation
3651            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3652
3653            // Calculate minimum indentation needed to be list continuation
3654            let min_indent = if current.is_ordered {
3655                current.nesting_level + current.max_marker_width
3656            } else {
3657                current.nesting_level + 2
3658            };
3659
3660            // If the line is not indented enough to be list continuation, it's meaningful content
3661            if line_indent < min_indent {
3662                return true; // Has meaningful content - content not indented as list continuation
3663            }
3664
3665            // If we reach here, the line is properly indented as list continuation
3666            // Continue checking other lines
3667        }
3668    }
3669
3670    // Only blank lines or properly indented list continuation content between blocks
3671    false
3672}
3673
3674/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
3675/// CommonMark rules for thematic breaks (horizontal rules):
3676/// - May have 0-3 spaces of leading indentation (but NOT tabs)
3677/// - Must have 3+ of the same character (-, *, or _)
3678/// - May have spaces between characters
3679/// - No other characters allowed
3680pub fn is_horizontal_rule_line(line: &str) -> bool {
3681    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
3682    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3683    if leading_spaces > 3 || line.starts_with('\t') {
3684        return false;
3685    }
3686
3687    is_horizontal_rule_content(line.trim())
3688}
3689
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must start with `-`, `*` or `_`. That same character has to occur
/// at least three times, and the only other characters allowed are spaces and
/// tabs. Returns `false` for anything else, including input shorter than three
/// bytes.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which rule character the whole line must use
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass over the characters; no intermediate buffer is needed
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}
3714
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged, so no leading-indentation check is performed
/// here; use `is_horizontal_rule_line` when the raw line has to be validated.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3719
/// Unit tests for `LintContext` construction: line offsets, offset-to-position
/// mapping, per-line metadata, list item detection and MDX ESM block flags.
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input: a single line offset at 0 and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// Single line without a trailing newline; positions are reported 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Several lines including a blank one: checks line start offsets and the
    /// mapping from byte offsets to (line, column).
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    /// Per-line metadata (content, byte offset, indent, blank flag) and the
    /// 1-based line lookup helpers.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Test helper methods
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List item metadata for unordered, nested and ordered items, plus a
    /// plain paragraph line that must not be detected as a list item.
    /// Line 4 ("   2) Nested ordered") is part of the input but not asserted on.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets at line starts, just past each character, and at end of content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM
    /// blocks while the blank, heading and text lines after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines must not be flagged outside the MDX flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}