rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
5use regex::Regex;
6use std::borrow::Cow;
7use std::path::PathBuf;
8use std::sync::LazyLock;
9
/// Times a section of code and optionally reports how long it took.
///
/// Arguments: a label, a boolean switch, and the expression to run. The
/// expression is evaluated exactly once and its value becomes the value of the
/// macro. When the switch is true, the label and elapsed time are written to
/// stderr as `[PROFILE] <label>: <duration>`.
///
/// This timing variant is compiled for every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let value = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        value
    }};
}

/// `wasm32` variant of `profile_section!`: no timing, no output - the section
/// expression is simply evaluated and returned. The label and switch arguments
/// are accepted (to keep call sites identical) but never expanded.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
27
// Comprehensive link pattern that captures both inline links `[text](url "title")`
// and reference links `[text][ref]`.
// Flags: (?s) makes `.` match newlines; (?x) is verbose mode, which is what allows
// the pattern below to be spread over several lines with trailing `#` comments.
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
41
// Image pattern: same shape as the link pattern but requires a leading `!`,
// matching inline images `![alt](url "title")` and reference images `![alt][ref]`.
// Flags: (?s) makes `.` match newlines; (?x) is verbose mode (multi-line layout
// with trailing `#` comments inside the pattern).
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
55
// Reference definition pattern: `[id]: url "title"` anchored to a whole line
// ((?m) makes `^`/`$` match at line boundaries), allowing up to 3 leading spaces.
// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
59
// Pattern for bare URLs with an `http`, `https` or `ftp` scheme (scheme in group 1).
// The body excludes whitespace, angle/square brackets, parentheses, backslashes,
// quotes and backticks; optional port, path, query and fragment parts follow.
static BARE_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap()
});
66
// Pattern for email addresses: local part, `@`, domain, and a final dot followed
// by at least two ASCII letters. Deliberately simple; no capture groups.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures the whole prefix at the start of the text: optional leading
// whitespace, one or more `>` characters, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
/// Pre-computed information about a single line of the document.
///
/// The line text itself is not stored; `byte_offset` and `byte_len` locate it
/// inside the source string (see [`LineInfo::content`]). The remaining fields
/// are per-line classification flags and optional parsed structures.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of leading spaces/tabs
    pub indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item (`None` otherwise)
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading (`None` otherwise)
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote (`None` otherwise)
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (set for every line after the first line of such a span)
    pub in_code_span_continuation: bool,
}
106
107impl LineInfo {
108    /// Get the line content as a string slice from the source document
109    pub fn content<'a>(&self, source: &'a str) -> &'a str {
110        &source[self.byte_offset..self.byte_offset + self.byte_len]
111    }
112}
113
/// Information about a list item, attached to the line on which the item starts
/// (see `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists (`None` when not applicable)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
128
/// Heading style type: how a heading is written in the Markdown source.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
139
/// Parsed link information.
///
/// Text fields are `Cow<'a, str>` so they can either borrow from the document
/// (lifetime `'a`) or own their data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the link starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
164
/// Information about a broken link reported by pulldown-cmark
/// (a reference that could not be resolved).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
173
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `1` or `note`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
186
/// Parsed image information.
///
/// Mirrors [`ParsedLink`] field-for-field, with `alt_text` in place of `text`.
/// Text fields are `Cow<'a, str>` so they can either borrow from the document
/// (lifetime `'a`) or own their data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the image starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
211
/// Reference definition [ref]: url "title"
///
/// All fields are owned; the byte offsets refer to positions in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title (`None` when the definition has no title)
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
232
/// Parsed code span information.
///
/// A span may cover several lines: `line` is where it starts and `end_line`
/// where it ends (the two are equal for a single-line span).
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
253
/// Information about a heading, attached to the line that carries it
/// (see `LineInfo::heading`).
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag.
    /// `ValidHeadingsIter` yields only headings where this is true.
    pub is_valid: bool,
}
281
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type produced by [`ValidHeadingsIter`]; both references
/// borrow from the same line table and point at the same line.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
295
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    // The line table being scanned.
    lines: &'a [LineInfo],
    // 0-based index of the next line to examine.
    current_index: usize,
}
304
305impl<'a> ValidHeadingsIter<'a> {
306    fn new(lines: &'a [LineInfo]) -> Self {
307        Self {
308            lines,
309            current_index: 0,
310        }
311    }
312}
313
314impl<'a> Iterator for ValidHeadingsIter<'a> {
315    type Item = ValidHeading<'a>;
316
317    fn next(&mut self) -> Option<Self::Item> {
318        while self.current_index < self.lines.len() {
319            let idx = self.current_index;
320            self.current_index += 1;
321
322            let line_info = &self.lines[idx];
323            if let Some(heading) = &line_info.heading
324                && heading.is_valid
325            {
326                return Some(ValidHeading {
327                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
328                    heading,
329                    line_info,
330                });
331            }
332        }
333        None
334    }
335}
336
/// Information about a blockquote line, attached to the line it describes
/// (see `LineInfo::blockquote`).
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
357
/// Information about a list block (a run of lines forming one list).
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like start_line/end_line - confirm against parse_list_blocks
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
378
379use std::sync::{Arc, OnceLock};
380
/// Character frequency data for fast content analysis.
///
/// Each field counts occurrences of one Markdown-significant character; the
/// parenthesised notes name the constructs that character can introduce.
/// `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
409
/// Pre-parsed HTML tag information (one entry per tag occurrence).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the tag starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
432
/// Pre-parsed emphasis span information (one entry per emphasized run).
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
453
/// Pre-parsed table row information (one entry per table line).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
466
/// Pre-parsed bare URL information (not in links).
///
/// Also used for bare email addresses, which are reported with `url_type`
/// set to "email".
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the URL starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
485
/// Pre-parsed view of one Markdown document, built by `LintContext::new`.
///
/// Public fields are computed eagerly during construction. Private `OnceLock`
/// caches are filled on first use through their accessor methods
/// (`html_tags()`, `emphasis_spans()`, `table_rows()`, `bare_urls()`,
/// `has_mixed_list_nesting()`); the code-span cache is the exception and is
/// already populated by `new`.
pub struct LintContext<'a> {
    pub content: &'a str,
    pub line_offsets: Vec<usize>,
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (pre-populated by `new`; `code_spans()` re-parses only if empty)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
}
511
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line. Concatenated in field order they reproduce the line exactly.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The run of consecutive `>` characters
    markers: &'a str,
    /// Spaces/tabs directly after the markers
    spaces_after: &'a str,
    /// Everything that remains on the line
    content: &'a str,
}

/// Split a line into blockquote components without using a regex.
///
/// Returns `None` unless the first character after any leading spaces/tabs is
/// `>`. Only a contiguous run of `>` counts as markers, so in `> > x` the
/// second `>` is part of `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Number of leading bytes of `text` that satisfy `pred`.
    fn run_len(text: &str, pred: impl Fn(u8) -> bool) -> usize {
        text.bytes().take_while(|&b| pred(b)).count()
    }

    let is_blank = |b: u8| matches!(b, b' ' | b'\t');

    // Every split point follows ASCII bytes only, so it is always a char boundary.
    let (indent, after_indent) = line.split_at(run_len(line, is_blank));

    let marker_len = run_len(after_indent, |b| b == b'>');
    if marker_len == 0 {
        // Either the line ended or the first non-blank byte is not '>'.
        return None;
    }
    let (markers, after_markers) = after_indent.split_at(marker_len);

    let (spaces_after, content) = after_markers.split_at(run_len(after_markers, is_blank));

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
556
557impl<'a> LintContext<'a> {
    /// Build a `LintContext` for `content`, running every eager analysis pass once.
    ///
    /// * `content` - the Markdown document; borrowed for the lifetime of the context.
    /// * `flavor` - the Markdown flavor; enables flavor-specific passes (e.g. the
    ///   autodoc range scan only runs for `MarkdownFlavor::MkDocs`).
    /// * `source_file` - optional path of the document, stored as-is.
    ///
    /// The passes run in a fixed order because later passes consume the results of
    /// earlier ones (line info -> HTML/ESM blocks -> headings/blockquotes -> code
    /// spans -> links/images/...). On non-WASM targets, setting the
    /// `RUMDL_PROFILE_QUADRATIC` environment variable (any value) makes each pass
    /// print its elapsed time to stderr via `profile_section!`.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is opt-in through the environment; it is always off on WASM.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset of the start of every line: 0, then one past each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Detect code blocks once and cache them
        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        // Pre-compute HTML comment ranges ONCE for all operations
        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Pre-compute line information (without headings/blockquotes yet)
        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // Detect HTML blocks BEFORE heading detection
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        // Detect ESM import/export blocks in MDX files BEFORE heading detection
        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        // Now detect headings and blockquotes
        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        // Parse code spans early so we can exclude them from link/image parsing
        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark lines that are continuations of multi-line code spans
        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
        for span in &code_spans {
            if span.end_line > span.line {
                // Mark lines after the first line as continuations
                // (span line numbers are 1-indexed, `lines` is 0-indexed, hence `line_num - 1`)
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        // Parse links, images, references, and list blocks
        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        // Compute character frequency for fast content analysis
        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were already parsed above, so the cache starts out filled.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            // The remaining caches start empty and are filled by their accessors on first use.
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
715
716    /// Get code spans - computed lazily on first access
717    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
718        Arc::clone(
719            self.code_spans_cache
720                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
721        )
722    }
723
724    /// Get HTML comment ranges - pre-computed during LintContext construction
725    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
726        &self.html_comment_ranges
727    }
728
729    /// Get HTML tags - computed lazily on first access
730    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
731        Arc::clone(self.html_tags_cache.get_or_init(|| {
732            Arc::new(Self::parse_html_tags(
733                self.content,
734                &self.lines,
735                &self.code_blocks,
736                self.flavor,
737            ))
738        }))
739    }
740
741    /// Get emphasis spans - computed lazily on first access
742    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
743        Arc::clone(
744            self.emphasis_spans_cache
745                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
746        )
747    }
748
749    /// Get table rows - computed lazily on first access
750    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
751        Arc::clone(
752            self.table_rows_cache
753                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
754        )
755    }
756
757    /// Get bare URLs - computed lazily on first access
758    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
759        Arc::clone(
760            self.bare_urls_cache
761                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
762        )
763    }
764
765    /// Check if document has mixed ordered/unordered list nesting.
766    /// Result is cached after first computation (document-level invariant).
767    /// This is used by MD007 for smart style auto-detection.
768    pub fn has_mixed_list_nesting(&self) -> bool {
769        *self
770            .has_mixed_list_nesting_cache
771            .get_or_init(|| self.compute_mixed_list_nesting())
772    }
773
774    /// Internal computation for mixed list nesting (only called once per LintContext).
775    fn compute_mixed_list_nesting(&self) -> bool {
776        // Track parent list items by their marker position and type
777        // Using marker_column instead of indent because it works correctly
778        // for blockquoted content where indent doesn't account for the prefix
779        // Stack stores: (marker_column, is_ordered)
780        let mut stack: Vec<(usize, bool)> = Vec::new();
781        let mut last_was_blank = false;
782
783        for line_info in &self.lines {
784            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
785            if line_info.in_code_block
786                || line_info.in_front_matter
787                || line_info.in_mkdocstrings
788                || line_info.in_html_comment
789                || line_info.in_esm_block
790            {
791                continue;
792            }
793
794            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
795            if line_info.is_blank {
796                last_was_blank = true;
797                continue;
798            }
799
800            if let Some(list_item) = &line_info.list_item {
801                // Normalize column 1 to column 0 (consistent with MD007 check function)
802                let current_pos = if list_item.marker_column == 1 {
803                    0
804                } else {
805                    list_item.marker_column
806                };
807
808                // If there was a blank line and this item is at root level, reset stack
809                if last_was_blank && current_pos == 0 {
810                    stack.clear();
811                }
812                last_was_blank = false;
813
814                // Pop items at same or greater position (they're siblings or deeper, not parents)
815                while let Some(&(pos, _)) = stack.last() {
816                    if pos >= current_pos {
817                        stack.pop();
818                    } else {
819                        break;
820                    }
821                }
822
823                // Check if immediate parent has different type - this is mixed nesting
824                if let Some(&(_, parent_is_ordered)) = stack.last()
825                    && parent_is_ordered != list_item.is_ordered
826                {
827                    return true; // Found mixed nesting - early exit
828                }
829
830                stack.push((current_pos, list_item.is_ordered));
831            } else {
832                // Non-list line (but not blank) - could be paragraph or other content
833                last_was_blank = false;
834            }
835        }
836
837        false
838    }
839
840    /// Map a byte offset to (line, column)
841    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
842        match self.line_offsets.binary_search(&offset) {
843            Ok(line) => (line + 1, 1),
844            Err(line) => {
845                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
846                (line, offset - line_start + 1)
847            }
848        }
849    }
850
851    /// Check if a position is within a code block or code span
852    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
853        // Check code blocks first
854        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
855            return true;
856        }
857
858        // Check inline code spans (lazy load if needed)
859        self.code_spans()
860            .iter()
861            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
862    }
863
864    /// Get line information by line number (1-indexed)
865    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
866        if line_num > 0 {
867            self.lines.get(line_num - 1)
868        } else {
869            None
870        }
871    }
872
873    /// Get byte offset for a line number (1-indexed)
874    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
875        self.line_info(line_num).map(|info| info.byte_offset)
876    }
877
878    /// Get URL for a reference link/image by its ID
879    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
880        let normalized_id = ref_id.to_lowercase();
881        self.reference_defs
882            .iter()
883            .find(|def| def.id == normalized_id)
884            .map(|def| def.url.as_str())
885    }
886
887    /// Check if a line is part of a list block
888    pub fn is_in_list_block(&self, line_num: usize) -> bool {
889        self.list_blocks
890            .iter()
891            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
892    }
893
894    /// Get the list block containing a specific line
895    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
896        self.list_blocks
897            .iter()
898            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
899    }
900
901    // Compatibility methods for DocumentStructure migration
902
903    /// Check if a line is within a code block
904    pub fn is_in_code_block(&self, line_num: usize) -> bool {
905        if line_num == 0 || line_num > self.lines.len() {
906            return false;
907        }
908        self.lines[line_num - 1].in_code_block
909    }
910
911    /// Check if a line is within front matter
912    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
913        if line_num == 0 || line_num > self.lines.len() {
914            return false;
915        }
916        self.lines[line_num - 1].in_front_matter
917    }
918
919    /// Check if a line is within an HTML block
920    pub fn is_in_html_block(&self, line_num: usize) -> bool {
921        if line_num == 0 || line_num > self.lines.len() {
922            return false;
923        }
924        self.lines[line_num - 1].in_html_block
925    }
926
927    /// Check if a line and column is within a code span
928    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
929        if line_num == 0 || line_num > self.lines.len() {
930            return false;
931        }
932
933        // Use the code spans cache to check
934        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
935        // Convert col to 0-indexed for comparison
936        let col_0indexed = if col > 0 { col - 1 } else { 0 };
937        let code_spans = self.code_spans();
938        code_spans.iter().any(|span| {
939            // Check if line is within the span's line range
940            if line_num < span.line || line_num > span.end_line {
941                return false;
942            }
943
944            if span.line == span.end_line {
945                // Single-line span: check column bounds
946                col_0indexed >= span.start_col && col_0indexed < span.end_col
947            } else if line_num == span.line {
948                // First line of multi-line span: anything after start_col is in span
949                col_0indexed >= span.start_col
950            } else if line_num == span.end_line {
951                // Last line of multi-line span: anything before end_col is in span
952                col_0indexed < span.end_col
953            } else {
954                // Middle line of multi-line span: entire line is in span
955                true
956            }
957        })
958    }
959
960    /// Check if a byte offset is within a code span
961    #[inline]
962    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
963        let code_spans = self.code_spans();
964        code_spans
965            .iter()
966            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
967    }
968
969    /// Check if a byte position is within a reference definition
970    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
971    #[inline]
972    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
973        self.reference_defs
974            .iter()
975            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
976    }
977
978    /// Check if a byte position is within an HTML comment
979    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
980    /// where k is the number of HTML comments (typically very small)
981    #[inline]
982    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
983        self.html_comment_ranges
984            .iter()
985            .any(|range| byte_pos >= range.start && byte_pos < range.end)
986    }
987
988    /// Check if a byte position is within an HTML tag (including multiline tags)
989    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
990    #[inline]
991    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
992        self.html_tags()
993            .iter()
994            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
995    }
996
997    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
998    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
999        self.jinja_ranges
1000            .iter()
1001            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1002    }
1003
1004    /// Check if a byte position is within a link reference definition title
1005    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1006        self.reference_defs.iter().any(|def| {
1007            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1008                byte_pos >= start && byte_pos < end
1009            } else {
1010                false
1011            }
1012        })
1013    }
1014
1015    /// Check if content has any instances of a specific character (fast)
1016    pub fn has_char(&self, ch: char) -> bool {
1017        match ch {
1018            '#' => self.char_frequency.hash_count > 0,
1019            '*' => self.char_frequency.asterisk_count > 0,
1020            '_' => self.char_frequency.underscore_count > 0,
1021            '-' => self.char_frequency.hyphen_count > 0,
1022            '+' => self.char_frequency.plus_count > 0,
1023            '>' => self.char_frequency.gt_count > 0,
1024            '|' => self.char_frequency.pipe_count > 0,
1025            '[' => self.char_frequency.bracket_count > 0,
1026            '`' => self.char_frequency.backtick_count > 0,
1027            '<' => self.char_frequency.lt_count > 0,
1028            '!' => self.char_frequency.exclamation_count > 0,
1029            '\n' => self.char_frequency.newline_count > 0,
1030            _ => self.content.contains(ch), // Fallback for other characters
1031        }
1032    }
1033
1034    /// Get count of a specific character (fast)
1035    pub fn char_count(&self, ch: char) -> usize {
1036        match ch {
1037            '#' => self.char_frequency.hash_count,
1038            '*' => self.char_frequency.asterisk_count,
1039            '_' => self.char_frequency.underscore_count,
1040            '-' => self.char_frequency.hyphen_count,
1041            '+' => self.char_frequency.plus_count,
1042            '>' => self.char_frequency.gt_count,
1043            '|' => self.char_frequency.pipe_count,
1044            '[' => self.char_frequency.bracket_count,
1045            '`' => self.char_frequency.backtick_count,
1046            '<' => self.char_frequency.lt_count,
1047            '!' => self.char_frequency.exclamation_count,
1048            '\n' => self.char_frequency.newline_count,
1049            _ => self.content.matches(ch).count(), // Fallback for other characters
1050        }
1051    }
1052
1053    /// Check if content likely contains headings (fast)
1054    pub fn likely_has_headings(&self) -> bool {
1055        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1056    }
1057
1058    /// Check if content likely contains lists (fast)
1059    pub fn likely_has_lists(&self) -> bool {
1060        self.char_frequency.asterisk_count > 0
1061            || self.char_frequency.hyphen_count > 0
1062            || self.char_frequency.plus_count > 0
1063    }
1064
1065    /// Check if content likely contains emphasis (fast)
1066    pub fn likely_has_emphasis(&self) -> bool {
1067        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1068    }
1069
1070    /// Check if content likely contains tables (fast)
1071    pub fn likely_has_tables(&self) -> bool {
1072        self.char_frequency.pipe_count > 2
1073    }
1074
1075    /// Check if content likely contains blockquotes (fast)
1076    pub fn likely_has_blockquotes(&self) -> bool {
1077        self.char_frequency.gt_count > 0
1078    }
1079
1080    /// Check if content likely contains code (fast)
1081    pub fn likely_has_code(&self) -> bool {
1082        self.char_frequency.backtick_count > 0
1083    }
1084
1085    /// Check if content likely contains links or images (fast)
1086    pub fn likely_has_links_or_images(&self) -> bool {
1087        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1088    }
1089
1090    /// Check if content likely contains HTML (fast)
1091    pub fn likely_has_html(&self) -> bool {
1092        self.char_frequency.lt_count > 0
1093    }
1094
1095    /// Get HTML tags on a specific line
1096    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1097        self.html_tags()
1098            .iter()
1099            .filter(|tag| tag.line == line_num)
1100            .cloned()
1101            .collect()
1102    }
1103
1104    /// Get emphasis spans on a specific line
1105    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1106        self.emphasis_spans()
1107            .iter()
1108            .filter(|span| span.line == line_num)
1109            .cloned()
1110            .collect()
1111    }
1112
1113    /// Get table rows on a specific line
1114    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1115        self.table_rows()
1116            .iter()
1117            .filter(|row| row.line == line_num)
1118            .cloned()
1119            .collect()
1120    }
1121
1122    /// Get bare URLs on a specific line
1123    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1124        self.bare_urls()
1125            .iter()
1126            .filter(|url| url.line == line_num)
1127            .cloned()
1128            .collect()
1129    }
1130
1131    /// Find the line index for a given byte offset using binary search.
1132    /// Returns (line_index, line_number, column) where:
1133    /// - line_index is the 0-based index in the lines array
1134    /// - line_number is the 1-based line number
1135    /// - column is the byte offset within that line
1136    #[inline]
1137    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1138        // Binary search to find the line containing this byte offset
1139        let idx = match lines.binary_search_by(|line| {
1140            if byte_offset < line.byte_offset {
1141                std::cmp::Ordering::Greater
1142            } else if byte_offset > line.byte_offset + line.byte_len {
1143                std::cmp::Ordering::Less
1144            } else {
1145                std::cmp::Ordering::Equal
1146            }
1147        }) {
1148            Ok(idx) => idx,
1149            Err(idx) => idx.saturating_sub(1),
1150        };
1151
1152        let line = &lines[idx];
1153        let line_num = idx + 1;
1154        let col = byte_offset.saturating_sub(line.byte_offset);
1155
1156        (idx, line_num, col)
1157    }
1158
1159    /// Check if a byte offset is within a code span using binary search
1160    #[inline]
1161    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1162        // Since spans are sorted by byte_offset, use partition_point for binary search
1163        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1164
1165        // Check the span that starts at or before our offset
1166        if idx > 0 {
1167            let span = &code_spans[idx - 1];
1168            if offset >= span.byte_offset && offset < span.byte_end {
1169                return true;
1170            }
1171        }
1172
1173        false
1174    }
1175
1176    /// Collect byte ranges of all links using pulldown-cmark
1177    /// This is used to skip heading detection for lines that fall within link syntax
1178    /// (e.g., multiline links like `[text](url\n#fragment)`)
1179    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1180        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1181
1182        let mut link_ranges = Vec::new();
1183        let mut options = Options::empty();
1184        options.insert(Options::ENABLE_WIKILINKS);
1185        options.insert(Options::ENABLE_FOOTNOTES);
1186
1187        let parser = Parser::new_ext(content, options).into_offset_iter();
1188        let mut link_stack: Vec<usize> = Vec::new();
1189
1190        for (event, range) in parser {
1191            match event {
1192                Event::Start(Tag::Link { .. }) => {
1193                    link_stack.push(range.start);
1194                }
1195                Event::End(TagEnd::Link) => {
1196                    if let Some(start_pos) = link_stack.pop() {
1197                        link_ranges.push((start_pos, range.end));
1198                    }
1199                }
1200                _ => {}
1201            }
1202        }
1203
1204        link_ranges
1205    }
1206
1207    /// Parse all links in the content
1208    fn parse_links(
1209        content: &'a str,
1210        lines: &[LineInfo],
1211        code_blocks: &[(usize, usize)],
1212        code_spans: &[CodeSpan],
1213        flavor: MarkdownFlavor,
1214        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1215    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1216        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1217        use std::collections::HashSet;
1218
1219        let mut links = Vec::with_capacity(content.len() / 500);
1220        let mut broken_links = Vec::new();
1221        let mut footnote_refs = Vec::new();
1222
1223        // Track byte positions of links found by pulldown-cmark
1224        let mut found_positions = HashSet::new();
1225
1226        // Use pulldown-cmark's streaming parser with BrokenLink callback
1227        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1228        // This automatically handles:
1229        // - Escaped links (won't generate events)
1230        // - Links in code blocks/spans (won't generate Link events)
1231        // - Images (generates Tag::Image instead)
1232        // - Reference resolution (dest_url is already resolved!)
1233        // - Broken references (callback is invoked)
1234        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1235        let mut options = Options::empty();
1236        options.insert(Options::ENABLE_WIKILINKS);
1237        options.insert(Options::ENABLE_FOOTNOTES);
1238
1239        let parser = Parser::new_with_broken_link_callback(
1240            content,
1241            options,
1242            Some(|link: BrokenLink<'_>| {
1243                broken_links.push(BrokenLinkInfo {
1244                    reference: link.reference.to_string(),
1245                    span: link.span.clone(),
1246                });
1247                None
1248            }),
1249        )
1250        .into_offset_iter();
1251
1252        let mut link_stack: Vec<(
1253            usize,
1254            usize,
1255            pulldown_cmark::CowStr<'a>,
1256            LinkType,
1257            pulldown_cmark::CowStr<'a>,
1258        )> = Vec::new();
1259        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1260
1261        for (event, range) in parser {
1262            match event {
1263                Event::Start(Tag::Link {
1264                    link_type,
1265                    dest_url,
1266                    id,
1267                    ..
1268                }) => {
1269                    // Link start - record position, URL, and reference ID
1270                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1271                    text_chunks.clear();
1272                }
1273                Event::Text(text) if !link_stack.is_empty() => {
1274                    // Track text content with its byte range
1275                    text_chunks.push((text.to_string(), range.start, range.end));
1276                }
1277                Event::Code(code) if !link_stack.is_empty() => {
1278                    // Include inline code in link text (with backticks)
1279                    let code_text = format!("`{code}`");
1280                    text_chunks.push((code_text, range.start, range.end));
1281                }
1282                Event::End(TagEnd::Link) => {
1283                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1284                        // Skip if in HTML comment
1285                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1286                            text_chunks.clear();
1287                            continue;
1288                        }
1289
1290                        // Find line and column information
1291                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1292
1293                        // Skip if this link is on a MkDocs snippet line
1294                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1295                            text_chunks.clear();
1296                            continue;
1297                        }
1298
1299                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1300
1301                        let is_reference = matches!(
1302                            link_type,
1303                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1304                        );
1305
1306                        // Extract link text directly from source bytes to preserve escaping
1307                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1308                        let link_text = if start_pos < content.len() {
1309                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1310
1311                            // Find MATCHING ] by tracking bracket depth for nested brackets
1312                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1313                            // Brackets inside code spans (between backticks) should be ignored
1314                            let mut close_pos = None;
1315                            let mut depth = 0;
1316                            let mut in_code_span = false;
1317
1318                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1319                                // Count preceding backslashes
1320                                let mut backslash_count = 0;
1321                                let mut j = i;
1322                                while j > 0 && link_bytes[j - 1] == b'\\' {
1323                                    backslash_count += 1;
1324                                    j -= 1;
1325                                }
1326                                let is_escaped = backslash_count % 2 != 0;
1327
1328                                // Track code spans - backticks toggle in/out of code
1329                                if byte == b'`' && !is_escaped {
1330                                    in_code_span = !in_code_span;
1331                                }
1332
1333                                // Only count brackets when NOT in a code span
1334                                if !is_escaped && !in_code_span {
1335                                    if byte == b'[' {
1336                                        depth += 1;
1337                                    } else if byte == b']' {
1338                                        if depth == 0 {
1339                                            // Found the matching closing bracket
1340                                            close_pos = Some(i);
1341                                            break;
1342                                        } else {
1343                                            depth -= 1;
1344                                        }
1345                                    }
1346                                }
1347                            }
1348
1349                            if let Some(pos) = close_pos {
1350                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1351                            } else {
1352                                Cow::Borrowed("")
1353                            }
1354                        } else {
1355                            Cow::Borrowed("")
1356                        };
1357
1358                        // For reference links, use the actual reference ID from pulldown-cmark
1359                        let reference_id = if is_reference && !ref_id.is_empty() {
1360                            Some(Cow::Owned(ref_id.to_lowercase()))
1361                        } else if is_reference {
1362                            // For collapsed/shortcut references without explicit ID, use the link text
1363                            Some(Cow::Owned(link_text.to_lowercase()))
1364                        } else {
1365                            None
1366                        };
1367
1368                        // WORKAROUND: pulldown-cmark bug with escaped brackets
1369                        // Check for escaped image syntax: \![text](url)
1370                        // The byte_offset points to the '[', so we check 2 bytes back for '\!'
1371                        let has_escaped_bang = start_pos >= 2
1372                            && content.as_bytes().get(start_pos - 2) == Some(&b'\\')
1373                            && content.as_bytes().get(start_pos - 1) == Some(&b'!');
1374
1375                        // Check for escaped bracket: \[text](url)
1376                        // The byte_offset points to the '[', so we check 1 byte back for '\'
1377                        let has_escaped_bracket =
1378                            start_pos >= 1 && content.as_bytes().get(start_pos - 1) == Some(&b'\\');
1379
1380                        if has_escaped_bang || has_escaped_bracket {
1381                            text_chunks.clear();
1382                            continue; // Skip: this is escaped markdown, not a real link
1383                        }
1384
1385                        // Track this position as found
1386                        found_positions.insert(start_pos);
1387
1388                        links.push(ParsedLink {
1389                            line: line_num,
1390                            start_col: col_start,
1391                            end_col: col_end,
1392                            byte_offset: start_pos,
1393                            byte_end: range.end,
1394                            text: link_text,
1395                            url: Cow::Owned(url.to_string()),
1396                            is_reference,
1397                            reference_id,
1398                            link_type,
1399                        });
1400
1401                        text_chunks.clear();
1402                    }
1403                }
1404                Event::FootnoteReference(footnote_id) => {
1405                    // Capture footnote references like [^1], [^note]
1406                    // Skip if in HTML comment
1407                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1408                        continue;
1409                    }
1410
1411                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1412                    footnote_refs.push(FootnoteRef {
1413                        id: footnote_id.to_string(),
1414                        line: line_num,
1415                        byte_offset: range.start,
1416                        byte_end: range.end,
1417                    });
1418                }
1419                _ => {}
1420            }
1421        }
1422
1423        // Also find undefined references using regex
1424        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1425        // because the reference is undefined
1426        for cap in LINK_PATTERN.captures_iter(content) {
1427            let full_match = cap.get(0).unwrap();
1428            let match_start = full_match.start();
1429            let match_end = full_match.end();
1430
1431            // Skip if this was already found by pulldown-cmark (it's a valid link)
1432            if found_positions.contains(&match_start) {
1433                continue;
1434            }
1435
1436            // Skip if escaped
1437            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1438                continue;
1439            }
1440
1441            // Skip if it's an image
1442            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1443                continue;
1444            }
1445
1446            // Skip if in code block
1447            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1448                continue;
1449            }
1450
1451            // Skip if in code span
1452            if Self::is_offset_in_code_span(code_spans, match_start) {
1453                continue;
1454            }
1455
1456            // Skip if in HTML comment
1457            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1458                continue;
1459            }
1460
1461            // Find line and column information
1462            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1463
1464            // Skip if this link is on a MkDocs snippet line
1465            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1466                continue;
1467            }
1468
1469            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1470
1471            let text = cap.get(1).map_or("", |m| m.as_str());
1472
1473            // Only process reference links (group 6)
1474            if let Some(ref_id) = cap.get(6) {
1475                let ref_id_str = ref_id.as_str();
1476                let normalized_ref = if ref_id_str.is_empty() {
1477                    Cow::Owned(text.to_lowercase()) // Implicit reference
1478                } else {
1479                    Cow::Owned(ref_id_str.to_lowercase())
1480                };
1481
1482                // This is an undefined reference (pulldown-cmark didn't parse it)
1483                links.push(ParsedLink {
1484                    line: line_num,
1485                    start_col: col_start,
1486                    end_col: col_end,
1487                    byte_offset: match_start,
1488                    byte_end: match_end,
1489                    text: Cow::Borrowed(text),
1490                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1491                    is_reference: true,
1492                    reference_id: Some(normalized_ref),
1493                    link_type: LinkType::Reference, // Undefined references are reference-style
1494                });
1495            }
1496        }
1497
1498        (links, broken_links, footnote_refs)
1499    }
1500
1501    /// Parse all images in the content
1502    fn parse_images(
1503        content: &'a str,
1504        lines: &[LineInfo],
1505        code_blocks: &[(usize, usize)],
1506        code_spans: &[CodeSpan],
1507        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1508    ) -> Vec<ParsedImage<'a>> {
1509        use crate::utils::skip_context::is_in_html_comment_ranges;
1510        use std::collections::HashSet;
1511
1512        // Pre-size based on a heuristic: images are less common than links
1513        let mut images = Vec::with_capacity(content.len() / 1000);
1514        let mut found_positions = HashSet::new();
1515
1516        // Use pulldown-cmark for parsing - more accurate and faster
1517        let parser = Parser::new(content).into_offset_iter();
1518        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1519            Vec::new();
1520        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1521
1522        for (event, range) in parser {
1523            match event {
1524                Event::Start(Tag::Image {
1525                    link_type,
1526                    dest_url,
1527                    id,
1528                    ..
1529                }) => {
1530                    image_stack.push((range.start, dest_url, link_type, id));
1531                    text_chunks.clear();
1532                }
1533                Event::Text(text) if !image_stack.is_empty() => {
1534                    text_chunks.push((text.to_string(), range.start, range.end));
1535                }
1536                Event::Code(code) if !image_stack.is_empty() => {
1537                    let code_text = format!("`{code}`");
1538                    text_chunks.push((code_text, range.start, range.end));
1539                }
1540                Event::End(TagEnd::Image) => {
1541                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1542                        // Skip if in code block
1543                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1544                            continue;
1545                        }
1546
1547                        // Skip if in code span
1548                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1549                            continue;
1550                        }
1551
1552                        // Skip if in HTML comment
1553                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1554                            continue;
1555                        }
1556
1557                        // Find line and column using binary search
1558                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1559                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1560
1561                        let is_reference = matches!(
1562                            link_type,
1563                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1564                        );
1565
1566                        // Extract alt text directly from source bytes to preserve escaping
1567                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1568                        let alt_text = if start_pos < content.len() {
1569                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1570
1571                            // Find MATCHING ] by tracking bracket depth for nested brackets
1572                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1573                            let mut close_pos = None;
1574                            let mut depth = 0;
1575
1576                            if image_bytes.len() > 2 {
1577                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1578                                    // Count preceding backslashes
1579                                    let mut backslash_count = 0;
1580                                    let mut j = i;
1581                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1582                                        backslash_count += 1;
1583                                        j -= 1;
1584                                    }
1585                                    let is_escaped = backslash_count % 2 != 0;
1586
1587                                    if !is_escaped {
1588                                        if byte == b'[' {
1589                                            depth += 1;
1590                                        } else if byte == b']' {
1591                                            if depth == 0 {
1592                                                // Found the matching closing bracket
1593                                                close_pos = Some(i);
1594                                                break;
1595                                            } else {
1596                                                depth -= 1;
1597                                            }
1598                                        }
1599                                    }
1600                                }
1601                            }
1602
1603                            if let Some(pos) = close_pos {
1604                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1605                            } else {
1606                                Cow::Borrowed("")
1607                            }
1608                        } else {
1609                            Cow::Borrowed("")
1610                        };
1611
1612                        let reference_id = if is_reference && !ref_id.is_empty() {
1613                            Some(Cow::Owned(ref_id.to_lowercase()))
1614                        } else if is_reference {
1615                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1616                        } else {
1617                            None
1618                        };
1619
1620                        found_positions.insert(start_pos);
1621                        images.push(ParsedImage {
1622                            line: line_num,
1623                            start_col: col_start,
1624                            end_col: col_end,
1625                            byte_offset: start_pos,
1626                            byte_end: range.end,
1627                            alt_text,
1628                            url: Cow::Owned(url.to_string()),
1629                            is_reference,
1630                            reference_id,
1631                            link_type,
1632                        });
1633                    }
1634                }
1635                _ => {}
1636            }
1637        }
1638
1639        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1640        for cap in IMAGE_PATTERN.captures_iter(content) {
1641            let full_match = cap.get(0).unwrap();
1642            let match_start = full_match.start();
1643            let match_end = full_match.end();
1644
1645            // Skip if already found by pulldown-cmark
1646            if found_positions.contains(&match_start) {
1647                continue;
1648            }
1649
1650            // Skip if the ! is escaped
1651            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652                continue;
1653            }
1654
1655            // Skip if in code block, code span, or HTML comment
1656            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1657                || Self::is_offset_in_code_span(code_spans, match_start)
1658                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1659            {
1660                continue;
1661            }
1662
1663            // Only process reference images (undefined references not found by pulldown-cmark)
1664            if let Some(ref_id) = cap.get(6) {
1665                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1666                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1667                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1668                let ref_id_str = ref_id.as_str();
1669                let normalized_ref = if ref_id_str.is_empty() {
1670                    Cow::Owned(alt_text.to_lowercase())
1671                } else {
1672                    Cow::Owned(ref_id_str.to_lowercase())
1673                };
1674
1675                images.push(ParsedImage {
1676                    line: line_num,
1677                    start_col: col_start,
1678                    end_col: col_end,
1679                    byte_offset: match_start,
1680                    byte_end: match_end,
1681                    alt_text: Cow::Borrowed(alt_text),
1682                    url: Cow::Borrowed(""),
1683                    is_reference: true,
1684                    reference_id: Some(normalized_ref),
1685                    link_type: LinkType::Reference, // Undefined references are reference-style
1686                });
1687            }
1688        }
1689
1690        images
1691    }
1692
1693    /// Parse reference definitions
1694    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1695        // Pre-size based on lines count as reference definitions are line-based
1696        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1697
1698        for (line_idx, line_info) in lines.iter().enumerate() {
1699            // Skip lines in code blocks
1700            if line_info.in_code_block {
1701                continue;
1702            }
1703
1704            let line = line_info.content(content);
1705            let line_num = line_idx + 1;
1706
1707            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1708                let id = cap.get(1).unwrap().as_str().to_lowercase();
1709                let url = cap.get(2).unwrap().as_str().to_string();
1710                let title_match = cap.get(3).or_else(|| cap.get(4));
1711                let title = title_match.map(|m| m.as_str().to_string());
1712
1713                // Calculate byte positions
1714                // The match starts at the beginning of the line (0) and extends to the end
1715                let match_obj = cap.get(0).unwrap();
1716                let byte_offset = line_info.byte_offset + match_obj.start();
1717                let byte_end = line_info.byte_offset + match_obj.end();
1718
1719                // Calculate title byte positions (includes the quote character before content)
1720                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1721                    // The match is the content inside quotes, so we include the quote before
1722                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1723                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1724                    (Some(start), Some(end))
1725                } else {
1726                    (None, None)
1727                };
1728
1729                refs.push(ReferenceDef {
1730                    line: line_num,
1731                    id,
1732                    url,
1733                    title,
1734                    byte_offset,
1735                    byte_end,
1736                    title_byte_start,
1737                    title_byte_end,
1738                });
1739            }
1740        }
1741
1742        refs
1743    }
1744
1745    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1746    /// Handles nested blockquotes like `> > > content`
1747    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1748    #[inline]
1749    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1750        let trimmed_start = line.trim_start();
1751        if !trimmed_start.starts_with('>') {
1752            return None;
1753        }
1754
1755        // Track total prefix length to handle nested blockquotes
1756        let mut remaining = line;
1757        let mut total_prefix_len = 0;
1758
1759        loop {
1760            let trimmed = remaining.trim_start();
1761            if !trimmed.starts_with('>') {
1762                break;
1763            }
1764
1765            // Add leading whitespace + '>' to prefix
1766            let leading_ws_len = remaining.len() - trimmed.len();
1767            total_prefix_len += leading_ws_len + 1;
1768
1769            let after_gt = &trimmed[1..];
1770
1771            // Handle optional whitespace after '>' (space or tab)
1772            if let Some(stripped) = after_gt.strip_prefix(' ') {
1773                total_prefix_len += 1;
1774                remaining = stripped;
1775            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1776                total_prefix_len += 1;
1777                remaining = stripped;
1778            } else {
1779                remaining = after_gt;
1780            }
1781        }
1782
1783        Some((&line[..total_prefix_len], remaining))
1784    }
1785
1786    /// Fast unordered list parser - replaces regex for 5-10x speedup
1787    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1788    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1789    #[inline]
1790    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1791        let bytes = line.as_bytes();
1792        let mut i = 0;
1793
1794        // Skip leading whitespace
1795        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1796            i += 1;
1797        }
1798
1799        // Check for marker
1800        if i >= bytes.len() {
1801            return None;
1802        }
1803        let marker = bytes[i] as char;
1804        if marker != '-' && marker != '*' && marker != '+' {
1805            return None;
1806        }
1807        let marker_pos = i;
1808        i += 1;
1809
1810        // Collect spacing after marker (space or tab only)
1811        let spacing_start = i;
1812        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1813            i += 1;
1814        }
1815
1816        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1817    }
1818
1819    /// Fast ordered list parser - replaces regex for 5-10x speedup
1820    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1821    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1822    #[inline]
1823    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1824        let bytes = line.as_bytes();
1825        let mut i = 0;
1826
1827        // Skip leading whitespace
1828        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1829            i += 1;
1830        }
1831
1832        // Collect digits
1833        let number_start = i;
1834        while i < bytes.len() && bytes[i].is_ascii_digit() {
1835            i += 1;
1836        }
1837        if i == number_start {
1838            return None; // No digits found
1839        }
1840
1841        // Check for delimiter
1842        if i >= bytes.len() {
1843            return None;
1844        }
1845        let delimiter = bytes[i] as char;
1846        if delimiter != '.' && delimiter != ')' {
1847            return None;
1848        }
1849        let delimiter_pos = i;
1850        i += 1;
1851
1852        // Collect spacing after delimiter (space or tab only)
1853        let spacing_start = i;
1854        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1855            i += 1;
1856        }
1857
1858        Some((
1859            &line[..number_start],
1860            &line[number_start..delimiter_pos],
1861            delimiter,
1862            &line[spacing_start..i],
1863            &line[i..],
1864        ))
1865    }
1866
1867    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1868    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1869    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1870        let num_lines = line_offsets.len();
1871        let mut in_code_block = vec![false; num_lines];
1872
1873        // For each code block, mark all lines within it
1874        for &(start, end) in code_blocks {
1875            // Ensure we're at valid UTF-8 boundaries
1876            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1877                let mut boundary = start;
1878                while boundary > 0 && !content.is_char_boundary(boundary) {
1879                    boundary -= 1;
1880                }
1881                boundary
1882            } else {
1883                start
1884            };
1885
1886            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1887                let mut boundary = end;
1888                while boundary < content.len() && !content.is_char_boundary(boundary) {
1889                    boundary += 1;
1890                }
1891                boundary
1892            } else {
1893                end.min(content.len())
1894            };
1895
1896            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
1897            // That function now has proper list context awareness (see code_block_utils.rs)
1898            // and correctly distinguishes between:
1899            // - Fenced code blocks (``` or ~~~)
1900            // - Indented code blocks at document level (4 spaces + blank line before)
1901            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
1902            //
1903            // We no longer need to re-validate here. The original validation logic
1904            // was causing false positives by marking list continuation paragraphs as
1905            // code blocks when they have 4 spaces of indentation.
1906
1907            // Use binary search to find the first and last line indices
1908            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1909            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1910            //
1911            // Find the line that CONTAINS safe_start: the line with the largest
1912            // start offset that is <= safe_start. partition_point gives us the
1913            // first line that starts AFTER safe_start, so we subtract 1.
1914            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1915            let first_line = first_line_after.saturating_sub(1);
1916            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1917
1918            // Mark all lines in the range at once
1919            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1920                *flag = true;
1921            }
1922        }
1923
1924        in_code_block
1925    }
1926
1927    /// Pre-compute basic line information (without headings/blockquotes)
1928    fn compute_basic_line_info(
1929        content: &str,
1930        line_offsets: &[usize],
1931        code_blocks: &[(usize, usize)],
1932        flavor: MarkdownFlavor,
1933        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1934        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1935    ) -> Vec<LineInfo> {
1936        let content_lines: Vec<&str> = content.lines().collect();
1937        let mut lines = Vec::with_capacity(content_lines.len());
1938
1939        // Pre-compute which lines are in code blocks
1940        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1941
1942        // Detect front matter boundaries FIRST, before any other parsing
1943        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1944        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1945
1946        for (i, line) in content_lines.iter().enumerate() {
1947            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1948            let indent = line.len() - line.trim_start().len();
1949
1950            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1951            let blockquote_parse = Self::parse_blockquote_prefix(line);
1952
1953            // For blank detection, consider blockquote context
1954            let is_blank = if let Some((_, content)) = blockquote_parse {
1955                // In blockquote context, check if content after prefix is blank
1956                content.trim().is_empty()
1957            } else {
1958                line.trim().is_empty()
1959            };
1960
1961            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1962            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1963
1964            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1965            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1966                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1967            // Check if the ENTIRE line is within an HTML comment (not just the line start)
1968            // This ensures content after `-->` on the same line is not incorrectly skipped
1969            let line_end_offset = byte_offset + line.len();
1970            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1971                html_comment_ranges,
1972                byte_offset,
1973                line_end_offset,
1974            );
1975            let list_item = if !(in_code_block
1976                || is_blank
1977                || in_mkdocstrings
1978                || in_html_comment
1979                || (front_matter_end > 0 && i < front_matter_end))
1980            {
1981                // Strip blockquote prefix if present for list detection (reuse cached result)
1982                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1983                    (content, prefix.len())
1984                } else {
1985                    (&**line, 0)
1986                };
1987
1988                if let Some((leading_spaces, marker, spacing, _content)) =
1989                    Self::parse_unordered_list(line_for_list_check)
1990                {
1991                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1992                    let content_column = marker_column + 1 + spacing.len();
1993
1994                    // According to CommonMark spec, unordered list items MUST have at least one space
1995                    // after the marker (-, *, or +). Without a space, it's not a list item.
1996                    // This also naturally handles cases like:
1997                    // - *emphasis* (not a list)
1998                    // - **bold** (not a list)
1999                    // - --- (horizontal rule, not a list)
2000                    if spacing.is_empty() {
2001                        None
2002                    } else {
2003                        Some(ListItemInfo {
2004                            marker: marker.to_string(),
2005                            is_ordered: false,
2006                            number: None,
2007                            marker_column,
2008                            content_column,
2009                        })
2010                    }
2011                } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2012                    Self::parse_ordered_list(line_for_list_check)
2013                {
2014                    let marker = format!("{number_str}{delimiter}");
2015                    let marker_column = blockquote_prefix_len + leading_spaces.len();
2016                    let content_column = marker_column + marker.len() + spacing.len();
2017
2018                    // According to CommonMark spec, ordered list items MUST have at least one space
2019                    // after the marker (period or parenthesis). Without a space, it's not a list item.
2020                    if spacing.is_empty() {
2021                        None
2022                    } else {
2023                        Some(ListItemInfo {
2024                            marker,
2025                            is_ordered: true,
2026                            number: number_str.parse().ok(),
2027                            marker_column,
2028                            content_column,
2029                        })
2030                    }
2031                } else {
2032                    None
2033                }
2034            } else {
2035                None
2036            };
2037
2038            lines.push(LineInfo {
2039                byte_offset,
2040                byte_len: line.len(),
2041                indent,
2042                is_blank,
2043                in_code_block,
2044                in_front_matter: front_matter_end > 0 && i < front_matter_end,
2045                in_html_block: false, // Will be populated after line creation
2046                in_html_comment,
2047                list_item,
2048                heading: None,    // Will be populated in second pass for Setext headings
2049                blockquote: None, // Will be populated after line creation
2050                in_mkdocstrings,
2051                in_esm_block: false, // Will be populated after line creation for MDX files
2052                in_code_span_continuation: false, // Will be populated after code spans are parsed
2053            });
2054        }
2055
2056        lines
2057    }
2058
2059    /// Detect headings and blockquotes (called after HTML block detection)
2060    fn detect_headings_and_blockquotes(
2061        content: &str,
2062        lines: &mut [LineInfo],
2063        flavor: MarkdownFlavor,
2064        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2065        link_byte_ranges: &[(usize, usize)],
2066    ) {
2067        // Regex for heading detection
2068        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2069            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2070        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2071            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2072
2073        let content_lines: Vec<&str> = content.lines().collect();
2074
2075        // Detect front matter boundaries to skip those lines
2076        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2077
2078        // Detect headings (including Setext which needs look-ahead) and blockquotes
2079        for i in 0..lines.len() {
2080            if lines[i].in_code_block {
2081                continue;
2082            }
2083
2084            // Skip lines in front matter
2085            if front_matter_end > 0 && i < front_matter_end {
2086                continue;
2087            }
2088
2089            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2090            if lines[i].in_html_block {
2091                continue;
2092            }
2093
2094            let line = content_lines[i];
2095
2096            // Check for blockquotes (even on blank lines within blockquotes)
2097            if let Some(bq) = parse_blockquote_detailed(line) {
2098                let nesting_level = bq.markers.len(); // Each '>' is one level
2099                let marker_column = bq.indent.len();
2100
2101                // Build the prefix (indentation + markers + space)
2102                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2103
2104                // Check for various blockquote issues
2105                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2106                // Only flag multiple literal spaces, not tabs
2107                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2108                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2109
2110                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2111                // MD028 flags empty blockquote lines that don't have a single space after the marker
2112                // Lines like "> " or ">> " are already correct and don't need fixing
2113                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2114
2115                lines[i].blockquote = Some(BlockquoteInfo {
2116                    nesting_level,
2117                    indent: bq.indent.to_string(),
2118                    marker_column,
2119                    prefix,
2120                    content: bq.content.to_string(),
2121                    has_no_space_after_marker: has_no_space,
2122                    has_multiple_spaces_after_marker: has_multiple_spaces,
2123                    needs_md028_fix,
2124                });
2125            }
2126
2127            // Skip heading detection for blank lines
2128            if lines[i].is_blank {
2129                continue;
2130            }
2131
2132            // Check for ATX headings (but skip MkDocs snippet lines)
2133            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2134            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2135                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2136                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2137            } else {
2138                false
2139            };
2140
2141            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2142                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2143                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2144                    continue;
2145                }
2146                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2147                // This prevents false positives where `#fragment` is detected as a heading
2148                let line_offset = lines[i].byte_offset;
2149                if link_byte_ranges
2150                    .iter()
2151                    .any(|&(start, end)| line_offset > start && line_offset < end)
2152                {
2153                    continue;
2154                }
2155                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2156                let hashes = caps.get(2).map_or("", |m| m.as_str());
2157                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2158                let rest = caps.get(4).map_or("", |m| m.as_str());
2159
2160                let level = hashes.len() as u8;
2161                let marker_column = leading_spaces.len();
2162
2163                // Check for closing sequence, but handle custom IDs that might come after
2164                let (text, has_closing, closing_seq) = {
2165                    // First check if there's a custom ID at the end
2166                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2167                        // Check if this looks like a valid custom ID (ends with })
2168                        if rest[id_start..].trim_end().ends_with('}') {
2169                            // Split off the custom ID
2170                            (&rest[..id_start], &rest[id_start..])
2171                        } else {
2172                            (rest, "")
2173                        }
2174                    } else {
2175                        (rest, "")
2176                    };
2177
2178                    // Now look for closing hashes in the part before the custom ID
2179                    let trimmed_rest = rest_without_id.trim_end();
2180                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2181                        // Find the start of the hash sequence by walking backwards
2182                        // Use char_indices to get byte positions at char boundaries
2183                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2184
2185                        // Find which char index corresponds to last_hash_byte_pos
2186                        let last_hash_char_idx = char_positions
2187                            .iter()
2188                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2189
2190                        if let Some(mut char_idx) = last_hash_char_idx {
2191                            // Walk backwards to find start of hash sequence
2192                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2193                                char_idx -= 1;
2194                            }
2195
2196                            // Get the byte position of the start of hashes
2197                            let start_of_hashes = char_positions[char_idx].0;
2198
2199                            // Check if there's at least one space before the closing hashes
2200                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2201
2202                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2203                            let potential_closing = &trimmed_rest[start_of_hashes..];
2204                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2205
2206                            if is_all_hashes && has_space_before {
2207                                // This is a closing sequence
2208                                let closing_hashes = potential_closing.to_string();
2209                                // The text is everything before the closing hashes
2210                                // Don't include the custom ID here - it will be extracted later
2211                                let text_part = if !custom_id_part.is_empty() {
2212                                    // If we have a custom ID, append it back to get the full rest
2213                                    // This allows the extract_header_id function to handle it properly
2214                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2215                                } else {
2216                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2217                                };
2218                                (text_part, true, closing_hashes)
2219                            } else {
2220                                // Not a valid closing sequence, return the full content
2221                                (rest.to_string(), false, String::new())
2222                            }
2223                        } else {
2224                            // Couldn't find char boundary, return the full content
2225                            (rest.to_string(), false, String::new())
2226                        }
2227                    } else {
2228                        // No hashes found, return the full content
2229                        (rest.to_string(), false, String::new())
2230                    }
2231                };
2232
2233                let content_column = marker_column + hashes.len() + spaces_after.len();
2234
2235                // Extract custom header ID if present
2236                let raw_text = text.trim().to_string();
2237                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2238
2239                // If no custom ID was found on the header line, check the next line for standalone attr-list
2240                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2241                    let next_line = content_lines[i + 1];
2242                    if !lines[i + 1].in_code_block
2243                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2244                        && let Some(next_line_id) =
2245                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2246                    {
2247                        custom_id = Some(next_line_id);
2248                    }
2249                }
2250
2251                // ATX heading is "valid" for processing by heading rules if:
2252                // 1. Has space after # (CommonMark compliant): `# Heading`
2253                // 2. Is empty (just hashes): `#`
2254                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2255                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2256                //
2257                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2258                // - `#tag` - single # with lowercase (social hashtag)
2259                // - `#123` - single # with number (GitHub issue ref)
2260                let is_valid = !spaces_after.is_empty()
2261                    || rest.is_empty()
2262                    || level > 1
2263                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2264
2265                lines[i].heading = Some(HeadingInfo {
2266                    level,
2267                    style: HeadingStyle::ATX,
2268                    marker: hashes.to_string(),
2269                    marker_column,
2270                    content_column,
2271                    text: clean_text,
2272                    custom_id,
2273                    raw_text,
2274                    has_closing_sequence: has_closing,
2275                    closing_sequence: closing_seq,
2276                    is_valid,
2277                });
2278            }
2279            // Check for Setext headings (need to look at next line)
2280            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2281                let next_line = content_lines[i + 1];
2282                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2283                    // Skip if next line is front matter delimiter
2284                    if front_matter_end > 0 && i < front_matter_end {
2285                        continue;
2286                    }
2287
2288                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2289                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2290                    {
2291                        continue;
2292                    }
2293
2294                    let underline = next_line.trim();
2295
2296                    let level = if underline.starts_with('=') { 1 } else { 2 };
2297                    let style = if level == 1 {
2298                        HeadingStyle::Setext1
2299                    } else {
2300                        HeadingStyle::Setext2
2301                    };
2302
2303                    // Extract custom header ID if present
2304                    let raw_text = line.trim().to_string();
2305                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2306
2307                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2308                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2309                        let attr_line = content_lines[i + 2];
2310                        if !lines[i + 2].in_code_block
2311                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2312                            && let Some(attr_line_id) =
2313                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2314                        {
2315                            custom_id = Some(attr_line_id);
2316                        }
2317                    }
2318
2319                    lines[i].heading = Some(HeadingInfo {
2320                        level,
2321                        style,
2322                        marker: underline.to_string(),
2323                        marker_column: next_line.len() - next_line.trim_start().len(),
2324                        content_column: lines[i].indent,
2325                        text: clean_text,
2326                        custom_id,
2327                        raw_text,
2328                        has_closing_sequence: false,
2329                        closing_sequence: String::new(),
2330                        is_valid: true, // Setext headings are always valid
2331                    });
2332                }
2333            }
2334        }
2335    }
2336
2337    /// Detect HTML blocks in the content
2338    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2339        // HTML block elements that trigger block context
2340        // Includes HTML5 media, embedded content, and interactive elements
2341        const BLOCK_ELEMENTS: &[&str] = &[
2342            "address",
2343            "article",
2344            "aside",
2345            "audio",
2346            "blockquote",
2347            "canvas",
2348            "details",
2349            "dialog",
2350            "dd",
2351            "div",
2352            "dl",
2353            "dt",
2354            "embed",
2355            "fieldset",
2356            "figcaption",
2357            "figure",
2358            "footer",
2359            "form",
2360            "h1",
2361            "h2",
2362            "h3",
2363            "h4",
2364            "h5",
2365            "h6",
2366            "header",
2367            "hr",
2368            "iframe",
2369            "li",
2370            "main",
2371            "menu",
2372            "nav",
2373            "noscript",
2374            "object",
2375            "ol",
2376            "p",
2377            "picture",
2378            "pre",
2379            "script",
2380            "search",
2381            "section",
2382            "source",
2383            "style",
2384            "summary",
2385            "svg",
2386            "table",
2387            "tbody",
2388            "td",
2389            "template",
2390            "textarea",
2391            "tfoot",
2392            "th",
2393            "thead",
2394            "tr",
2395            "track",
2396            "ul",
2397            "video",
2398        ];
2399
2400        let mut i = 0;
2401        while i < lines.len() {
2402            // Skip if already in code block or front matter
2403            if lines[i].in_code_block || lines[i].in_front_matter {
2404                i += 1;
2405                continue;
2406            }
2407
2408            let trimmed = lines[i].content(content).trim_start();
2409
2410            // Check if line starts with an HTML tag
2411            if trimmed.starts_with('<') && trimmed.len() > 1 {
2412                // Extract tag name safely
2413                let after_bracket = &trimmed[1..];
2414                let is_closing = after_bracket.starts_with('/');
2415                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2416
2417                // Extract tag name (stop at space, >, /, or end of string)
2418                let tag_name = tag_start
2419                    .chars()
2420                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2421                    .collect::<String>()
2422                    .to_lowercase();
2423
2424                // Check if it's a block element
2425                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2426                    // Mark this line as in HTML block
2427                    lines[i].in_html_block = true;
2428
2429                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2430                    // This avoids complex nesting logic that might cause infinite loops
2431                    if !is_closing {
2432                        let closing_tag = format!("</{tag_name}>");
2433                        // style and script tags can contain blank lines (CSS/JS formatting)
2434                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2435                        let mut j = i + 1;
2436                        while j < lines.len() && j < i + 100 {
2437                            // Limit search to 100 lines
2438                            // Stop at blank lines (except for style/script tags)
2439                            if !allow_blank_lines && lines[j].is_blank {
2440                                break;
2441                            }
2442
2443                            lines[j].in_html_block = true;
2444
2445                            // Check if this line contains the closing tag
2446                            if lines[j].content(content).contains(&closing_tag) {
2447                                break;
2448                            }
2449                            j += 1;
2450                        }
2451                    }
2452                }
2453            }
2454
2455            i += 1;
2456        }
2457    }
2458
2459    /// Detect ESM import/export blocks in MDX files
2460    /// ESM blocks consist of contiguous import/export statements at the top of the file
2461    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2462        // Only process MDX files
2463        if !flavor.supports_esm_blocks() {
2464            return;
2465        }
2466
2467        let mut in_multiline_comment = false;
2468
2469        for line in lines.iter_mut() {
2470            // Skip blank lines and HTML comments
2471            if line.is_blank || line.in_html_comment {
2472                continue;
2473            }
2474
2475            let trimmed = line.content(content).trim_start();
2476
2477            // Handle continuation of multi-line JS comments
2478            if in_multiline_comment {
2479                if trimmed.contains("*/") {
2480                    in_multiline_comment = false;
2481                }
2482                continue;
2483            }
2484
2485            // Skip single-line JS comments (// and ///)
2486            if trimmed.starts_with("//") {
2487                continue;
2488            }
2489
2490            // Handle start of multi-line JS comment
2491            if trimmed.starts_with("/*") {
2492                if !trimmed.contains("*/") {
2493                    in_multiline_comment = true;
2494                }
2495                continue;
2496            }
2497
2498            // Check if line starts with import or export
2499            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2500                line.in_esm_block = true;
2501            } else {
2502                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2503                break;
2504            }
2505        }
2506    }
2507
2508    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2509    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2510        let mut code_spans = Vec::new();
2511
2512        // Quick check - if no backticks, no code spans
2513        if !content.contains('`') {
2514            return code_spans;
2515        }
2516
2517        // Use pulldown-cmark's streaming parser with byte offsets
2518        let parser = Parser::new(content).into_offset_iter();
2519
2520        for (event, range) in parser {
2521            if let Event::Code(_) = event {
2522                let start_pos = range.start;
2523                let end_pos = range.end;
2524
2525                // The range includes the backticks, extract the actual content
2526                let full_span = &content[start_pos..end_pos];
2527                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2528
2529                // Extract content between backticks, preserving spaces
2530                let content_start = start_pos + backtick_count;
2531                let content_end = end_pos - backtick_count;
2532                let span_content = if content_start < content_end {
2533                    content[content_start..content_end].to_string()
2534                } else {
2535                    String::new()
2536                };
2537
2538                // Use binary search to find line number - O(log n) instead of O(n)
2539                // Find the rightmost line whose byte_offset <= start_pos
2540                let line_idx = lines
2541                    .partition_point(|line| line.byte_offset <= start_pos)
2542                    .saturating_sub(1);
2543                let line_num = line_idx + 1;
2544                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2545
2546                // Find end column using binary search
2547                let end_line_idx = lines
2548                    .partition_point(|line| line.byte_offset <= end_pos)
2549                    .saturating_sub(1);
2550                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2551
2552                // Convert byte offsets to character positions for correct Unicode handling
2553                // This ensures consistency with warning.column which uses character positions
2554                let line_content = lines[line_idx].content(content);
2555                let col_start = if byte_col_start <= line_content.len() {
2556                    line_content[..byte_col_start].chars().count()
2557                } else {
2558                    line_content.chars().count()
2559                };
2560
2561                let end_line_content = lines[end_line_idx].content(content);
2562                let col_end = if byte_col_end <= end_line_content.len() {
2563                    end_line_content[..byte_col_end].chars().count()
2564                } else {
2565                    end_line_content.chars().count()
2566                };
2567
2568                code_spans.push(CodeSpan {
2569                    line: line_num,
2570                    end_line: end_line_idx + 1,
2571                    start_col: col_start,
2572                    end_col: col_end,
2573                    byte_offset: start_pos,
2574                    byte_end: end_pos,
2575                    backtick_count,
2576                    content: span_content,
2577                });
2578            }
2579        }
2580
2581        // Sort by position to ensure consistent ordering
2582        code_spans.sort_by_key(|span| span.byte_offset);
2583
2584        code_spans
2585    }
2586
2587    /// Parse all list blocks in the content (legacy line-by-line approach)
2588    ///
2589    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2590    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2591    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2592    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2593    ///   treated as list continuation (based on the list marker width)
2594    ///
2595    /// When a new list item is encountered, we check if list-breaking content was seen
2596    /// since the last item. If so, we start a new list block.
2597    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2598        // Minimum indentation for unordered list continuation per CommonMark spec
2599        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2600
2601        /// Initialize or reset the forward-scanning tracking state.
2602        /// This helper eliminates code duplication across three initialization sites.
2603        #[inline]
2604        fn reset_tracking_state(
2605            list_item: &ListItemInfo,
2606            has_list_breaking_content: &mut bool,
2607            min_continuation: &mut usize,
2608        ) {
2609            *has_list_breaking_content = false;
2610            let marker_width = if list_item.is_ordered {
2611                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2612            } else {
2613                list_item.marker.len()
2614            };
2615            *min_continuation = if list_item.is_ordered {
2616                marker_width
2617            } else {
2618                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2619            };
2620        }
2621
2622        // Pre-size based on lines that could be list items
2623        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2624        let mut current_block: Option<ListBlock> = None;
2625        let mut last_list_item_line = 0;
2626        let mut current_indent_level = 0;
2627        let mut last_marker_width = 0;
2628
2629        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2630        let mut has_list_breaking_content_since_last_item = false;
2631        let mut min_continuation_for_tracking = 0;
2632
2633        for (line_idx, line_info) in lines.iter().enumerate() {
2634            let line_num = line_idx + 1;
2635
2636            // Enhanced code block handling using Design #3's context analysis
2637            if line_info.in_code_block {
2638                if let Some(ref mut block) = current_block {
2639                    // Calculate minimum indentation for list continuation
2640                    let min_continuation_indent =
2641                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2642
2643                    // Analyze code block context using the three-tier classification
2644                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2645
2646                    match context {
2647                        CodeBlockContext::Indented => {
2648                            // Code block is properly indented - continues the list
2649                            block.end_line = line_num;
2650                            continue;
2651                        }
2652                        CodeBlockContext::Standalone => {
2653                            // Code block separates lists - end current block
2654                            let completed_block = current_block.take().unwrap();
2655                            list_blocks.push(completed_block);
2656                            continue;
2657                        }
2658                        CodeBlockContext::Adjacent => {
2659                            // Edge case - use conservative behavior (continue list)
2660                            block.end_line = line_num;
2661                            continue;
2662                        }
2663                    }
2664                } else {
2665                    // No current list block - skip code block lines
2666                    continue;
2667                }
2668            }
2669
2670            // Extract blockquote prefix if any
2671            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2672                caps.get(0).unwrap().as_str().to_string()
2673            } else {
2674                String::new()
2675            };
2676
2677            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2678            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2679            if current_block.is_some()
2680                && line_info.list_item.is_none()
2681                && !line_info.is_blank
2682                && !line_info.in_code_span_continuation
2683            {
2684                let line_content = line_info.content(content).trim();
2685
2686                // Check for structural separators that break lists
2687                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2688                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2689                // as they indicate improper indentation rather than lazy continuation.
2690                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2691                let breaks_list = line_info.heading.is_some()
2692                    || line_content.starts_with("---")
2693                    || line_content.starts_with("***")
2694                    || line_content.starts_with("___")
2695                    || crate::utils::skip_context::is_table_line(line_content)
2696                    || line_content.starts_with(">")
2697                    || (line_info.indent > 0
2698                        && line_info.indent < min_continuation_for_tracking
2699                        && !is_lazy_continuation);
2700
2701                if breaks_list {
2702                    has_list_breaking_content_since_last_item = true;
2703                }
2704            }
2705
2706            // If this line is a code span continuation within an active list block,
2707            // extend the block's end_line to include this line (maintains list continuity)
2708            if line_info.in_code_span_continuation
2709                && line_info.list_item.is_none()
2710                && let Some(ref mut block) = current_block
2711            {
2712                block.end_line = line_num;
2713            }
2714
2715            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2716            // properly indented lines within the list). This ensures the workaround at line 2448
2717            // works correctly when there are multiple continuation lines before a nested list item.
2718            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2719            let is_valid_continuation =
2720                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2721            if !line_info.in_code_span_continuation
2722                && line_info.list_item.is_none()
2723                && !line_info.is_blank
2724                && !line_info.in_code_block
2725                && is_valid_continuation
2726                && let Some(ref mut block) = current_block
2727            {
2728                block.end_line = line_num;
2729            }
2730
2731            // Check if this line is a list item
2732            if let Some(list_item) = &line_info.list_item {
2733                // Calculate nesting level based on indentation
2734                let item_indent = list_item.marker_column;
2735                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2736
2737                if let Some(ref mut block) = current_block {
2738                    // Check if this continues the current block
2739                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2740                    // or a continuation at the same or lower level
2741                    let is_nested = nesting > block.nesting_level;
2742                    let same_type =
2743                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2744                    let same_context = block.blockquote_prefix == blockquote_prefix;
2745                    // Allow one blank line after last item, or lines immediately after block content
2746                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2747
2748                    // For unordered lists, also check marker consistency
2749                    let marker_compatible =
2750                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2751
2752                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2753                    // This eliminates the quadratic bottleneck from issue #148
2754                    let has_non_list_content = has_list_breaking_content_since_last_item;
2755
2756                    // A list continues if:
2757                    // 1. It's a nested item (indented more than the parent), OR
2758                    // 2. It's the same type at the same level with reasonable distance
2759                    let mut continues_list = if is_nested {
2760                        // Nested items always continue the list if they're in the same context
2761                        same_context && reasonable_distance && !has_non_list_content
2762                    } else {
2763                        // Same-level items need to match type and markers
2764                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2765                    };
2766
2767                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2768                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2769                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2770                        // Check if the previous line was a list item or a continuation of a list item
2771                        // (including lazy continuation lines)
2772                        if block.item_lines.contains(&(line_num - 1)) {
2773                            // They're consecutive list items - force them to be in the same list
2774                            continues_list = true;
2775                        } else {
2776                            // Previous line is a continuation line within this block
2777                            // (e.g., lazy continuation with indent=0)
2778                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2779                            continues_list = true;
2780                        }
2781                    }
2782
2783                    if continues_list {
2784                        // Extend current block
2785                        block.end_line = line_num;
2786                        block.item_lines.push(line_num);
2787
2788                        // Update max marker width
2789                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2790                            list_item.marker.len() + 1
2791                        } else {
2792                            list_item.marker.len()
2793                        });
2794
2795                        // Update marker consistency for unordered lists
2796                        if !block.is_ordered
2797                            && block.marker.is_some()
2798                            && block.marker.as_ref() != Some(&list_item.marker)
2799                        {
2800                            // Mixed markers, clear the marker field
2801                            block.marker = None;
2802                        }
2803
2804                        // Reset tracked state for issue #148 optimization
2805                        reset_tracking_state(
2806                            list_item,
2807                            &mut has_list_breaking_content_since_last_item,
2808                            &mut min_continuation_for_tracking,
2809                        );
2810                    } else {
2811                        // End current block and start a new one
2812
2813                        list_blocks.push(block.clone());
2814
2815                        *block = ListBlock {
2816                            start_line: line_num,
2817                            end_line: line_num,
2818                            is_ordered: list_item.is_ordered,
2819                            marker: if list_item.is_ordered {
2820                                None
2821                            } else {
2822                                Some(list_item.marker.clone())
2823                            },
2824                            blockquote_prefix: blockquote_prefix.clone(),
2825                            item_lines: vec![line_num],
2826                            nesting_level: nesting,
2827                            max_marker_width: if list_item.is_ordered {
2828                                list_item.marker.len() + 1
2829                            } else {
2830                                list_item.marker.len()
2831                            },
2832                        };
2833
2834                        // Initialize tracked state for new block (issue #148 optimization)
2835                        reset_tracking_state(
2836                            list_item,
2837                            &mut has_list_breaking_content_since_last_item,
2838                            &mut min_continuation_for_tracking,
2839                        );
2840                    }
2841                } else {
2842                    // Start a new block
2843                    current_block = Some(ListBlock {
2844                        start_line: line_num,
2845                        end_line: line_num,
2846                        is_ordered: list_item.is_ordered,
2847                        marker: if list_item.is_ordered {
2848                            None
2849                        } else {
2850                            Some(list_item.marker.clone())
2851                        },
2852                        blockquote_prefix,
2853                        item_lines: vec![line_num],
2854                        nesting_level: nesting,
2855                        max_marker_width: list_item.marker.len(),
2856                    });
2857
2858                    // Initialize tracked state for new block (issue #148 optimization)
2859                    reset_tracking_state(
2860                        list_item,
2861                        &mut has_list_breaking_content_since_last_item,
2862                        &mut min_continuation_for_tracking,
2863                    );
2864                }
2865
2866                last_list_item_line = line_num;
2867                current_indent_level = item_indent;
2868                last_marker_width = if list_item.is_ordered {
2869                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2870                } else {
2871                    list_item.marker.len()
2872                };
2873            } else if let Some(ref mut block) = current_block {
2874                // Not a list item - check if it continues the current block
2875
2876                // For MD032 compatibility, we use a simple approach:
2877                // - Indented lines continue the list
2878                // - Blank lines followed by indented content continue the list
2879                // - Everything else ends the list
2880
2881                // Check if the last line in the list block ended with a backslash (hard line break)
2882                // This handles cases where list items use backslash for hard line breaks
2883                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2884                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2885                } else {
2886                    false
2887                };
2888
2889                // Calculate minimum indentation for list continuation
2890                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2891                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2892                let min_continuation_indent = if block.is_ordered {
2893                    current_indent_level + last_marker_width
2894                } else {
2895                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2896                };
2897
2898                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2899                    // Indented line or backslash continuation continues the list
2900                    block.end_line = line_num;
2901                } else if line_info.is_blank {
2902                    // Blank line - check if it's internal to the list or ending it
2903                    // We only include blank lines that are followed by more list content
2904                    let mut check_idx = line_idx + 1;
2905                    let mut found_continuation = false;
2906
2907                    // Skip additional blank lines
2908                    while check_idx < lines.len() && lines[check_idx].is_blank {
2909                        check_idx += 1;
2910                    }
2911
2912                    if check_idx < lines.len() {
2913                        let next_line = &lines[check_idx];
2914                        // Check if followed by indented content (list continuation)
2915                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2916                            found_continuation = true;
2917                        }
2918                        // Check if followed by another list item at the same level
2919                        else if !next_line.in_code_block
2920                            && next_line.list_item.is_some()
2921                            && let Some(item) = &next_line.list_item
2922                        {
2923                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2924                                .find(next_line.content(content))
2925                                .map_or(String::new(), |m| m.as_str().to_string());
2926                            if item.marker_column == current_indent_level
2927                                && item.is_ordered == block.is_ordered
2928                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2929                            {
2930                                // Check if there was meaningful content between the list items (unused now)
2931                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2932                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2933                                    if let Some(between_line) = lines.get(idx) {
2934                                        let between_content = between_line.content(content);
2935                                        let trimmed = between_content.trim();
2936                                        // Skip empty lines
2937                                        if trimmed.is_empty() {
2938                                            return false;
2939                                        }
2940                                        // Check for meaningful content
2941                                        let line_indent = between_content.len() - between_content.trim_start().len();
2942
2943                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2944                                        if trimmed.starts_with("```")
2945                                            || trimmed.starts_with("~~~")
2946                                            || trimmed.starts_with("---")
2947                                            || trimmed.starts_with("***")
2948                                            || trimmed.starts_with("___")
2949                                            || trimmed.starts_with(">")
2950                                            || crate::utils::skip_context::is_table_line(trimmed)
2951                                            || between_line.heading.is_some()
2952                                        {
2953                                            return true; // These are structural separators - meaningful content that breaks lists
2954                                        }
2955
2956                                        // Only properly indented content continues the list
2957                                        line_indent >= min_continuation_indent
2958                                    } else {
2959                                        false
2960                                    }
2961                                });
2962
2963                                if block.is_ordered {
2964                                    // For ordered lists: don't continue if there are structural separators
2965                                    // Check if there are structural separators between the list items
2966                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2967                                        if let Some(between_line) = lines.get(idx) {
2968                                            let trimmed = between_line.content(content).trim();
2969                                            if trimmed.is_empty() {
2970                                                return false;
2971                                            }
2972                                            // Check for structural separators that break lists
2973                                            trimmed.starts_with("```")
2974                                                || trimmed.starts_with("~~~")
2975                                                || trimmed.starts_with("---")
2976                                                || trimmed.starts_with("***")
2977                                                || trimmed.starts_with("___")
2978                                                || trimmed.starts_with(">")
2979                                                || crate::utils::skip_context::is_table_line(trimmed)
2980                                                || between_line.heading.is_some()
2981                                        } else {
2982                                            false
2983                                        }
2984                                    });
2985                                    found_continuation = !has_structural_separators;
2986                                } else {
2987                                    // For unordered lists: also check for structural separators
2988                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2989                                        if let Some(between_line) = lines.get(idx) {
2990                                            let trimmed = between_line.content(content).trim();
2991                                            if trimmed.is_empty() {
2992                                                return false;
2993                                            }
2994                                            // Check for structural separators that break lists
2995                                            trimmed.starts_with("```")
2996                                                || trimmed.starts_with("~~~")
2997                                                || trimmed.starts_with("---")
2998                                                || trimmed.starts_with("***")
2999                                                || trimmed.starts_with("___")
3000                                                || trimmed.starts_with(">")
3001                                                || crate::utils::skip_context::is_table_line(trimmed)
3002                                                || between_line.heading.is_some()
3003                                        } else {
3004                                            false
3005                                        }
3006                                    });
3007                                    found_continuation = !has_structural_separators;
3008                                }
3009                            }
3010                        }
3011                    }
3012
3013                    if found_continuation {
3014                        // Include the blank line in the block
3015                        block.end_line = line_num;
3016                    } else {
3017                        // Blank line ends the list - don't include it
3018                        list_blocks.push(block.clone());
3019                        current_block = None;
3020                    }
3021                } else {
3022                    // Check for lazy continuation - non-indented line immediately after a list item
3023                    // But only if the line has sufficient indentation for the list type
3024                    let min_required_indent = if block.is_ordered {
3025                        current_indent_level + last_marker_width
3026                    } else {
3027                        current_indent_level + 2
3028                    };
3029
3030                    // For lazy continuation to apply, the line must either:
3031                    // 1. Have no indentation (true lazy continuation)
3032                    // 2. Have sufficient indentation for the list type
3033                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3034                    let line_content = line_info.content(content).trim();
3035
3036                    // Check for table-like patterns
3037                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3038
3039                    let is_structural_separator = line_info.heading.is_some()
3040                        || line_content.starts_with("```")
3041                        || line_content.starts_with("~~~")
3042                        || line_content.starts_with("---")
3043                        || line_content.starts_with("***")
3044                        || line_content.starts_with("___")
3045                        || line_content.starts_with(">")
3046                        || looks_like_table;
3047
3048                    // Allow lazy continuation if we're still within the same list block
3049                    // (not just immediately after a list item)
3050                    let is_lazy_continuation = !is_structural_separator
3051                        && !line_info.is_blank
3052                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3053
3054                    if is_lazy_continuation {
3055                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3056                        // it's probably not a continuation
3057                        let content_to_check = if !blockquote_prefix.is_empty() {
3058                            // Strip blockquote prefix to check the actual content
3059                            line_info
3060                                .content(content)
3061                                .strip_prefix(&blockquote_prefix)
3062                                .unwrap_or(line_info.content(content))
3063                                .trim()
3064                        } else {
3065                            line_info.content(content).trim()
3066                        };
3067
3068                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3069
3070                        // If it starts with uppercase and the previous line ended with punctuation,
3071                        // it's likely a new paragraph, not a continuation
3072                        if starts_with_uppercase && last_list_item_line > 0 {
3073                            // This looks like a new paragraph
3074                            list_blocks.push(block.clone());
3075                            current_block = None;
3076                        } else {
3077                            // This is a lazy continuation line
3078                            block.end_line = line_num;
3079                        }
3080                    } else {
3081                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3082                        list_blocks.push(block.clone());
3083                        current_block = None;
3084                    }
3085                }
3086            }
3087        }
3088
3089        // Don't forget the last block
3090        if let Some(block) = current_block {
3091            list_blocks.push(block);
3092        }
3093
3094        // Merge adjacent blocks that should be one
3095        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3096
3097        list_blocks
3098    }
3099
3100    /// Compute character frequency for fast content analysis
3101    fn compute_char_frequency(content: &str) -> CharFrequency {
3102        let mut frequency = CharFrequency::default();
3103
3104        for ch in content.chars() {
3105            match ch {
3106                '#' => frequency.hash_count += 1,
3107                '*' => frequency.asterisk_count += 1,
3108                '_' => frequency.underscore_count += 1,
3109                '-' => frequency.hyphen_count += 1,
3110                '+' => frequency.plus_count += 1,
3111                '>' => frequency.gt_count += 1,
3112                '|' => frequency.pipe_count += 1,
3113                '[' => frequency.bracket_count += 1,
3114                '`' => frequency.backtick_count += 1,
3115                '<' => frequency.lt_count += 1,
3116                '!' => frequency.exclamation_count += 1,
3117                '\n' => frequency.newline_count += 1,
3118                _ => {}
3119            }
3120        }
3121
3122        frequency
3123    }
3124
3125    /// Parse HTML tags in the content
3126    fn parse_html_tags(
3127        content: &str,
3128        lines: &[LineInfo],
3129        code_blocks: &[(usize, usize)],
3130        flavor: MarkdownFlavor,
3131    ) -> Vec<HtmlTag> {
3132        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3133            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3134
3135        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3136
3137        for cap in HTML_TAG_REGEX.captures_iter(content) {
3138            let full_match = cap.get(0).unwrap();
3139            let match_start = full_match.start();
3140            let match_end = full_match.end();
3141
3142            // Skip if in code block
3143            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3144                continue;
3145            }
3146
3147            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3148            let tag_name_original = cap.get(2).unwrap().as_str();
3149            let tag_name = tag_name_original.to_lowercase();
3150            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3151
3152            // Skip JSX components in MDX files (tags starting with uppercase letter)
3153            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3154            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3155                continue;
3156            }
3157
3158            // Find which line this tag is on
3159            let mut line_num = 1;
3160            let mut col_start = match_start;
3161            let mut col_end = match_end;
3162            for (idx, line_info) in lines.iter().enumerate() {
3163                if match_start >= line_info.byte_offset {
3164                    line_num = idx + 1;
3165                    col_start = match_start - line_info.byte_offset;
3166                    col_end = match_end - line_info.byte_offset;
3167                } else {
3168                    break;
3169                }
3170            }
3171
3172            html_tags.push(HtmlTag {
3173                line: line_num,
3174                start_col: col_start,
3175                end_col: col_end,
3176                byte_offset: match_start,
3177                byte_end: match_end,
3178                tag_name,
3179                is_closing,
3180                is_self_closing,
3181                raw_content: full_match.as_str().to_string(),
3182            });
3183        }
3184
3185        html_tags
3186    }
3187
3188    /// Parse emphasis spans in the content
3189    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3190        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3191            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3192
3193        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3194
3195        for cap in EMPHASIS_REGEX.captures_iter(content) {
3196            let full_match = cap.get(0).unwrap();
3197            let match_start = full_match.start();
3198            let match_end = full_match.end();
3199
3200            // Skip if in code block
3201            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3202                continue;
3203            }
3204
3205            let opening_markers = cap.get(1).unwrap().as_str();
3206            let content_part = cap.get(2).unwrap().as_str();
3207            let closing_markers = cap.get(3).unwrap().as_str();
3208
3209            // Validate matching markers
3210            if opening_markers.chars().next() != closing_markers.chars().next()
3211                || opening_markers.len() != closing_markers.len()
3212            {
3213                continue;
3214            }
3215
3216            let marker = opening_markers.chars().next().unwrap();
3217            let marker_count = opening_markers.len();
3218
3219            // Find which line this emphasis is on
3220            let mut line_num = 1;
3221            let mut col_start = match_start;
3222            let mut col_end = match_end;
3223            for (idx, line_info) in lines.iter().enumerate() {
3224                if match_start >= line_info.byte_offset {
3225                    line_num = idx + 1;
3226                    col_start = match_start - line_info.byte_offset;
3227                    col_end = match_end - line_info.byte_offset;
3228                } else {
3229                    break;
3230                }
3231            }
3232
3233            emphasis_spans.push(EmphasisSpan {
3234                line: line_num,
3235                start_col: col_start,
3236                end_col: col_end,
3237                byte_offset: match_start,
3238                byte_end: match_end,
3239                marker,
3240                marker_count,
3241                content: content_part.to_string(),
3242            });
3243        }
3244
3245        emphasis_spans
3246    }
3247
3248    /// Parse table rows in the content
3249    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3250        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3251
3252        for (line_idx, line_info) in lines.iter().enumerate() {
3253            // Skip lines in code blocks or blank lines
3254            if line_info.in_code_block || line_info.is_blank {
3255                continue;
3256            }
3257
3258            let line = line_info.content(content);
3259            let line_num = line_idx + 1;
3260
3261            // Check if this line contains pipes (potential table row)
3262            if !line.contains('|') {
3263                continue;
3264            }
3265
3266            // Count columns by splitting on pipes
3267            let parts: Vec<&str> = line.split('|').collect();
3268            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3269
3270            // Check if this is a separator row
3271            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3272            let mut column_alignments = Vec::new();
3273
3274            if is_separator {
3275                for part in &parts[1..parts.len() - 1] {
3276                    // Skip first and last empty parts
3277                    let trimmed = part.trim();
3278                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3279                        "center".to_string()
3280                    } else if trimmed.ends_with(':') {
3281                        "right".to_string()
3282                    } else if trimmed.starts_with(':') {
3283                        "left".to_string()
3284                    } else {
3285                        "none".to_string()
3286                    };
3287                    column_alignments.push(alignment);
3288                }
3289            }
3290
3291            table_rows.push(TableRow {
3292                line: line_num,
3293                is_separator,
3294                column_count,
3295                column_alignments,
3296            });
3297        }
3298
3299        table_rows
3300    }
3301
3302    /// Parse bare URLs and emails in the content
3303    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3304        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3305
3306        // Check for bare URLs (not in angle brackets or markdown links)
3307        for cap in BARE_URL_PATTERN.captures_iter(content) {
3308            let full_match = cap.get(0).unwrap();
3309            let match_start = full_match.start();
3310            let match_end = full_match.end();
3311
3312            // Skip if in code block
3313            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3314                continue;
3315            }
3316
3317            // Skip if already in angle brackets or markdown links
3318            let preceding_char = if match_start > 0 {
3319                content.chars().nth(match_start - 1)
3320            } else {
3321                None
3322            };
3323            let following_char = content.chars().nth(match_end);
3324
3325            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3326                continue;
3327            }
3328            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3329                continue;
3330            }
3331
3332            let url = full_match.as_str();
3333            let url_type = if url.starts_with("https://") {
3334                "https"
3335            } else if url.starts_with("http://") {
3336                "http"
3337            } else if url.starts_with("ftp://") {
3338                "ftp"
3339            } else {
3340                "other"
3341            };
3342
3343            // Find which line this URL is on
3344            let mut line_num = 1;
3345            let mut col_start = match_start;
3346            let mut col_end = match_end;
3347            for (idx, line_info) in lines.iter().enumerate() {
3348                if match_start >= line_info.byte_offset {
3349                    line_num = idx + 1;
3350                    col_start = match_start - line_info.byte_offset;
3351                    col_end = match_end - line_info.byte_offset;
3352                } else {
3353                    break;
3354                }
3355            }
3356
3357            bare_urls.push(BareUrl {
3358                line: line_num,
3359                start_col: col_start,
3360                end_col: col_end,
3361                byte_offset: match_start,
3362                byte_end: match_end,
3363                url: url.to_string(),
3364                url_type: url_type.to_string(),
3365            });
3366        }
3367
3368        // Check for bare email addresses
3369        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3370            let full_match = cap.get(0).unwrap();
3371            let match_start = full_match.start();
3372            let match_end = full_match.end();
3373
3374            // Skip if in code block
3375            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3376                continue;
3377            }
3378
3379            // Skip if already in angle brackets or markdown links
3380            let preceding_char = if match_start > 0 {
3381                content.chars().nth(match_start - 1)
3382            } else {
3383                None
3384            };
3385            let following_char = content.chars().nth(match_end);
3386
3387            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3388                continue;
3389            }
3390            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3391                continue;
3392            }
3393
3394            let email = full_match.as_str();
3395
3396            // Find which line this email is on
3397            let mut line_num = 1;
3398            let mut col_start = match_start;
3399            let mut col_end = match_end;
3400            for (idx, line_info) in lines.iter().enumerate() {
3401                if match_start >= line_info.byte_offset {
3402                    line_num = idx + 1;
3403                    col_start = match_start - line_info.byte_offset;
3404                    col_end = match_end - line_info.byte_offset;
3405                } else {
3406                    break;
3407                }
3408            }
3409
3410            bare_urls.push(BareUrl {
3411                line: line_num,
3412                start_col: col_start,
3413                end_col: col_end,
3414                byte_offset: match_start,
3415                byte_end: match_end,
3416                url: email.to_string(),
3417                url_type: "email".to_string(),
3418            });
3419        }
3420
3421        bare_urls
3422    }
3423
3424    /// Get an iterator over valid CommonMark headings
3425    ///
3426    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
3427    /// that should be flagged by MD018 but should not be processed by other heading rules.
3428    ///
3429    /// # Examples
3430    ///
3431    /// ```rust
3432    /// use rumdl_lib::lint_context::LintContext;
3433    /// use rumdl_lib::config::MarkdownFlavor;
3434    ///
3435    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
3436    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3437    ///
3438    /// for heading in ctx.valid_headings() {
3439    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
3440    /// }
3441    /// // Only prints valid headings, skips `#NoSpace`
3442    /// ```
3443    #[must_use]
3444    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
3445        ValidHeadingsIter::new(&self.lines)
3446    }
3447
3448    /// Check if the document contains any valid CommonMark headings
3449    ///
3450    /// Returns `true` if there is at least one heading with proper space after `#`.
3451    #[must_use]
3452    pub fn has_valid_headings(&self) -> bool {
3453        self.lines
3454            .iter()
3455            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3456    }
3457}
3458
3459/// Merge adjacent list blocks that should be treated as one
3460fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3461    if list_blocks.len() < 2 {
3462        return;
3463    }
3464
3465    let mut merger = ListBlockMerger::new(content, lines);
3466    *list_blocks = merger.merge(list_blocks);
3467}
3468
/// Helper struct to manage the complex logic of merging list blocks
///
/// Holds only borrowed data with lifetime `'a`; it owns nothing itself.
struct ListBlockMerger<'a> {
    /// Borrowed text handed to `ListBlockMerger::new` (the `content` argument of
    /// `merge_adjacent_list_blocks`).
    content: &'a str,
    /// Borrowed `LineInfo` slice handed to `ListBlockMerger::new` (the `lines`
    /// argument of `merge_adjacent_list_blocks`).
    lines: &'a [LineInfo],
}
3474
3475impl<'a> ListBlockMerger<'a> {
3476    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3477        Self { content, lines }
3478    }
3479
3480    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3481        let mut merged = Vec::with_capacity(list_blocks.len());
3482        let mut current = list_blocks[0].clone();
3483
3484        for next in list_blocks.iter().skip(1) {
3485            if self.should_merge_blocks(&current, next) {
3486                current = self.merge_two_blocks(current, next);
3487            } else {
3488                merged.push(current);
3489                current = next.clone();
3490            }
3491        }
3492
3493        merged.push(current);
3494        merged
3495    }
3496
3497    /// Determine if two adjacent list blocks should be merged
3498    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3499        // Basic compatibility checks
3500        if !self.blocks_are_compatible(current, next) {
3501            return false;
3502        }
3503
3504        // Check spacing and content between blocks
3505        let spacing = self.analyze_spacing_between(current, next);
3506        match spacing {
3507            BlockSpacing::Consecutive => true,
3508            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3509            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3510                self.can_merge_with_content_between(current, next)
3511            }
3512        }
3513    }
3514
3515    /// Check if blocks have compatible structure for merging
3516    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3517        current.is_ordered == next.is_ordered
3518            && current.blockquote_prefix == next.blockquote_prefix
3519            && current.nesting_level == next.nesting_level
3520    }
3521
3522    /// Analyze the spacing between two list blocks
3523    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3524        let gap = next.start_line - current.end_line;
3525
3526        match gap {
3527            1 => BlockSpacing::Consecutive,
3528            2 => BlockSpacing::SingleBlank,
3529            _ if gap > 2 => {
3530                if self.has_only_blank_lines_between(current, next) {
3531                    BlockSpacing::MultipleBlanks
3532                } else {
3533                    BlockSpacing::ContentBetween
3534                }
3535            }
3536            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3537        }
3538    }
3539
3540    /// Check if unordered lists can be merged with a single blank line between
3541    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3542        // Check if there are structural separators between the blocks
3543        // If has_meaningful_content_between returns true, it means there are structural separators
3544        if has_meaningful_content_between(self.content, current, next, self.lines) {
3545            return false; // Structural separators prevent merging
3546        }
3547
3548        // Only merge unordered lists with same marker across single blank
3549        !current.is_ordered && current.marker == next.marker
3550    }
3551
3552    /// Check if ordered lists can be merged when there's content between them
3553    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3554        // Do not merge lists if there are structural separators between them
3555        if has_meaningful_content_between(self.content, current, next, self.lines) {
3556            return false; // Structural separators prevent merging
3557        }
3558
3559        // Only consider merging ordered lists if there's no structural content between
3560        current.is_ordered && next.is_ordered
3561    }
3562
3563    /// Check if there are only blank lines between blocks
3564    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3565        for line_num in (current.end_line + 1)..next.start_line {
3566            if let Some(line_info) = self.lines.get(line_num - 1)
3567                && !line_info.content(self.content).trim().is_empty()
3568            {
3569                return false;
3570            }
3571        }
3572        true
3573    }
3574
3575    /// Merge two compatible list blocks into one
3576    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3577        current.end_line = next.end_line;
3578        current.item_lines.extend_from_slice(&next.item_lines);
3579
3580        // Update max marker width
3581        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3582
3583        // Handle marker consistency for unordered lists
3584        if !current.is_ordered && self.markers_differ(&current, next) {
3585            current.marker = None; // Mixed markers
3586        }
3587
3588        current
3589    }
3590
3591    /// Check if two blocks have different markers
3592    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3593        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3594    }
3595}
3596
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the distance
/// between one block's `end_line` and the next block's `start_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks: the next block starts on the line right after
    /// the current one ends. A gap of 0 also falls back to this variant.
    Consecutive,    // No gap between blocks
    /// Exactly one line lies between the blocks; it is assumed to be blank
    /// without being inspected at classification time.
    SingleBlank,    // One blank line between blocks
    /// Two or more lines lie between the blocks and all of them are blank.
    MultipleBlanks, // Multiple blank lines but no content
    /// Two or more lines lie between the blocks and at least one is non-blank.
    ContentBetween, // Content exists between blocks
}
3605
3606/// Check if there's meaningful content (not just blank lines) between two list blocks
3607fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3608    // Check lines between current.end_line and next.start_line
3609    for line_num in (current.end_line + 1)..next.start_line {
3610        if let Some(line_info) = lines.get(line_num - 1) {
3611            // Convert to 0-indexed
3612            let trimmed = line_info.content(content).trim();
3613
3614            // Skip empty lines
3615            if trimmed.is_empty() {
3616                continue;
3617            }
3618
3619            // Check for structural separators that should separate lists (CommonMark compliant)
3620
3621            // Headings separate lists
3622            if line_info.heading.is_some() {
3623                return true; // Has meaningful content - headings separate lists
3624            }
3625
3626            // Horizontal rules separate lists (---, ***, ___)
3627            if is_horizontal_rule(trimmed) {
3628                return true; // Has meaningful content - horizontal rules separate lists
3629            }
3630
3631            // Tables separate lists
3632            if crate::utils::skip_context::is_table_line(trimmed) {
3633                return true; // Has meaningful content - tables separate lists
3634            }
3635
3636            // Blockquotes separate lists
3637            if trimmed.starts_with('>') {
3638                return true; // Has meaningful content - blockquotes separate lists
3639            }
3640
3641            // Code block fences separate lists (unless properly indented as list content)
3642            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3643                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3644
3645                // Check if this code block is properly indented as list continuation
3646                let min_continuation_indent = if current.is_ordered {
3647                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3648                } else {
3649                    current.nesting_level + 2
3650                };
3651
3652                if line_indent < min_continuation_indent {
3653                    // This is a standalone code block that separates lists
3654                    return true; // Has meaningful content - standalone code blocks separate lists
3655                }
3656            }
3657
3658            // Check if this line has proper indentation for list continuation
3659            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3660
3661            // Calculate minimum indentation needed to be list continuation
3662            let min_indent = if current.is_ordered {
3663                current.nesting_level + current.max_marker_width
3664            } else {
3665                current.nesting_level + 2
3666            };
3667
3668            // If the line is not indented enough to be list continuation, it's meaningful content
3669            if line_indent < min_indent {
3670                return true; // Has meaningful content - content not indented as list continuation
3671            }
3672
3673            // If we reach here, the line is properly indented as list continuation
3674            // Continue checking other lines
3675        }
3676    }
3677
3678    // Only blank lines or properly indented list continuation content between blocks
3679    false
3680}
3681
/// Check if a line is a horizontal rule (---, ***, ___)
///
/// `trimmed` is expected to have surrounding whitespace already removed. The
/// line qualifies when it starts with `-`, `*`, or `_`, contains at least
/// three occurrences of that same character, and otherwise holds nothing but
/// spaces or tabs. The scan works directly on the string's characters, so no
/// intermediate buffer is allocated.
fn is_horizontal_rule(trimmed: &str) -> bool {
    // The first character decides which marker every other character must match.
    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            // Spaces and tabs may appear between markers
            ' ' | '\t' => {}
            // Non-matching, non-whitespace character
            _ => return false,
        }
    }

    marker_count >= 3
}
3705
/// Unit tests covering offset/line mapping, per-line metadata, list item detection, and MDX ESM block detection
3707#[cfg(test)]
3708mod tests {
3709    use super::*;
3710
3711    #[test]
3712    fn test_empty_content() {
3713        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
3714        assert_eq!(ctx.content, "");
3715        assert_eq!(ctx.line_offsets, vec![0]);
3716        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
3717        assert_eq!(ctx.lines.len(), 0);
3718    }
3719
3720    #[test]
3721    fn test_single_line() {
3722        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
3723        assert_eq!(ctx.content, "# Hello");
3724        assert_eq!(ctx.line_offsets, vec![0]);
3725        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
3726        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
3727    }
3728
3729    #[test]
3730    fn test_multi_line() {
3731        let content = "# Title\n\nSecond line\nThird line";
3732        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3733        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
3734        // Test offset to line/col
3735        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
3736        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
3737        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
3738        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
3739        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
3740    }
3741
3742    #[test]
3743    fn test_line_info() {
3744        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
3745        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3746
3747        // Test line info
3748        assert_eq!(ctx.lines.len(), 7);
3749
3750        // Line 1: "# Title"
3751        let line1 = &ctx.lines[0];
3752        assert_eq!(line1.content(ctx.content), "# Title");
3753        assert_eq!(line1.byte_offset, 0);
3754        assert_eq!(line1.indent, 0);
3755        assert!(!line1.is_blank);
3756        assert!(!line1.in_code_block);
3757        assert!(line1.list_item.is_none());
3758
3759        // Line 2: "    indented"
3760        let line2 = &ctx.lines[1];
3761        assert_eq!(line2.content(ctx.content), "    indented");
3762        assert_eq!(line2.byte_offset, 8);
3763        assert_eq!(line2.indent, 4);
3764        assert!(!line2.is_blank);
3765
3766        // Line 3: "" (blank)
3767        let line3 = &ctx.lines[2];
3768        assert_eq!(line3.content(ctx.content), "");
3769        assert!(line3.is_blank);
3770
3771        // Test helper methods
3772        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
3773        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
3774        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
3775        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
3776    }
3777
3778    #[test]
3779    fn test_list_item_detection() {
3780        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
3781        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3782
3783        // Line 1: "- Unordered item"
3784        let line1 = &ctx.lines[0];
3785        assert!(line1.list_item.is_some());
3786        let list1 = line1.list_item.as_ref().unwrap();
3787        assert_eq!(list1.marker, "-");
3788        assert!(!list1.is_ordered);
3789        assert_eq!(list1.marker_column, 0);
3790        assert_eq!(list1.content_column, 2);
3791
3792        // Line 2: "  * Nested item"
3793        let line2 = &ctx.lines[1];
3794        assert!(line2.list_item.is_some());
3795        let list2 = line2.list_item.as_ref().unwrap();
3796        assert_eq!(list2.marker, "*");
3797        assert_eq!(list2.marker_column, 2);
3798
3799        // Line 3: "1. Ordered item"
3800        let line3 = &ctx.lines[2];
3801        assert!(line3.list_item.is_some());
3802        let list3 = line3.list_item.as_ref().unwrap();
3803        assert_eq!(list3.marker, "1.");
3804        assert!(list3.is_ordered);
3805        assert_eq!(list3.number, Some(1));
3806
3807        // Line 6: "Not a list"
3808        let line6 = &ctx.lines[5];
3809        assert!(line6.list_item.is_none());
3810    }
3811
3812    #[test]
3813    fn test_offset_to_line_col_edge_cases() {
3814        let content = "a\nb\nc";
3815        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3816        // line_offsets: [0, 2, 4]
3817        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
3818        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
3819        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
3820        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
3821        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
3822        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
3823    }
3824
3825    #[test]
3826    fn test_mdx_esm_blocks() {
3827        let content = r##"import {Chart} from './snowfall.js'
3828export const year = 2023
3829
3830# Last year's snowfall
3831
3832In {year}, the snowfall was above average.
3833It was followed by a warm spring which caused
3834flood conditions in many of the nearby rivers.
3835
3836<Chart color="#fcb32c" year={year} />
3837"##;
3838
3839        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
3840
3841        // Check that lines 1 and 2 are marked as ESM blocks
3842        assert_eq!(ctx.lines.len(), 10);
3843        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
3844        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
3845        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
3846        assert!(
3847            !ctx.lines[3].in_esm_block,
3848            "Line 4 (heading) should NOT be in_esm_block"
3849        );
3850        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
3851        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
3852    }
3853
3854    #[test]
3855    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
3856        let content = r#"import {Chart} from './snowfall.js'
3857export const year = 2023
3858
3859# Last year's snowfall
3860"#;
3861
3862        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
3863
3864        // ESM blocks should NOT be detected in Standard flavor
3865        assert!(
3866            !ctx.lines[0].in_esm_block,
3867            "Line 1 should NOT be in_esm_block in Standard flavor"
3868        );
3869        assert!(
3870            !ctx.lines[1].in_esm_block,
3871            "Line 2 should NOT be in_esm_block in Standard flavor"
3872        );
3873    }
3874}